Browse Source

Fix HTTP Request node to give priority to file extension of content-disposition (#12653)

Gen Sato 1 month ago
parent
commit
475b8d731e

+ 13 - 6
api/core/workflow/nodes/http_request/entities.py

@@ -109,14 +109,12 @@ class Response:
         3. MIME type analysis
         """
         content_type = self.content_type.split(";")[0].strip().lower()
-        content_disposition = self.response.headers.get("content-disposition", "")
+        parsed_content_disposition = self.parsed_content_disposition
 
         # Check if it's explicitly marked as an attachment
-        if content_disposition:
-            msg = Message()
-            msg["content-disposition"] = content_disposition
-            disp_type = msg.get_content_disposition()  # Returns 'attachment', 'inline', or None
-            filename = msg.get_filename()  # Returns filename if present, None otherwise
+        if parsed_content_disposition:
+            disp_type = parsed_content_disposition.get_content_disposition()  # Returns 'attachment', 'inline', or None
+            filename = parsed_content_disposition.get_filename()  # Returns filename if present, None otherwise
             if disp_type == "attachment" or filename is not None:
                 return True
 
@@ -182,3 +180,12 @@ class Response:
             return f"{(self.size / 1024):.2f} KB"
         else:
             return f"{(self.size / 1024 / 1024):.2f} MB"
+
+    @property
+    def parsed_content_disposition(self) -> Optional[Message]:
+        """
+        Wrap the response's content-disposition header in a Message for parsing.
+
+        Returns a new Message carrying the header value, so callers can use
+        get_content_disposition() and get_filename() on it, or None when the
+        header is missing or empty. A fresh Message is built on every access.
+        """
+        # NOTE(review): the lookup uses the lowercase key; confirm self.headers
+        # is case-insensitive or stores lowercased header names.
+        content_disposition = self.headers.get("content-disposition", "")
+        if content_disposition:
+            msg = Message()
+            msg["content-disposition"] = content_disposition
+            return msg
+        return None

+ 36 - 24
api/core/workflow/nodes/http_request/node.py

@@ -169,32 +169,44 @@ class HttpRequestNode(BaseNode[HttpRequestNodeData]):
         """
         Extract files from response by checking both Content-Type header and URL
         """
-        files = []
+        files: list[File] = []
         is_file = response.is_file
         content_type = response.content_type
         content = response.content
-
-        if is_file:
-            # Guess file extension from URL or Content-Type header
-            filename = url.split("?")[0].split("/")[-1] or ""
-            mime_type = content_type or mimetypes.guess_type(filename)[0] or "application/octet-stream"
-
-            tool_file = ToolFileManager.create_file_by_raw(
-                user_id=self.user_id,
-                tenant_id=self.tenant_id,
-                conversation_id=None,
-                file_binary=content,
-                mimetype=mime_type,
-            )
-
-            mapping = {
-                "tool_file_id": tool_file.id,
-                "transfer_method": FileTransferMethod.TOOL_FILE.value,
-            }
-            file = file_factory.build_from_mapping(
-                mapping=mapping,
-                tenant_id=self.tenant_id,
-            )
-            files.append(file)
+        parsed_content_disposition = response.parsed_content_disposition
+        content_disposition_type = None
+
+        if not is_file:
+            return files
+
+        if parsed_content_disposition:
+            content_disposition_filename = parsed_content_disposition.get_filename()
+            if content_disposition_filename:
+                # If filename is available from content-disposition, use it to guess the content type
+                content_disposition_type = mimetypes.guess_type(content_disposition_filename)[0]
+
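+        # Take the filename from the URL path, then pick the MIME type: prefer the type guessed from the
+        # content-disposition filename, then the Content-Type header, then the URL filename, else octet-stream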
+        filename = url.split("?")[0].split("/")[-1] or ""
+        mime_type = (
+            content_disposition_type or content_type or mimetypes.guess_type(filename)[0] or "application/octet-stream"
+        )
+
+        tool_file = ToolFileManager.create_file_by_raw(
+            user_id=self.user_id,
+            tenant_id=self.tenant_id,
+            conversation_id=None,
+            file_binary=content,
+            mimetype=mime_type,
+        )
+
+        mapping = {
+            "tool_file_id": tool_file.id,
+            "transfer_method": FileTransferMethod.TOOL_FILE.value,
+        }
+        file = file_factory.build_from_mapping(
+            mapping=mapping,
+            tenant_id=self.tenant_id,
+        )
+        files.append(file)
 
         return files