Sfoglia il codice sorgente

refactor(file_factory): improve filename and mime type determination (#9784)

-LAN- 6 mesi fa
parent
commit
e54b7cda3d
1 file modificato con 8 aggiunte e 16 eliminazioni
  1. 8 16
      api/factories/file_factory.py

+ 8 - 16
api/factories/file_factory.py

@@ -179,27 +179,19 @@ def _build_from_remote_url(
     if not url:
         raise ValueError("Invalid file url")
 
+    mime_type = mimetypes.guess_type(url)[0] or ""
+    file_size = -1
+    filename = url.split("/")[-1].split("?")[0] or "unknown_file"
+
     resp = ssrf_proxy.head(url, follow_redirects=True)
     if resp.status_code == httpx.codes.OK:
-        # Try to extract filename from response headers or URL
-        content_disposition = resp.headers.get("Content-Disposition")
-        if content_disposition:
+        if content_disposition := resp.headers.get("Content-Disposition"):
             filename = content_disposition.split("filename=")[-1].strip('"')
-        else:
-            filename = url.split("/")[-1].split("?")[0]
-        # Create the File object
-        file_size = int(resp.headers.get("Content-Length", -1))
-        mime_type = str(resp.headers.get("Content-Type", ""))
-    else:
-        filename = ""
-        file_size = -1
-        mime_type = ""
+        file_size = int(resp.headers.get("Content-Length", file_size))
+        mime_type = mime_type or str(resp.headers.get("Content-Type", ""))
 
-    # If filename is empty, set a default one
-    if not filename:
-        filename = "unknown_file"
     # Determine file extension
-    extension = "." + filename.split(".")[-1] if "." in filename else ".bin"
+    extension = mimetypes.guess_extension(mime_type) or "." + filename.split(".")[-1] if "." in filename else ".bin"
 
     if not mime_type:
         mime_type, _ = mimetypes.guess_type(url)