Explorar el Código

fix(api): enhance file factory URL handling (#9631)

-LAN- hace 6 meses
padre
commit
ef5f476cd6
Se ha modificado 1 fichero con 17 adiciones y 12 borrados
  1. 17 12
      api/factories/file_factory.py

+ 17 - 12
api/factories/file_factory.py

@@ -2,6 +2,7 @@ import mimetypes
 from collections.abc import Mapping, Sequence
 from typing import Any
 
+import httpx
 from sqlalchemy import select
 
 from constants import AUDIO_EXTENSIONS, DOCUMENT_EXTENSIONS, IMAGE_EXTENSIONS, VIDEO_EXTENSIONS
@@ -154,7 +155,7 @@ def _build_from_local_file(
     file = File(
         id=mapping.get("id"),
         filename=row.name,
-        extension=row.extension,
+        extension="." + row.extension,
         mime_type=row.mime_type,
         tenant_id=tenant_id,
         type=file_type,
@@ -177,25 +178,29 @@ def _build_from_remote_url(
     url = mapping.get("url")
     if not url:
         raise ValueError("Invalid file url")
-    resp = ssrf_proxy.head(url, follow_redirects=True)
-    resp.raise_for_status()
 
-    # Try to extract filename from response headers or URL
-    content_disposition = resp.headers.get("Content-Disposition")
-    if content_disposition:
-        filename = content_disposition.split("filename=")[-1].strip('"')
+    resp = ssrf_proxy.head(url, follow_redirects=True)
+    if resp.status_code == httpx.codes.OK:
+        # Try to extract filename from response headers or URL
+        content_disposition = resp.headers.get("Content-Disposition")
+        if content_disposition:
+            filename = content_disposition.split("filename=")[-1].strip('"')
+        else:
+            filename = url.split("/")[-1].split("?")[0]
+        # Create the File object
+        file_size = int(resp.headers.get("Content-Length", -1))
+        mime_type = str(resp.headers.get("Content-Type", ""))
     else:
-        filename = url.split("/")[-1].split("?")[0]
+        filename = ""
+        file_size = -1
+        mime_type = ""
+
     # If filename is empty, set a default one
     if not filename:
         filename = "unknown_file"
-
     # Determine file extension
     extension = "." + filename.split(".")[-1] if "." in filename else ".bin"
 
-    # Create the File object
-    file_size = int(resp.headers.get("Content-Length", -1))
-    mime_type = str(resp.headers.get("Content-Type", ""))
     if not mime_type:
         mime_type, _ = mimetypes.guess_type(url)
     file = File(