helpers.py 1.7 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758
  1. import mimetypes
  2. import os
  3. import re
  4. import urllib.parse
  5. from uuid import uuid4
  6. import httpx
  7. from pydantic import BaseModel
  8. class FileInfo(BaseModel):
  9. filename: str
  10. extension: str
  11. mimetype: str
  12. size: int
  13. def guess_file_info_from_response(response: httpx.Response):
  14. url = str(response.url)
  15. # Try to extract filename from URL
  16. parsed_url = urllib.parse.urlparse(url)
  17. url_path = parsed_url.path
  18. filename = os.path.basename(url_path)
  19. # If filename couldn't be extracted, use Content-Disposition header
  20. if not filename:
  21. content_disposition = response.headers.get("Content-Disposition")
  22. if content_disposition:
  23. filename_match = re.search(r'filename="?(.+)"?', content_disposition)
  24. if filename_match:
  25. filename = filename_match.group(1)
  26. # If still no filename, generate a unique one
  27. if not filename:
  28. unique_name = str(uuid4())
  29. filename = f"{unique_name}"
  30. # Guess MIME type from filename first, then URL
  31. mimetype, _ = mimetypes.guess_type(filename)
  32. if mimetype is None:
  33. mimetype, _ = mimetypes.guess_type(url)
  34. if mimetype is None:
  35. # If guessing fails, use Content-Type from response headers
  36. mimetype = response.headers.get("Content-Type", "application/octet-stream")
  37. extension = os.path.splitext(filename)[1]
  38. # Ensure filename has an extension
  39. if not extension:
  40. extension = mimetypes.guess_extension(mimetype) or ".bin"
  41. filename = f"{filename}{extension}"
  42. return FileInfo(
  43. filename=filename,
  44. extension=extension,
  45. mimetype=mimetype,
  46. size=int(response.headers.get("Content-Length", -1)),
  47. )