12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758 |
- import mimetypes
- import os
- import re
- import urllib.parse
- from uuid import uuid4
- import httpx
- from pydantic import BaseModel
class FileInfo(BaseModel):
    """Metadata describing a file: its name, extension, MIME type and size."""

    # Name of the file; guess_file_info_from_response always fills this with
    # a name that carries an extension.
    filename: str
    # Extension of `filename`, including the leading dot (for example ".bin").
    extension: str
    # MIME type string for the file content.
    mimetype: str
    # Size in bytes; guess_file_info_from_response stores -1 when the
    # response carries no Content-Length header.
    size: int
def guess_file_info_from_response(response: httpx.Response) -> "FileInfo":
    """Infer filename, extension, MIME type and size from an HTTP response.

    The filename is taken from, in order of preference:
      1. the last path segment of the response URL,
      2. the ``filename`` parameter of the ``Content-Disposition`` header,
      3. a freshly generated UUID.

    The MIME type is guessed from the filename, then from the URL, and
    finally taken from the ``Content-Type`` header (parameters such as
    ``charset`` are dropped), defaulting to ``application/octet-stream``.
    If the filename has no extension, one is derived from the MIME type
    (``.bin`` when nothing matches) and appended to the filename.

    Args:
        response: The HTTP response to inspect. Only ``response.url`` and
            ``response.headers`` are read.

    Returns:
        A ``FileInfo`` whose ``size`` is the ``Content-Length`` value, or
        ``-1`` when that header is missing or is not a valid integer.
    """
    url = str(response.url)

    # First choice: the last path segment of the URL.
    filename = os.path.basename(urllib.parse.urlparse(url).path)

    # Second choice: the filename parameter of Content-Disposition.
    if not filename:
        content_disposition = response.headers.get("Content-Disposition")
        if content_disposition:
            # Match either a quoted value (up to the closing quote) or an
            # unquoted token (up to the next ';'), so trailing quotes and
            # following parameters are never captured.
            filename_match = re.search(
                r'filename=(?:"([^"]*)"|([^;]+))',
                content_disposition,
                re.IGNORECASE,
            )
            if filename_match:
                raw_name = filename_match.group(1) or filename_match.group(2) or ""
                raw_name = raw_name.strip().strip('"')
                # The header is untrusted input: keep only the final path
                # component so values like "../../x" cannot escape a
                # download directory. Backslashes are normalised first so
                # Windows-style paths are reduced as well.
                filename = os.path.basename(raw_name.replace("\\", "/"))

    # Last resort: generate a unique name.
    if not filename:
        filename = str(uuid4())

    # Guess the MIME type from the filename first, then from the URL.
    mimetype, _ = mimetypes.guess_type(filename)
    if mimetype is None:
        mimetype, _ = mimetypes.guess_type(url)
    if mimetype is None:
        # Fall back to Content-Type, dropping parameters such as
        # "; charset=utf-8" so that the value is a bare type/subtype that
        # mimetypes.guess_extension can resolve.
        content_type = response.headers.get("Content-Type", "")
        mimetype = content_type.split(";", 1)[0].strip() or "application/octet-stream"

    # Ensure the filename carries an extension.
    extension = os.path.splitext(filename)[1]
    if not extension:
        extension = mimetypes.guess_extension(mimetype) or ".bin"
        filename = f"{filename}{extension}"

    # A malformed Content-Length is treated the same as a missing one.
    try:
        size = int(response.headers.get("Content-Length", -1))
    except ValueError:
        size = -1

    return FileInfo(
        filename=filename,
        extension=extension,
        mimetype=mimetype,
        size=size,
    )
|