helpers.py 3.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105
  1. import mimetypes
  2. import os
  3. import re
  4. import urllib.parse
  5. from collections.abc import Mapping
  6. from typing import Any
  7. from uuid import uuid4
  8. import httpx
  9. import magic
  10. from pydantic import BaseModel
  11. from configs import dify_config
  12. class FileInfo(BaseModel):
  13. filename: str
  14. extension: str
  15. mimetype: str
  16. size: int
  17. def guess_file_info_from_response(response: httpx.Response):
  18. url = str(response.url)
  19. # Try to extract filename from URL
  20. parsed_url = urllib.parse.urlparse(url)
  21. url_path = parsed_url.path
  22. filename = os.path.basename(url_path)
  23. # If filename couldn't be extracted, use Content-Disposition header
  24. if not filename:
  25. content_disposition = response.headers.get("Content-Disposition")
  26. if content_disposition:
  27. filename_match = re.search(r'filename="?(.+)"?', content_disposition)
  28. if filename_match:
  29. filename = filename_match.group(1)
  30. # If still no filename, generate a unique one
  31. if not filename:
  32. unique_name = str(uuid4())
  33. filename = f"{unique_name}"
  34. # Guess MIME type from filename first, then URL
  35. mimetype, _ = mimetypes.guess_type(filename)
  36. if mimetype is None:
  37. mimetype, _ = mimetypes.guess_type(url)
  38. if mimetype is None:
  39. # If guessing fails, use Content-Type from response headers
  40. mimetype = response.headers.get("Content-Type", "application/octet-stream")
  41. # Use python-magic to guess MIME type if still unknown or generic
  42. if mimetype == "application/octet-stream":
  43. try:
  44. mimetype = magic.from_buffer(response.content[:1024], mime=True)
  45. except magic.MagicException:
  46. pass
  47. extension = os.path.splitext(filename)[1]
  48. # Ensure filename has an extension
  49. if not extension:
  50. extension = mimetypes.guess_extension(mimetype) or ".bin"
  51. filename = f"{filename}{extension}"
  52. return FileInfo(
  53. filename=filename,
  54. extension=extension,
  55. mimetype=mimetype,
  56. size=int(response.headers.get("Content-Length", -1)),
  57. )
  58. def get_parameters_from_feature_dict(*, features_dict: Mapping[str, Any], user_input_form: list[dict[str, Any]]):
  59. return {
  60. "opening_statement": features_dict.get("opening_statement"),
  61. "suggested_questions": features_dict.get("suggested_questions", []),
  62. "suggested_questions_after_answer": features_dict.get("suggested_questions_after_answer", {"enabled": False}),
  63. "speech_to_text": features_dict.get("speech_to_text", {"enabled": False}),
  64. "text_to_speech": features_dict.get("text_to_speech", {"enabled": False}),
  65. "retriever_resource": features_dict.get("retriever_resource", {"enabled": False}),
  66. "annotation_reply": features_dict.get("annotation_reply", {"enabled": False}),
  67. "more_like_this": features_dict.get("more_like_this", {"enabled": False}),
  68. "user_input_form": user_input_form,
  69. "sensitive_word_avoidance": features_dict.get(
  70. "sensitive_word_avoidance", {"enabled": False, "type": "", "configs": []}
  71. ),
  72. "file_upload": features_dict.get(
  73. "file_upload",
  74. {
  75. "image": {
  76. "enabled": False,
  77. "number_limits": 3,
  78. "detail": "high",
  79. "transfer_methods": ["remote_url", "local_file"],
  80. }
  81. },
  82. ),
  83. "system_parameters": {
  84. "image_file_size_limit": dify_config.UPLOAD_IMAGE_FILE_SIZE_LIMIT,
  85. "video_file_size_limit": dify_config.UPLOAD_VIDEO_FILE_SIZE_LIMIT,
  86. "audio_file_size_limit": dify_config.UPLOAD_AUDIO_FILE_SIZE_LIMIT,
  87. "file_size_limit": dify_config.UPLOAD_FILE_SIZE_LIMIT,
  88. "workflow_file_upload_limit": dify_config.WORKFLOW_FILE_UPLOAD_LIMIT,
  89. },
  90. }