helpers.py 4.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124
  1. import mimetypes
  2. import os
  3. import platform
  4. import re
  5. import urllib.parse
  6. import warnings
  7. from collections.abc import Mapping
  8. from typing import Any
  9. from uuid import uuid4
  10. import httpx
  11. try:
  12. import magic
  13. except ImportError:
  14. if platform.system() == "Windows":
  15. warnings.warn(
  16. "To use python-magic guess MIMETYPE, you need to run `pip install python-magic-bin`", stacklevel=2
  17. )
  18. elif platform.system() == "Darwin":
  19. warnings.warn("To use python-magic guess MIMETYPE, you need to run `brew install libmagic`", stacklevel=2)
  20. elif platform.system() == "Linux":
  21. warnings.warn(
  22. "To use python-magic guess MIMETYPE, you need to run `sudo apt-get install libmagic1`", stacklevel=2
  23. )
  24. else:
  25. warnings.warn("To use python-magic guess MIMETYPE, you need to install `libmagic`", stacklevel=2)
  26. magic = None # type: ignore
  27. from pydantic import BaseModel
  28. from configs import dify_config
  29. class FileInfo(BaseModel):
  30. filename: str
  31. extension: str
  32. mimetype: str
  33. size: int
  34. def guess_file_info_from_response(response: httpx.Response):
  35. url = str(response.url)
  36. # Try to extract filename from URL
  37. parsed_url = urllib.parse.urlparse(url)
  38. url_path = parsed_url.path
  39. filename = os.path.basename(url_path)
  40. # If filename couldn't be extracted, use Content-Disposition header
  41. if not filename:
  42. content_disposition = response.headers.get("Content-Disposition")
  43. if content_disposition:
  44. filename_match = re.search(r'filename="?(.+)"?', content_disposition)
  45. if filename_match:
  46. filename = filename_match.group(1)
  47. # If still no filename, generate a unique one
  48. if not filename:
  49. unique_name = str(uuid4())
  50. filename = f"{unique_name}"
  51. # Guess MIME type from filename first, then URL
  52. mimetype, _ = mimetypes.guess_type(filename)
  53. if mimetype is None:
  54. mimetype, _ = mimetypes.guess_type(url)
  55. if mimetype is None:
  56. # If guessing fails, use Content-Type from response headers
  57. mimetype = response.headers.get("Content-Type", "application/octet-stream")
  58. # Use python-magic to guess MIME type if still unknown or generic
  59. if mimetype == "application/octet-stream" and magic is not None:
  60. try:
  61. mimetype = magic.from_buffer(response.content[:1024], mime=True)
  62. except magic.MagicException:
  63. pass
  64. extension = os.path.splitext(filename)[1]
  65. # Ensure filename has an extension
  66. if not extension:
  67. extension = mimetypes.guess_extension(mimetype) or ".bin"
  68. filename = f"{filename}{extension}"
  69. return FileInfo(
  70. filename=filename,
  71. extension=extension,
  72. mimetype=mimetype,
  73. size=int(response.headers.get("Content-Length", -1)),
  74. )
  75. def get_parameters_from_feature_dict(*, features_dict: Mapping[str, Any], user_input_form: list[dict[str, Any]]):
  76. return {
  77. "opening_statement": features_dict.get("opening_statement"),
  78. "suggested_questions": features_dict.get("suggested_questions", []),
  79. "suggested_questions_after_answer": features_dict.get("suggested_questions_after_answer", {"enabled": False}),
  80. "speech_to_text": features_dict.get("speech_to_text", {"enabled": False}),
  81. "text_to_speech": features_dict.get("text_to_speech", {"enabled": False}),
  82. "retriever_resource": features_dict.get("retriever_resource", {"enabled": False}),
  83. "annotation_reply": features_dict.get("annotation_reply", {"enabled": False}),
  84. "more_like_this": features_dict.get("more_like_this", {"enabled": False}),
  85. "user_input_form": user_input_form,
  86. "sensitive_word_avoidance": features_dict.get(
  87. "sensitive_word_avoidance", {"enabled": False, "type": "", "configs": []}
  88. ),
  89. "file_upload": features_dict.get(
  90. "file_upload",
  91. {
  92. "image": {
  93. "enabled": False,
  94. "number_limits": 3,
  95. "detail": "high",
  96. "transfer_methods": ["remote_url", "local_file"],
  97. }
  98. },
  99. ),
  100. "system_parameters": {
  101. "image_file_size_limit": dify_config.UPLOAD_IMAGE_FILE_SIZE_LIMIT,
  102. "video_file_size_limit": dify_config.UPLOAD_VIDEO_FILE_SIZE_LIMIT,
  103. "audio_file_size_limit": dify_config.UPLOAD_AUDIO_FILE_SIZE_LIMIT,
  104. "file_size_limit": dify_config.UPLOAD_FILE_SIZE_LIMIT,
  105. "workflow_file_upload_limit": dify_config.WORKFLOW_FILE_UPLOAD_LIMIT,
  106. },
  107. }