Преглед на файлове

Handle external images when extracting docx images (#5024)

Jyong преди 11 месеца
родител
ревизия
3b60c28b3a
променени са 1 файла, в които са добавени 19 реда и са изтрити 7 реда
  1. 19 7
      api/core/rag/extractor/word_extractor.py

+ 19 - 7
api/core/rag/extractor/word_extractor.py

@@ -76,13 +76,25 @@ class WordExtractor(BaseExtractor):
         for rel in doc.part.rels.values():
         for rel in doc.part.rels.values():
             if "image" in rel.target_ref:
             if "image" in rel.target_ref:
                 image_count += 1
                 image_count += 1
-                image_ext = rel.target_ref.split('.')[-1]
-                # user uuid as file name
-                file_uuid = str(uuid.uuid4())
-                file_key = 'image_files/' + self.tenant_id + '/' + file_uuid + '.' + image_ext
-                mime_type, _ = mimetypes.guess_type(file_key)
-
-                storage.save(file_key, rel.target_part.blob)
+                if rel.is_external:
+                    url = rel.reltype
+                    response = requests.get(url, stream=True)
+                    if response.status_code == 200:
+                        image_ext = mimetypes.guess_extension(response.headers['Content-Type'])
+                        file_uuid = str(uuid.uuid4())
+                        file_key = 'image_files/' + self.tenant_id + '/' + file_uuid + '.' + image_ext
+                        mime_type, _ = mimetypes.guess_type(file_key)
+                        storage.save(file_key, response.content)
+                    else:
+                        continue
+                else:
+                    image_ext = rel.target_ref.split('.')[-1]
+                    # use uuid as file name
+                    file_uuid = str(uuid.uuid4())
+                    file_key = 'image_files/' + self.tenant_id + '/' + file_uuid + '.' + image_ext
+                    mime_type, _ = mimetypes.guess_type(file_key)
+
+                    storage.save(file_key, rel.target_part.blob)
                 # save file to db
                 # save file to db
                 config = current_app.config
                 config = current_app.config
                 upload_file = UploadFile(
                 upload_file = UploadFile(