Преглед на файлове

remove unstructured pdf extract (#9794)

Jyong преди 6 месеца
родител
ревизия
5f11fe521d
променен е 1 файл, в който е добавен 1 ред и са изтрити 2 реда
  1. +1 -2
      api/core/rag/extractor/extract_processor.py

+ 1 - 2
api/core/rag/extractor/extract_processor.py

@@ -21,7 +21,6 @@ from core.rag.extractor.unstructured.unstructured_eml_extractor import Unstructu
 from core.rag.extractor.unstructured.unstructured_epub_extractor import UnstructuredEpubExtractor
 from core.rag.extractor.unstructured.unstructured_markdown_extractor import UnstructuredMarkdownExtractor
 from core.rag.extractor.unstructured.unstructured_msg_extractor import UnstructuredMsgExtractor
-from core.rag.extractor.unstructured.unstructured_pdf_extractor import UnstructuredPDFExtractor
 from core.rag.extractor.unstructured.unstructured_ppt_extractor import UnstructuredPPTExtractor
 from core.rag.extractor.unstructured.unstructured_pptx_extractor import UnstructuredPPTXExtractor
 from core.rag.extractor.unstructured.unstructured_text_extractor import UnstructuredTextExtractor
@@ -103,7 +102,7 @@ class ExtractProcessor:
                     if file_extension in {".xlsx", ".xls"}:
                         extractor = ExcelExtractor(file_path)
                     elif file_extension == ".pdf":
-                        extractor = UnstructuredPDFExtractor(file_path, unstructured_api_url, unstructured_api_key)
+                        extractor = PdfExtractor(file_path)
                     elif file_extension in {".md", ".markdown"}:
                         extractor = (
                             UnstructuredMarkdownExtractor(file_path, unstructured_api_url, unstructured_api_key)