Pārlūkot izejas kodu

fix: support mdx files, close #11557 (#11565)

Signed-off-by: yihong0618 <zouzou0208@gmail.com>
yihong 4 mēneši atpakaļ
vecāks
revīzija
36cb25b341

+ 2 - 2
api/constants/__init__.py

@@ -14,11 +14,11 @@ AUDIO_EXTENSIONS.extend([ext.upper() for ext in AUDIO_EXTENSIONS])
 
 
 if dify_config.ETL_TYPE == "Unstructured":
-    DOCUMENT_EXTENSIONS = ["txt", "markdown", "md", "pdf", "html", "htm", "xlsx", "xls"]
+    DOCUMENT_EXTENSIONS = ["txt", "markdown", "md", "mdx", "pdf", "html", "htm", "xlsx", "xls"]
     DOCUMENT_EXTENSIONS.extend(("docx", "csv", "eml", "msg", "pptx", "xml", "epub"))
     if dify_config.UNSTRUCTURED_API_URL:
         DOCUMENT_EXTENSIONS.append("ppt")
     DOCUMENT_EXTENSIONS.extend([ext.upper() for ext in DOCUMENT_EXTENSIONS])
 else:
-    DOCUMENT_EXTENSIONS = ["txt", "markdown", "md", "pdf", "html", "htm", "xlsx", "xls", "docx", "csv"]
+    DOCUMENT_EXTENSIONS = ["txt", "markdown", "md", "mdx", "pdf", "html", "htm", "xlsx", "xls", "docx", "csv"]
     DOCUMENT_EXTENSIONS.extend([ext.upper() for ext in DOCUMENT_EXTENSIONS])

+ 2 - 2
api/core/rag/extractor/extract_processor.py

@@ -103,7 +103,7 @@ class ExtractProcessor:
                         extractor = ExcelExtractor(file_path)
                     elif file_extension == ".pdf":
                         extractor = PdfExtractor(file_path)
-                    elif file_extension in {".md", ".markdown"}:
+                    elif file_extension in {".md", ".markdown", ".mdx"}:
                         extractor = (
                             UnstructuredMarkdownExtractor(file_path, unstructured_api_url, unstructured_api_key)
                             if is_automatic
@@ -141,7 +141,7 @@ class ExtractProcessor:
                         extractor = ExcelExtractor(file_path)
                     elif file_extension == ".pdf":
                         extractor = PdfExtractor(file_path)
-                    elif file_extension in {".md", ".markdown"}:
+                    elif file_extension in {".md", ".markdown", ".mdx"}:
                         extractor = MarkdownExtractor(file_path, autodetect_encoding=True)
                     elif file_extension in {".htm", ".html"}:
                         extractor = HtmlExtractor(file_path)

+ 1 - 0
web/app/components/base/file-icon/index.tsx

@@ -36,6 +36,7 @@ const FileIcon: FC<FileIconProps> = ({
       return <Json className={className} />
     case 'md':
     case 'markdown':
+    case 'mdx':
       return <Md className={className} />
     case 'pdf':
       return <Pdf className={className} />

+ 1 - 1
web/app/components/base/file-uploader/utils.ts

@@ -84,7 +84,7 @@ export const getFileAppearanceType = (fileName: string, fileMimetype: string) =>
   if (extension === 'pdf')
     return FileAppearanceTypeEnum.pdf
 
-  if (extension === 'md' || extension === 'markdown')
+  if (extension === 'md' || extension === 'markdown' || extension === 'mdx')
     return FileAppearanceTypeEnum.markdown
 
   if (extension === 'xlsx' || extension === 'xls')

+ 1 - 1
web/app/components/base/prompt-editor/constants.tsx

@@ -52,7 +52,7 @@ export const getInputVars = (text: string): ValueSelector[] => {
 
 export const FILE_EXTS: Record<string, string[]> = {
   [SupportUploadFileTypes.image]: ['JPG', 'JPEG', 'PNG', 'GIF', 'WEBP', 'SVG'],
-  [SupportUploadFileTypes.document]: ['TXT', 'MD', 'MARKDOWN', 'PDF', 'HTML', 'XLSX', 'XLS', 'DOCX', 'CSV', 'EML', 'MSG', 'PPTX', 'PPT', 'XML', 'EPUB'],
+  [SupportUploadFileTypes.document]: ['TXT', 'MD', 'MDX', 'MARKDOWN', 'PDF', 'HTML', 'XLSX', 'XLS', 'DOCX', 'CSV', 'EML', 'MSG', 'PPTX', 'PPT', 'XML', 'EPUB'],
   [SupportUploadFileTypes.audio]: ['MP3', 'M4A', 'WAV', 'WEBM', 'AMR', 'MPGA'],
   [SupportUploadFileTypes.video]: ['MP4', 'MOV', 'MPEG', 'MPGA'],
 }