4 月之前 · 5a9b785773
--- a/api/core/workflow/nodes/document_extractor/node.py
+++ b/api/core/workflow/nodes/document_extractor/node.py
@@ -4,8 +4,8 @@ import json
 
				 
			
 
				 import docx
			
 
				 import pandas as pd
			
 
				-import pypdfium2
			
 
				-import yaml
			
 
				+import pypdfium2  # type: ignore
			
 
				+import yaml  # type: ignore
			
 
				 from unstructured.partition.api import partition_via_api
			
 
				 from unstructured.partition.email import partition_email
			
 
				 from unstructured.partition.epub import partition_epub
			
@@ -237,15 +237,17 @@ def _extract_text_from_csv(file_content: bytes) -> str:
 
				 
			
 
				 def _extract_text_from_excel(file_content: bytes) -> str:
			
 
				     """Extract text from an Excel file using pandas."""
			
 
				-
			
 
				     try:
			
 
				-        df = pd.read_excel(io.BytesIO(file_content))
			
 
				-
			
 
				-        # Drop rows where all elements are NaN
			
 
				-        df.dropna(how="all", inplace=True)
			
 
				-
			
 
				-        # Convert DataFrame to Markdown table
			
 
				-        markdown_table = df.to_markdown(index=False)
			
 
				+        excel_file = pd.ExcelFile(io.BytesIO(file_content))
			
 
				+        markdown_table = ""
			
 
				+        for sheet_name in excel_file.sheet_names:
			
 
				+            try:
			
 
				+                df = excel_file.parse(sheet_name=sheet_name)
			
 
				+                df.dropna(how="all", inplace=True)
			
 
				+                # Append a blank line after each sheet's Markdown table so consecutive tables stay separated
			
 
				+                markdown_table += df.to_markdown(index=False) + "\n\n"
			
 
				+            except Exception as e:
			
 
				+                continue
			
 
				         return markdown_table
			
 
				     except Exception as e:
			
 
				         raise TextExtractionError(f"Failed to extract text from Excel file: {str(e)}") from e