|
@@ -21,7 +21,7 @@ IMAGE_EXTENSIONS = ['jpg', 'jpeg', 'png', 'webp', 'gif', 'svg']
|
|
|
IMAGE_EXTENSIONS.extend([ext.upper() for ext in IMAGE_EXTENSIONS])
|
|
|
|
|
|
ALLOWED_EXTENSIONS = ['txt', 'markdown', 'md', 'pdf', 'html', 'htm', 'xlsx', 'xls', 'docx', 'csv']
|
|
|
-UNSTRUSTURED_ALLOWED_EXTENSIONS = ['txt', 'markdown', 'md', 'pdf', 'html', 'htm', 'xlsx', 'xls',
|
|
|
+UNSTRUCTURED_ALLOWED_EXTENSIONS = ['txt', 'markdown', 'md', 'pdf', 'html', 'htm', 'xlsx', 'xls',
|
|
|
'docx', 'csv', 'eml', 'msg', 'pptx', 'ppt', 'xml', 'epub']
|
|
|
|
|
|
PREVIEW_WORDS_LIMIT = 3000
|
|
@@ -36,7 +36,7 @@ class FileService:
|
|
|
if len(filename) > 200:
|
|
|
filename = filename.split('.')[0][:200] + '.' + extension
|
|
|
etl_type = current_app.config['ETL_TYPE']
|
|
|
- allowed_extensions = UNSTRUSTURED_ALLOWED_EXTENSIONS + IMAGE_EXTENSIONS if etl_type == 'Unstructured' \
|
|
|
+ allowed_extensions = UNSTRUCTURED_ALLOWED_EXTENSIONS + IMAGE_EXTENSIONS if etl_type == 'Unstructured' \
|
|
|
else ALLOWED_EXTENSIONS + IMAGE_EXTENSIONS
|
|
|
if extension.lower() not in allowed_extensions:
|
|
|
raise UnsupportedFileTypeError()
|
|
@@ -139,7 +139,7 @@ class FileService:
|
|
|
# extract text from file
|
|
|
extension = upload_file.extension
|
|
|
etl_type = current_app.config['ETL_TYPE']
|
|
|
- allowed_extensions = UNSTRUSTURED_ALLOWED_EXTENSIONS if etl_type == 'Unstructured' else ALLOWED_EXTENSIONS
|
|
|
+ allowed_extensions = UNSTRUCTURED_ALLOWED_EXTENSIONS if etl_type == 'Unstructured' else ALLOWED_EXTENSIONS
|
|
|
if extension.lower() not in allowed_extensions:
|
|
|
raise UnsupportedFileTypeError()
|
|
|
|