Ver código fonte

fix: Fix parent child retrieval issues (#12206)

Co-authored-by: NFish <douxc512@gmail.com>
Co-authored-by: nite-knite <nkCoding@gmail.com>
Wu Tianwei 3 meses atrás
pai
commit
09d759d196
34 arquivos alterados com 446 adições e 387 exclusões
  1. 34 1
      web/app/(commonLayout)/datasets/template/template.en.mdx
  2. 36 3
      web/app/(commonLayout)/datasets/template/template.zh.mdx
  3. 49 70
      web/app/components/app/configuration/dataset-config/params-config/config-content.tsx
  4. 19 18
      web/app/components/app/configuration/dataset-config/params-config/index.tsx
  5. 5 15
      web/app/components/app/configuration/dataset-config/settings-modal/index.tsx
  6. 3 3
      web/app/components/app/configuration/index.tsx
  7. 8 7
      web/app/components/datasets/common/check-rerank-model.ts
  8. 4 1
      web/app/components/datasets/common/economical-retrieval-method-config/index.tsx
  9. 71 45
      web/app/components/datasets/common/retrieval-method-config/index.tsx
  10. 43 59
      web/app/components/datasets/common/retrieval-param-config/index.tsx
  11. 3 0
      web/app/components/datasets/create/embedding-process/index.tsx
  12. 46 58
      web/app/components/datasets/create/step-two/index.tsx
  13. 2 2
      web/app/components/datasets/create/step-two/option-card.tsx
  14. 16 8
      web/app/components/datasets/documents/detail/completed/index.tsx
  15. 1 1
      web/app/components/datasets/documents/detail/completed/segment-list.tsx
  16. 14 4
      web/app/components/datasets/documents/detail/index.tsx
  17. 16 2
      web/app/components/datasets/documents/index.tsx
  18. 17 21
      web/app/components/datasets/documents/list.tsx
  19. 1 1
      web/app/components/datasets/hit-testing/components/child-chunks-item.tsx
  20. 1 1
      web/app/components/datasets/hit-testing/components/chunk-detail-modal.tsx
  21. 2 7
      web/app/components/datasets/hit-testing/components/result-item.tsx
  22. 4 4
      web/app/components/datasets/hit-testing/components/score.tsx
  23. 1 1
      web/app/components/datasets/hit-testing/index.tsx
  24. 2 14
      web/app/components/datasets/hit-testing/modify-retrieval-modal.tsx
  25. 7 19
      web/app/components/datasets/settings/form/index.tsx
  26. 1 0
      web/app/components/header/account-setting/model-provider-page/model-selector/model-trigger.tsx
  27. 5 4
      web/app/components/workflow/nodes/knowledge-retrieval/components/retrieval-config.tsx
  28. 1 1
      web/app/components/workflow/nodes/knowledge-retrieval/use-config.ts
  29. 23 10
      web/app/components/workflow/nodes/knowledge-retrieval/utils.ts
  30. 1 1
      web/i18n/en-US/app-debug.ts
  31. 2 2
      web/i18n/en-US/workflow.ts
  32. 1 1
      web/i18n/zh-Hans/app-debug.ts
  33. 1 1
      web/i18n/zh-Hans/workflow.ts
  34. 6 2
      web/service/knowledge/use-document.ts

+ 34 - 1
web/app/(commonLayout)/datasets/template/template.en.mdx

@@ -52,6 +52,15 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
           - <code>high_quality</code> High quality: embedding using embedding model, built as vector database index
           - <code>economy</code> Economy: Build using inverted index of keyword table index
       </Property>
+      <Property name='doc_form' type='string' key='doc_form'>
+        Format of indexed content
+          - <code>text_model</code> Text documents are directly embedded; `economy` mode defaults to using this form
+          - <code>hierarchical_model</code> Parent-child mode
+          - <code>qa_model</code> Q&A Mode: Generates Q&A pairs for segmented documents and then embeds the questions
+      </Property>
+      <Property name='doc_language' type='string' key='doc_language'>
+        In Q&A mode, specify the language of the document, for example: <code>English</code>, <code>Chinese</code>
+      </Property>
       <Property name='process_rule' type='object' key='process_rule'>
         Processing rules
           - <code>mode</code> (string) Cleaning, segmentation mode, automatic / custom
@@ -65,6 +74,10 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
             - <code>segmentation</code> (object) Segmentation rules
               - <code>separator</code> Custom segment identifier, currently only allows one delimiter to be set. Default is \n
               - <code>max_tokens</code> Maximum length (token) defaults to 1000
+            - <code>parent_mode</code> Retrieval mode of parent chunks: <code>full-doc</code> full text retrieval / <code>paragraph</code> paragraph retrieval
+            - <code>subchunk_segmentation</code> (object) Child chunk rules
+              - <code>separator</code> Segmentation identifier. Currently, only one delimiter is allowed. The default is <code>***</code>
+              - <code>max_tokens</code> The maximum length (tokens) must be validated to be shorter than the length of the parent chunk
       </Property>
     </Properties>
   </Col>
@@ -155,6 +168,13 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
           - <code>high_quality</code> High quality: embedding using embedding model, built as vector database index
           - <code>economy</code> Economy: Build using inverted index of keyword table index
 
+        - <code>doc_form</code> Format of indexed content
+          - <code>text_model</code> Text documents are directly embedded; `economy` mode defaults to using this form
+          - <code>hierarchical_model</code> Parent-child mode
+          - <code>qa_model</code> Q&A Mode: Generates Q&A pairs for segmented documents and then embeds the questions
+
+        - <code>doc_language</code> In Q&A mode, specify the language of the document, for example: <code>English</code>, <code>Chinese</code>
+
         - <code>process_rule</code> Processing rules
           - <code>mode</code> (string) Cleaning, segmentation mode, automatic / custom
           - <code>rules</code> (object) Custom rules (in automatic mode, this field is empty)
@@ -167,6 +187,10 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
             - <code>segmentation</code> (object) Segmentation rules
               - <code>separator</code> Custom segment identifier, currently only allows one delimiter to be set. Default is \n
               - <code>max_tokens</code> Maximum length (token) defaults to 1000
+            - <code>parent_mode</code> Retrieval mode of parent chunks: <code>full-doc</code> full text retrieval / <code>paragraph</code> paragraph retrieval
+            - <code>subchunk_segmentation</code> (object) Child chunk rules
+              - <code>separator</code> Segmentation identifier. Currently, only one delimiter is allowed. The default is <code>***</code>
+              - <code>max_tokens</code> The maximum length (tokens) must be validated to be shorter than the length of the parent chunk
       </Property>
       <Property name='file' type='multipart/form-data' key='file'>
         Files that need to be uploaded.
@@ -449,6 +473,10 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
             - <code>segmentation</code> (object) Segmentation rules
               - <code>separator</code> Custom segment identifier, currently only allows one delimiter to be set. Default is \n
               - <code>max_tokens</code> Maximum length (token) defaults to 1000
+            - <code>parent_mode</code> Retrieval mode of parent chunks: <code>full-doc</code> full text retrieval / <code>paragraph</code> paragraph retrieval
+            - <code>subchunk_segmentation</code> (object) Child chunk rules
+              - <code>separator</code> Segmentation identifier. Currently, only one delimiter is allowed. The default is <code>***</code>
+              - <code>max_tokens</code> The maximum length (tokens) must be validated to be shorter than the length of the parent chunk
       </Property>
     </Properties>
   </Col>
@@ -546,6 +574,10 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
             - <code>segmentation</code> (object) Segmentation rules
               - <code>separator</code> Custom segment identifier, currently only allows one delimiter to be set. Default is \n
               - <code>max_tokens</code> Maximum length (token) defaults to 1000
+            - <code>parent_mode</code> Retrieval mode of parent chunks: <code>full-doc</code> full text retrieval / <code>paragraph</code> paragraph retrieval
+            - <code>subchunk_segmentation</code> (object) Child chunk rules
+              - <code>separator</code> Segmentation identifier. Currently, only one delimiter is allowed. The default is <code>***</code>
+              - <code>max_tokens</code> The maximum length (tokens) must be validated to be shorter than the length of the parent chunk
       </Property>
     </Properties>
   </Col>
@@ -984,7 +1016,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
 <Heading
   url='/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}'
   method='POST'
-  title='Update a Chunk in a Document '
+  title='Update a Chunk in a Document'
   name='#update_segment'
 />
 <Row>
@@ -1009,6 +1041,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
         - <code>answer</code> (text) Answer content, passed if the knowledge is in Q&A mode (optional)
         - <code>keywords</code> (list) Keyword (optional)
         - <code>enabled</code> (bool) False / true (optional)
+        - <code>regenerate_child_chunks</code> (bool) Whether to regenerate child chunks (optional)
       </Property>
     </Properties>
   </Col>

+ 36 - 3
web/app/(commonLayout)/datasets/template/template.zh.mdx

@@ -52,6 +52,15 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
           - <code>high_quality</code> 高质量:使用  embedding 模型进行嵌入,构建为向量数据库索引
           - <code>economy</code> 经济:使用 keyword table index 的倒排索引进行构建
       </Property>
+      <Property name='doc_form' type='string' key='doc_form'>
+        索引内容的形式
+          - <code>text_model</code> text 文档直接 embedding,经济模式默认为该模式
+          - <code>hierarchical_model</code> parent-child 模式
+          - <code>qa_model</code> Q&A 模式:为分片文档生成 Q&A 对,然后对问题进行 embedding
+      </Property>
+      <Property name='doc_language' type='string' key='doc_language'>
+        在 Q&A 模式下,指定文档的语言,例如:<code>English</code>、<code>Chinese</code>
+      </Property>
       <Property name='process_rule' type='object' key='process_rule'>
         处理规则
           - <code>mode</code> (string) 清洗、分段模式 ,automatic 自动 / custom 自定义
@@ -63,8 +72,12 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
                   - <code>remove_urls_emails</code> 删除 URL、电子邮件地址
               - <code>enabled</code> (bool) 是否选中该规则,不传入文档 ID 时代表默认值
             - <code>segmentation</code> (object) 分段规则
-              - <code>separator</code> 自定义分段标识符,目前仅允许设置一个分隔符。默认为 \n
+              - <code>separator</code> 自定义分段标识符,目前仅允许设置一个分隔符。默认为 <code>\n</code>
               - <code>max_tokens</code> 最大长度(token)默认为 1000
+            - <code>parent_mode</code> 父分段的召回模式 <code>full-doc</code> 全文召回 / <code>paragraph</code> 段落召回
+            - <code>subchunk_segmentation</code> (object) 子分段规则
+              - <code>separator</code> 分段标识符,目前仅允许设置一个分隔符。默认为 <code>***</code>
+              - <code>max_tokens</code> 最大长度 (token) 需要校验小于父级的长度
       </Property>
     </Properties>
   </Col>
@@ -155,6 +168,13 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
           - <code>high_quality</code> 高质量:使用  embedding 模型进行嵌入,构建为向量数据库索引
           - <code>economy</code> 经济:使用 keyword table index 的倒排索引进行构建
 
+        - <code>doc_form</code> 索引内容的形式
+          - <code>text_model</code> text 文档直接 embedding,经济模式默认为该模式
+          - <code>hierarchical_model</code> parent-child 模式
+          - <code>qa_model</code> Q&A 模式:为分片文档生成 Q&A 对,然后对问题进行 embedding
+
+        - <code>doc_language</code> 在 Q&A 模式下,指定文档的语言,例如:<code>English</code>、<code>Chinese</code>
+
         - <code>process_rule</code> 处理规则
           - <code>mode</code> (string) 清洗、分段模式 ,automatic 自动 / custom 自定义
           - <code>rules</code> (object) 自定义规则(自动模式下,该字段为空)
@@ -167,6 +187,10 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
             - <code>segmentation</code> (object) 分段规则
               - <code>separator</code> 自定义分段标识符,目前仅允许设置一个分隔符。默认为 \n
               - <code>max_tokens</code> 最大长度(token)默认为 1000
+            - <code>parent_mode</code> 父分段的召回模式 <code>full-doc</code> 全文召回 / <code>paragraph</code> 段落召回
+            - <code>subchunk_segmentation</code> (object) 子分段规则
+              - <code>separator</code> 分段标识符,目前仅允许设置一个分隔符。默认为 <code>***</code>
+              - <code>max_tokens</code> 最大长度 (token) 需要校验小于父级的长度
       </Property>
       <Property name='file' type='multipart/form-data' key='file'>
         需要上传的文件。
@@ -411,7 +435,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
 <Heading
   url='/datasets/{dataset_id}/documents/{document_id}/update-by-text'
   method='POST'
-  title='通过文本更新文档 '
+  title='通过文本更新文档'
   name='#update-by-text'
 />
 <Row>
@@ -449,6 +473,10 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
             - <code>segmentation</code> (object) 分段规则
               - <code>separator</code> 自定义分段标识符,目前仅允许设置一个分隔符。默认为 \n
               - <code>max_tokens</code> 最大长度(token)默认为 1000
+            - <code>parent_mode</code> 父分段的召回模式 <code>full-doc</code> 全文召回 / <code>paragraph</code> 段落召回
+            - <code>subchunk_segmentation</code> (object) 子分段规则
+              - <code>separator</code> 分段标识符,目前仅允许设置一个分隔符。默认为 <code>***</code>
+              - <code>max_tokens</code> 最大长度 (token) 需要校验小于父级的长度
       </Property>
     </Properties>
   </Col>
@@ -508,7 +536,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
 <Heading
   url='/datasets/{dataset_id}/documents/{document_id}/update-by-file'
   method='POST'
-  title='通过文件更新文档  '
+  title='通过文件更新文档'
   name='#update-by-file'
 />
 <Row>
@@ -546,6 +574,10 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
             - <code>segmentation</code> (object) 分段规则
               - <code>separator</code> 自定义分段标识符,目前仅允许设置一个分隔符。默认为 \n
               - <code>max_tokens</code> 最大长度(token)默认为 1000
+            - <code>parent_mode</code> 父分段的召回模式 <code>full-doc</code> 全文召回 / <code>paragraph</code> 段落召回
+            - <code>subchunk_segmentation</code> (object) 子分段规则
+              - <code>separator</code> 分段标识符,目前仅允许设置一个分隔符。默认为 <code>***</code>
+              - <code>max_tokens</code> 最大长度 (token) 需要校验小于父级的长度
       </Property>
     </Properties>
   </Col>
@@ -1009,6 +1041,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
         - <code>answer</code> (text) 答案内容,非必填,如果知识库的模式为 Q&A 模式则传值
         - <code>keywords</code> (list) 关键字,非必填
         - <code>enabled</code> (bool) false/true,非必填
+        - <code>regenerate_child_chunks</code> (bool) 是否重新生成子分段,非必填
       </Property>
     </Properties>
   </Col>

+ 49 - 70
web/app/components/app/configuration/dataset-config/params-config/config-content.tsx

@@ -59,36 +59,24 @@ const ConfigContent: FC<Props> = ({
 
   const {
     modelList: rerankModelList,
-    defaultModel: rerankDefaultModel,
-    currentModel: isRerankDefaultModelValid,
   } = useModelListAndDefaultModelAndCurrentProviderAndModel(ModelTypeEnum.rerank)
 
   const {
     currentModel: currentRerankModel,
   } = useCurrentProviderAndModel(
     rerankModelList,
-    rerankDefaultModel
-      ? {
-        ...rerankDefaultModel,
-        provider: rerankDefaultModel.provider.provider,
-      }
-      : undefined,
+    {
+      provider: datasetConfigs.reranking_model?.reranking_provider_name,
+      model: datasetConfigs.reranking_model?.reranking_model_name,
+    },
   )
 
-  const rerankModel = (() => {
-    if (datasetConfigs.reranking_model?.reranking_provider_name) {
-      return {
-        provider_name: datasetConfigs.reranking_model.reranking_provider_name,
-        model_name: datasetConfigs.reranking_model.reranking_model_name,
-      }
+  const rerankModel = useMemo(() => {
+    return {
+      provider_name: datasetConfigs?.reranking_model?.reranking_provider_name ?? '',
+      model_name: datasetConfigs?.reranking_model?.reranking_model_name ?? '',
     }
-    else if (rerankDefaultModel) {
-      return {
-        provider_name: rerankDefaultModel.provider.provider,
-        model_name: rerankDefaultModel.model,
-      }
-    }
-  })()
+  }, [datasetConfigs.reranking_model])
 
   const handleParamChange = (key: string, value: number) => {
     if (key === 'top_k') {
@@ -133,6 +121,12 @@ const ConfigContent: FC<Props> = ({
   }
 
   const handleRerankModeChange = (mode: RerankingModeEnum) => {
+    if (mode === datasetConfigs.reranking_mode)
+      return
+
+    if (mode === RerankingModeEnum.RerankingModel && !currentRerankModel)
+      Toast.notify({ type: 'error', message: t('workflow.errorMsg.rerankModelRequired') })
+
     onChange({
       ...datasetConfigs,
       reranking_mode: mode,
@@ -162,31 +156,25 @@ const ConfigContent: FC<Props> = ({
 
   const canManuallyToggleRerank = useMemo(() => {
     return (selectedDatasetsMode.allInternal && selectedDatasetsMode.allEconomic)
-      || selectedDatasetsMode.allExternal
+    || selectedDatasetsMode.allExternal
   }, [selectedDatasetsMode.allEconomic, selectedDatasetsMode.allExternal, selectedDatasetsMode.allInternal])
 
   const showRerankModel = useMemo(() => {
     if (!canManuallyToggleRerank)
       return true
-    else if (canManuallyToggleRerank && !isRerankDefaultModelValid)
-      return false
 
     return datasetConfigs.reranking_enable
-  }, [canManuallyToggleRerank, datasetConfigs.reranking_enable, isRerankDefaultModelValid])
+  }, [datasetConfigs.reranking_enable, canManuallyToggleRerank])
 
-  const handleDisabledSwitchClick = useCallback(() => {
-    if (!currentRerankModel && !showRerankModel)
+  const handleDisabledSwitchClick = useCallback((enable: boolean) => {
+    if (!currentRerankModel && enable)
       Toast.notify({ type: 'error', message: t('workflow.errorMsg.rerankModelRequired') })
-  }, [currentRerankModel, showRerankModel, t])
-
-  useEffect(() => {
-    if (canManuallyToggleRerank && showRerankModel !== datasetConfigs.reranking_enable) {
-      onChange({
-        ...datasetConfigs,
-        reranking_enable: showRerankModel,
-      })
-    }
-  }, [canManuallyToggleRerank, showRerankModel, datasetConfigs, onChange])
+    onChange({
+      ...datasetConfigs,
+      reranking_enable: enable,
+    })
+    // eslint-disable-next-line react-hooks/exhaustive-deps
+  }, [currentRerankModel, datasetConfigs, onChange])
 
   return (
     <div>
@@ -267,24 +255,12 @@ const ConfigContent: FC<Props> = ({
                 <div className='flex items-center'>
                   {
                     selectedDatasetsMode.allEconomic && !selectedDatasetsMode.mixtureInternalAndExternal && (
-                      <div
-                        className='flex items-center'
-                        onClick={handleDisabledSwitchClick}
-                      >
-                        <Switch
-                          size='md'
-                          defaultValue={showRerankModel}
-                          disabled={!currentRerankModel || !canManuallyToggleRerank}
-                          onChange={(v) => {
-                            if (canManuallyToggleRerank) {
-                              onChange({
-                                ...datasetConfigs,
-                                reranking_enable: v,
-                              })
-                            }
-                          }}
-                        />
-                      </div>
+                      <Switch
+                        size='md'
+                        defaultValue={showRerankModel}
+                        disabled={!canManuallyToggleRerank}
+                        onChange={handleDisabledSwitchClick}
+                      />
                     )
                   }
                   <div className='leading-[32px] ml-1 text-text-secondary system-sm-semibold'>{t('common.modelProvider.rerankModel.key')}</div>
@@ -298,21 +274,24 @@ const ConfigContent: FC<Props> = ({
                     triggerClassName='ml-1 w-4 h-4'
                   />
                 </div>
-                <div>
-                  <ModelSelector
-                    defaultModel={rerankModel && { provider: rerankModel?.provider_name, model: rerankModel?.model_name }}
-                    onSelect={(v) => {
-                      onChange({
-                        ...datasetConfigs,
-                        reranking_model: {
-                          reranking_provider_name: v.provider,
-                          reranking_model_name: v.model,
-                        },
-                      })
-                    }}
-                    modelList={rerankModelList}
-                  />
-                </div>
+                {
+                  showRerankModel && (
+                    <div>
+                      <ModelSelector
+                        defaultModel={rerankModel && { provider: rerankModel?.provider_name, model: rerankModel?.model_name }}
+                        onSelect={(v) => {
+                          onChange({
+                            ...datasetConfigs,
+                            reranking_model: {
+                              reranking_provider_name: v.provider,
+                              reranking_model_name: v.model,
+                            },
+                          })
+                        }}
+                        modelList={rerankModelList}
+                      />
+                    </div>
+                  )}
               </div>
             )
           }

+ 19 - 18
web/app/components/app/configuration/dataset-config/params-config/index.tsx

@@ -10,7 +10,7 @@ import Modal from '@/app/components/base/modal'
 import Button from '@/app/components/base/button'
 import { RETRIEVE_TYPE } from '@/types/app'
 import Toast from '@/app/components/base/toast'
-import { useModelListAndDefaultModelAndCurrentProviderAndModel } from '@/app/components/header/account-setting/model-provider-page/hooks'
+import { useCurrentProviderAndModel, useModelListAndDefaultModelAndCurrentProviderAndModel } from '@/app/components/header/account-setting/model-provider-page/hooks'
 import { ModelTypeEnum } from '@/app/components/header/account-setting/model-provider-page/declarations'
 import { RerankingModeEnum } from '@/models/datasets'
 import type { DataSet } from '@/models/datasets'
@@ -41,17 +41,27 @@ const ParamsConfig = ({
   }, [datasetConfigs])
 
   const {
-    defaultModel: rerankDefaultModel,
-    currentModel: isRerankDefaultModelValid,
+    modelList: rerankModelList,
+    currentModel: rerankDefaultModel,
     currentProvider: rerankDefaultProvider,
   } = useModelListAndDefaultModelAndCurrentProviderAndModel(ModelTypeEnum.rerank)
 
+  const {
+    currentModel: isCurrentRerankModelValid,
+  } = useCurrentProviderAndModel(
+    rerankModelList,
+    {
+      provider: tempDataSetConfigs.reranking_model?.reranking_provider_name ?? '',
+      model: tempDataSetConfigs.reranking_model?.reranking_model_name ?? '',
+    },
+  )
+
   const isValid = () => {
     let errMsg = ''
     if (tempDataSetConfigs.retrieval_model === RETRIEVE_TYPE.multiWay) {
       if (tempDataSetConfigs.reranking_enable
         && tempDataSetConfigs.reranking_mode === RerankingModeEnum.RerankingModel
-        && !isRerankDefaultModelValid
+        && !isCurrentRerankModelValid
       )
         errMsg = t('appDebug.datasetConfig.rerankModelRequired')
     }
@@ -66,16 +76,7 @@ const ParamsConfig = ({
   const handleSave = () => {
     if (!isValid())
       return
-    const config = { ...tempDataSetConfigs }
-    if (config.retrieval_model === RETRIEVE_TYPE.multiWay
-      && config.reranking_mode === RerankingModeEnum.RerankingModel
-      && !config.reranking_model) {
-      config.reranking_model = {
-        reranking_provider_name: rerankDefaultModel?.provider?.provider,
-        reranking_model_name: rerankDefaultModel?.model,
-      } as any
-    }
-    setDatasetConfigs(config)
+    setDatasetConfigs(tempDataSetConfigs)
     setRerankSettingModalOpen(false)
   }
 
@@ -94,14 +95,14 @@ const ParamsConfig = ({
       reranking_enable: restConfigs.reranking_enable,
     }, selectedDatasets, selectedDatasets, {
       provider: rerankDefaultProvider?.provider,
-      model: isRerankDefaultModelValid?.model,
+      model: rerankDefaultModel?.model,
     })
 
     setTempDataSetConfigs({
       ...retrievalConfig,
-      reranking_model: restConfigs.reranking_model && {
-        reranking_provider_name: restConfigs.reranking_model.reranking_provider_name,
-        reranking_model_name: restConfigs.reranking_model.reranking_model_name,
+      reranking_model: {
+        reranking_provider_name: retrievalConfig.reranking_model?.provider || '',
+        reranking_model_name: retrievalConfig.reranking_model?.model || '',
       },
       retrieval_model,
       score_threshold_enabled,

+ 5 - 15
web/app/components/app/configuration/dataset-config/settings-modal/index.tsx

@@ -12,7 +12,7 @@ import Divider from '@/app/components/base/divider'
 import Button from '@/app/components/base/button'
 import Input from '@/app/components/base/input'
 import Textarea from '@/app/components/base/textarea'
-import { type DataSet, RerankingModeEnum } from '@/models/datasets'
+import { type DataSet } from '@/models/datasets'
 import { useToastContext } from '@/app/components/base/toast'
 import { updateDatasetSetting } from '@/service/datasets'
 import { useAppContext } from '@/context/app-context'
@@ -21,7 +21,7 @@ import type { RetrievalConfig } from '@/types/app'
 import RetrievalSettings from '@/app/components/datasets/external-knowledge-base/create/RetrievalSettings'
 import RetrievalMethodConfig from '@/app/components/datasets/common/retrieval-method-config'
 import EconomicalRetrievalMethodConfig from '@/app/components/datasets/common/economical-retrieval-method-config'
-import { ensureRerankModelSelected, isReRankModelSelected } from '@/app/components/datasets/common/check-rerank-model'
+import { isReRankModelSelected } from '@/app/components/datasets/common/check-rerank-model'
 import { AlertTriangle } from '@/app/components/base/icons/src/vender/solid/alertsAndFeedback'
 import PermissionSelector from '@/app/components/datasets/settings/permission-selector'
 import ModelSelector from '@/app/components/header/account-setting/model-provider-page/model-selector'
@@ -99,8 +99,6 @@ const SettingsModal: FC<SettingsModalProps> = ({
     }
     if (
       !isReRankModelSelected({
-        rerankDefaultModel,
-        isRerankDefaultModelValid: !!isRerankDefaultModelValid,
         rerankModelList,
         retrievalConfig,
         indexMethod,
@@ -109,14 +107,6 @@ const SettingsModal: FC<SettingsModalProps> = ({
       notify({ type: 'error', message: t('appDebug.datasetConfig.rerankModelRequired') })
       return
     }
-    const postRetrievalConfig = ensureRerankModelSelected({
-      rerankDefaultModel: rerankDefaultModel!,
-      retrievalConfig: {
-        ...retrievalConfig,
-        reranking_enable: retrievalConfig.reranking_mode === RerankingModeEnum.RerankingModel,
-      },
-      indexMethod,
-    })
     try {
       setLoading(true)
       const { id, name, description, permission } = localeCurrentDataset
@@ -128,8 +118,8 @@ const SettingsModal: FC<SettingsModalProps> = ({
           permission,
           indexing_technique: indexMethod,
           retrieval_model: {
-            ...postRetrievalConfig,
-            score_threshold: postRetrievalConfig.score_threshold_enabled ? postRetrievalConfig.score_threshold : 0,
+            ...retrievalConfig,
+            score_threshold: retrievalConfig.score_threshold_enabled ? retrievalConfig.score_threshold : 0,
           },
           embedding_model: localeCurrentDataset.embedding_model,
           embedding_model_provider: localeCurrentDataset.embedding_model_provider,
@@ -157,7 +147,7 @@ const SettingsModal: FC<SettingsModalProps> = ({
       onSave({
         ...localeCurrentDataset,
         indexing_technique: indexMethod,
-        retrieval_model_dict: postRetrievalConfig,
+        retrieval_model_dict: retrievalConfig,
       })
     }
     catch (e) {

+ 3 - 3
web/app/components/app/configuration/index.tsx

@@ -287,9 +287,9 @@ const Configuration: FC = () => {
 
     setDatasetConfigs({
       ...retrievalConfig,
-      reranking_model: restConfigs.reranking_model && {
-        reranking_provider_name: restConfigs.reranking_model.reranking_provider_name,
-        reranking_model_name: restConfigs.reranking_model.reranking_model_name,
+      reranking_model: {
+        reranking_provider_name: retrievalConfig?.reranking_model?.provider || '',
+        reranking_model_name: retrievalConfig?.reranking_model?.model || '',
       },
       retrieval_model,
       score_threshold_enabled,

+ 8 - 7
web/app/components/datasets/common/check-rerank-model.ts

@@ -6,14 +6,10 @@ import type {
 import { RerankingModeEnum } from '@/models/datasets'
 
 export const isReRankModelSelected = ({
-  rerankDefaultModel,
-  isRerankDefaultModelValid,
   retrievalConfig,
   rerankModelList,
   indexMethod,
 }: {
-  rerankDefaultModel?: DefaultModelResponse
-  isRerankDefaultModelValid: boolean
   retrievalConfig: RetrievalConfig
   rerankModelList: Model[]
   indexMethod?: string
@@ -25,12 +21,17 @@ export const isReRankModelSelected = ({
       return provider?.models.find(({ model }) => model === retrievalConfig.reranking_model?.reranking_model_name)
     }
 
-    if (isRerankDefaultModelValid)
-      return !!rerankDefaultModel
-
     return false
   })()
 
+  if (
+    indexMethod === 'high_quality'
+    && ([RETRIEVE_METHOD.semantic, RETRIEVE_METHOD.fullText].includes(retrievalConfig.search_method))
+    && retrievalConfig.reranking_enable
+    && !rerankModelSelected
+  )
+    return false
+
   if (
     indexMethod === 'high_quality'
     && (retrievalConfig.search_method === RETRIEVE_METHOD.hybrid && retrievalConfig.reranking_mode !== RerankingModeEnum.WeightedScore)

+ 4 - 1
web/app/components/datasets/common/economical-retrieval-method-config/index.tsx

@@ -10,11 +10,13 @@ import { RETRIEVE_METHOD } from '@/types/app'
 import type { RetrievalConfig } from '@/types/app'
 
 type Props = {
+  disabled?: boolean
   value: RetrievalConfig
   onChange: (value: RetrievalConfig) => void
 }
 
 const EconomicalRetrievalMethodConfig: FC<Props> = ({
+  disabled = false,
   value,
   onChange,
 }) => {
@@ -22,7 +24,8 @@ const EconomicalRetrievalMethodConfig: FC<Props> = ({
 
   return (
     <div className='space-y-2'>
-      <OptionCard icon={<Image className='w-4 h-4' src={retrievalIcon.vector} alt='' />}
+      <OptionCard
+        disabled={disabled} icon={<Image className='w-4 h-4' src={retrievalIcon.vector} alt='' />}
         title={t('dataset.retrieval.invertedIndex.title')}
         description={t('dataset.retrieval.invertedIndex.description')} isActive
         activeHeaderClassName='bg-dataset-option-card-purple-gradient'

+ 71 - 45
web/app/components/datasets/common/retrieval-method-config/index.tsx

@@ -1,6 +1,6 @@
 'use client'
 import type { FC } from 'react'
-import React from 'react'
+import React, { useCallback } from 'react'
 import { useTranslation } from 'react-i18next'
 import Image from 'next/image'
 import RetrievalParamConfig from '../retrieval-param-config'
@@ -10,7 +10,7 @@ import { retrievalIcon } from '../../create/icons'
 import type { RetrievalConfig } from '@/types/app'
 import { RETRIEVE_METHOD } from '@/types/app'
 import { useProviderContext } from '@/context/provider-context'
-import { useDefaultModel } from '@/app/components/header/account-setting/model-provider-page/hooks'
+import { useModelListAndDefaultModelAndCurrentProviderAndModel } from '@/app/components/header/account-setting/model-provider-page/hooks'
 import { ModelTypeEnum } from '@/app/components/header/account-setting/model-provider-page/declarations'
 import {
   DEFAULT_WEIGHTED_SCORE,
@@ -20,54 +20,87 @@ import {
 import Badge from '@/app/components/base/badge'
 
 type Props = {
+  disabled?: boolean
   value: RetrievalConfig
   onChange: (value: RetrievalConfig) => void
 }
 
 const RetrievalMethodConfig: FC<Props> = ({
-  value: passValue,
+  disabled = false,
+  value,
   onChange,
 }) => {
   const { t } = useTranslation()
   const { supportRetrievalMethods } = useProviderContext()
-  const { data: rerankDefaultModel } = useDefaultModel(ModelTypeEnum.rerank)
-  const value = (() => {
-    if (!passValue.reranking_model.reranking_model_name) {
-      return {
-        ...passValue,
-        reranking_model: {
-          reranking_provider_name: rerankDefaultModel?.provider.provider || '',
-          reranking_model_name: rerankDefaultModel?.model || '',
-        },
-        reranking_mode: passValue.reranking_mode || (rerankDefaultModel ? RerankingModeEnum.RerankingModel : RerankingModeEnum.WeightedScore),
-        weights: passValue.weights || {
-          weight_type: WeightedScoreEnum.Customized,
-          vector_setting: {
-            vector_weight: DEFAULT_WEIGHTED_SCORE.other.semantic,
-            embedding_provider_name: '',
-            embedding_model_name: '',
-          },
-          keyword_setting: {
-            keyword_weight: DEFAULT_WEIGHTED_SCORE.other.keyword,
-          },
-        },
-      }
+  const {
+    defaultModel: rerankDefaultModel,
+    currentModel: isRerankDefaultModelValid,
+  } = useModelListAndDefaultModelAndCurrentProviderAndModel(ModelTypeEnum.rerank)
+
+  const onSwitch = useCallback((retrieveMethod: RETRIEVE_METHOD) => {
+    if ([RETRIEVE_METHOD.semantic, RETRIEVE_METHOD.fullText].includes(retrieveMethod)) {
+      onChange({
+        ...value,
+        search_method: retrieveMethod,
+        ...(!value.reranking_model.reranking_model_name
+          ? {
+            reranking_model: {
+              reranking_provider_name: isRerankDefaultModelValid ? rerankDefaultModel?.provider?.provider ?? '' : '',
+              reranking_model_name: isRerankDefaultModelValid ? rerankDefaultModel?.model ?? '' : '',
+            },
+            reranking_enable: !!isRerankDefaultModelValid,
+          }
+          : {
+            reranking_enable: true,
+          }),
+      })
     }
-    return passValue
-  })()
+    if (retrieveMethod === RETRIEVE_METHOD.hybrid) {
+      onChange({
+        ...value,
+        search_method: retrieveMethod,
+        ...(!value.reranking_model.reranking_model_name
+          ? {
+            reranking_model: {
+              reranking_provider_name: isRerankDefaultModelValid ? rerankDefaultModel?.provider?.provider ?? '' : '',
+              reranking_model_name: isRerankDefaultModelValid ? rerankDefaultModel?.model ?? '' : '',
+            },
+            reranking_enable: !!isRerankDefaultModelValid,
+            reranking_mode: isRerankDefaultModelValid ? RerankingModeEnum.RerankingModel : RerankingModeEnum.WeightedScore,
+          }
+          : {
+            reranking_enable: true,
+            reranking_mode: RerankingModeEnum.RerankingModel,
+          }),
+        ...(!value.weights
+          ? {
+            weights: {
+              weight_type: WeightedScoreEnum.Customized,
+              vector_setting: {
+                vector_weight: DEFAULT_WEIGHTED_SCORE.other.semantic,
+                embedding_provider_name: '',
+                embedding_model_name: '',
+              },
+              keyword_setting: {
+                keyword_weight: DEFAULT_WEIGHTED_SCORE.other.keyword,
+              },
+            },
+          }
+          : {}),
+      })
+    }
+  }, [value, rerankDefaultModel, isRerankDefaultModelValid, onChange])
+
   return (
     <div className='space-y-2'>
       {supportRetrievalMethods.includes(RETRIEVE_METHOD.semantic) && (
-        <OptionCard icon={<Image className='w-4 h-4' src={retrievalIcon.vector} alt='' />}
+        <OptionCard disabled={disabled} icon={<Image className='w-4 h-4' src={retrievalIcon.vector} alt='' />}
           title={t('dataset.retrieval.semantic_search.title')}
           description={t('dataset.retrieval.semantic_search.description')}
           isActive={
             value.search_method === RETRIEVE_METHOD.semantic
           }
-          onSwitched={() => onChange({
-            ...value,
-            search_method: RETRIEVE_METHOD.semantic,
-          })}
+          onSwitched={() => onSwitch(RETRIEVE_METHOD.semantic)}
           effectImg={Effect.src}
           activeHeaderClassName='bg-dataset-option-card-purple-gradient'
         >
@@ -78,17 +111,14 @@ const RetrievalMethodConfig: FC<Props> = ({
           />
         </OptionCard>
       )}
-      {supportRetrievalMethods.includes(RETRIEVE_METHOD.semantic) && (
-        <OptionCard icon={<Image className='w-4 h-4' src={retrievalIcon.fullText} alt='' />}
+      {supportRetrievalMethods.includes(RETRIEVE_METHOD.fullText) && (
+        <OptionCard disabled={disabled} icon={<Image className='w-4 h-4' src={retrievalIcon.fullText} alt='' />}
           title={t('dataset.retrieval.full_text_search.title')}
           description={t('dataset.retrieval.full_text_search.description')}
           isActive={
             value.search_method === RETRIEVE_METHOD.fullText
           }
-          onSwitched={() => onChange({
-            ...value,
-            search_method: RETRIEVE_METHOD.fullText,
-          })}
+          onSwitched={() => onSwitch(RETRIEVE_METHOD.fullText)}
           effectImg={Effect.src}
           activeHeaderClassName='bg-dataset-option-card-purple-gradient'
         >
@@ -99,8 +129,8 @@ const RetrievalMethodConfig: FC<Props> = ({
           />
         </OptionCard>
       )}
-      {supportRetrievalMethods.includes(RETRIEVE_METHOD.semantic) && (
-        <OptionCard icon={<Image className='w-4 h-4' src={retrievalIcon.hybrid} alt='' />}
+      {supportRetrievalMethods.includes(RETRIEVE_METHOD.hybrid) && (
+        <OptionCard disabled={disabled} icon={<Image className='w-4 h-4' src={retrievalIcon.hybrid} alt='' />}
           title={
             <div className='flex items-center space-x-1'>
               <div>{t('dataset.retrieval.hybrid_search.title')}</div>
@@ -110,11 +140,7 @@ const RetrievalMethodConfig: FC<Props> = ({
           description={t('dataset.retrieval.hybrid_search.description')} isActive={
             value.search_method === RETRIEVE_METHOD.hybrid
           }
-          onSwitched={() => onChange({
-            ...value,
-            search_method: RETRIEVE_METHOD.hybrid,
-            reranking_enable: true,
-          })}
+          onSwitched={() => onSwitch(RETRIEVE_METHOD.hybrid)}
           effectImg={Effect.src}
           activeHeaderClassName='bg-dataset-option-card-purple-gradient'
         >

+ 43 - 59
web/app/components/datasets/common/retrieval-param-config/index.tsx

@@ -1,6 +1,6 @@
 'use client'
 import type { FC } from 'react'
-import React, { useCallback } from 'react'
+import React, { useCallback, useMemo } from 'react'
 import { useTranslation } from 'react-i18next'
 
 import Image from 'next/image'
@@ -39,8 +39,8 @@ const RetrievalParamConfig: FC<Props> = ({
   const { t } = useTranslation()
   const canToggleRerankModalEnable = type !== RETRIEVE_METHOD.hybrid
   const isEconomical = type === RETRIEVE_METHOD.invertedIndex
+  const isHybridSearch = type === RETRIEVE_METHOD.hybrid
   const {
-    defaultModel: rerankDefaultModel,
     modelList: rerankModelList,
   } = useModelListAndDefaultModel(ModelTypeEnum.rerank)
 
@@ -48,35 +48,28 @@ const RetrievalParamConfig: FC<Props> = ({
     currentModel,
   } = useCurrentProviderAndModel(
     rerankModelList,
-    rerankDefaultModel
-      ? {
-        ...rerankDefaultModel,
-        provider: rerankDefaultModel.provider.provider,
-      }
-      : undefined,
+    {
+      provider: value.reranking_model?.reranking_provider_name ?? '',
+      model: value.reranking_model?.reranking_model_name ?? '',
+    },
   )
 
-  const handleDisabledSwitchClick = useCallback(() => {
-    if (!currentModel)
+  const handleDisabledSwitchClick = useCallback((enable: boolean) => {
+    if (enable && !currentModel)
       Toast.notify({ type: 'error', message: t('workflow.errorMsg.rerankModelRequired') })
-  }, [currentModel, rerankDefaultModel, t])
-
-  const isHybridSearch = type === RETRIEVE_METHOD.hybrid
+    onChange({
+      ...value,
+      reranking_enable: enable,
+    })
+    // eslint-disable-next-line react-hooks/exhaustive-deps
+  }, [currentModel, onChange, value])
 
-  const rerankModel = (() => {
-    if (value.reranking_model) {
-      return {
-        provider_name: value.reranking_model.reranking_provider_name,
-        model_name: value.reranking_model.reranking_model_name,
-      }
-    }
-    else if (rerankDefaultModel) {
-      return {
-        provider_name: rerankDefaultModel.provider.provider,
-        model_name: rerankDefaultModel.model,
-      }
+  const rerankModel = useMemo(() => {
+    return {
+      provider_name: value.reranking_model.reranking_provider_name,
+      model_name: value.reranking_model.reranking_model_name,
     }
-  })()
+  }, [value.reranking_model])
 
   const handleChangeRerankMode = (v: RerankingModeEnum) => {
     if (v === value.reranking_mode)
@@ -100,6 +93,8 @@ const RetrievalParamConfig: FC<Props> = ({
         },
       }
     }
+    if (v === RerankingModeEnum.RerankingModel && !currentModel)
+      Toast.notify({ type: 'error', message: t('workflow.errorMsg.rerankModelRequired') })
     onChange(result)
   }
 
@@ -122,22 +117,11 @@ const RetrievalParamConfig: FC<Props> = ({
         <div>
           <div className='flex items-center space-x-2 mb-2'>
             {canToggleRerankModalEnable && (
-              <div
-                className='flex items-center'
-                onClick={handleDisabledSwitchClick}
-              >
-                <Switch
-                  size='md'
-                  defaultValue={currentModel ? value.reranking_enable : false}
-                  onChange={(v) => {
-                    onChange({
-                      ...value,
-                      reranking_enable: v,
-                    })
-                  }}
-                  disabled={!currentModel}
-                />
-              </div>
+              <Switch
+                size='md'
+                defaultValue={value.reranking_enable}
+                onChange={handleDisabledSwitchClick}
+              />
             )}
             <div className='flex items-center'>
               <span className='mr-0.5 system-sm-semibold text-text-secondary'>{t('common.modelProvider.rerankModel.key')}</span>
@@ -148,21 +132,23 @@ const RetrievalParamConfig: FC<Props> = ({
               />
             </div>
           </div>
-          <ModelSelector
-            triggerClassName={`${!value.reranking_enable && '!opacity-60 !cursor-not-allowed'}`}
-            defaultModel={rerankModel && { provider: rerankModel.provider_name, model: rerankModel.model_name }}
-            modelList={rerankModelList}
-            readonly={!value.reranking_enable}
-            onSelect={(v) => {
-              onChange({
-                ...value,
-                reranking_model: {
-                  reranking_provider_name: v.provider,
-                  reranking_model_name: v.model,
-                },
-              })
-            }}
-          />
+          {
+            value.reranking_enable && (
+              <ModelSelector
+                defaultModel={rerankModel && { provider: rerankModel.provider_name, model: rerankModel.model_name }}
+                modelList={rerankModelList}
+                onSelect={(v) => {
+                  onChange({
+                    ...value,
+                    reranking_model: {
+                      reranking_provider_name: v.provider,
+                      reranking_model_name: v.model,
+                    },
+                  })
+                }}
+              />
+            )
+          }
         </div>
       )}
       {
@@ -255,10 +241,8 @@ const RetrievalParamConfig: FC<Props> = ({
             {
               value.reranking_mode !== RerankingModeEnum.WeightedScore && (
                 <ModelSelector
-                  triggerClassName={`${!value.reranking_enable && '!opacity-60 !cursor-not-allowed'}`}
                   defaultModel={rerankModel && { provider: rerankModel.provider_name, model: rerankModel.model_name }}
                   modelList={rerankModelList}
-                  readonly={!value.reranking_enable}
                   onSelect={(v) => {
                     onChange({
                       ...value,

+ 3 - 0
web/app/components/datasets/create/embedding-process/index.tsx

@@ -30,6 +30,7 @@ import { useProviderContext } from '@/context/provider-context'
 import { sleep } from '@/utils'
 import { RETRIEVE_METHOD } from '@/types/app'
 import Tooltip from '@/app/components/base/tooltip'
+import { useInvalidDocumentList } from '@/service/knowledge/use-document'
 
 type Props = {
   datasetId: string
@@ -207,7 +208,9 @@ const EmbeddingProcess: FC<Props> = ({ datasetId, batchId, documents = [], index
   })
 
   const router = useRouter()
+  const invalidDocumentList = useInvalidDocumentList()
   const navToDocumentList = () => {
+    invalidDocumentList()
     router.push(`/datasets/${datasetId}/documents`)
   }
   const navToApiDocs = () => {

+ 46 - 58
web/app/components/datasets/create/step-two/index.tsx

@@ -31,17 +31,17 @@ import LanguageSelect from './language-select'
 import { DelimiterInput, MaxLengthInput, OverlapInput } from './inputs'
 import cn from '@/utils/classnames'
 import type { CrawlOptions, CrawlResultItem, CreateDocumentReq, CustomFile, DocumentItem, FullDocumentDetail, ParentMode, PreProcessingRule, ProcessRule, Rules, createDocumentResponse } from '@/models/datasets'
+import { ChunkingMode, DataSourceType, ProcessMode } from '@/models/datasets'
 
 import Button from '@/app/components/base/button'
 import FloatRightContainer from '@/app/components/base/float-right-container'
 import RetrievalMethodConfig from '@/app/components/datasets/common/retrieval-method-config'
 import EconomicalRetrievalMethodConfig from '@/app/components/datasets/common/economical-retrieval-method-config'
 import { type RetrievalConfig } from '@/types/app'
-import { ensureRerankModelSelected, isReRankModelSelected } from '@/app/components/datasets/common/check-rerank-model'
+import { isReRankModelSelected } from '@/app/components/datasets/common/check-rerank-model'
 import Toast from '@/app/components/base/toast'
 import type { NotionPage } from '@/models/common'
 import { DataSourceProvider } from '@/models/common'
-import { ChunkingMode, DataSourceType, RerankingModeEnum } from '@/models/datasets'
 import { useDatasetDetailContext } from '@/context/dataset-detail'
 import I18n from '@/context/i18n'
 import { RETRIEVE_METHOD } from '@/types/app'
@@ -90,17 +90,13 @@ type StepTwoProps = {
   onCancel?: () => void
 }
 
-export enum SegmentType {
-  AUTO = 'automatic',
-  CUSTOM = 'custom',
-}
 export enum IndexingType {
   QUALIFIED = 'high_quality',
   ECONOMICAL = 'economy',
 }
 
 const DEFAULT_SEGMENT_IDENTIFIER = '\\n\\n'
-const DEFAULT_MAXMIMUM_CHUNK_LENGTH = 500
+const DEFAULT_MAXIMUM_CHUNK_LENGTH = 500
 const DEFAULT_OVERLAP = 50
 
 type ParentChildConfig = {
@@ -131,7 +127,6 @@ const StepTwo = ({
   isSetting,
   documentDetail,
   isAPIKeySet,
-  onSetting,
   datasetId,
   indexingType,
   dataSourceType: inCreatePageDataSourceType,
@@ -162,12 +157,12 @@ const StepTwo = ({
 
   const isInCreatePage = !datasetId || (datasetId && !currentDataset?.data_source_type)
   const dataSourceType = isInCreatePage ? inCreatePageDataSourceType : currentDataset?.data_source_type
-  const [segmentationType, setSegmentationType] = useState<SegmentType>(SegmentType.CUSTOM)
+  const [segmentationType, setSegmentationType] = useState<ProcessMode>(ProcessMode.general)
   const [segmentIdentifier, doSetSegmentIdentifier] = useState(DEFAULT_SEGMENT_IDENTIFIER)
   const setSegmentIdentifier = useCallback((value: string, canEmpty?: boolean) => {
     doSetSegmentIdentifier(value ? escape(value) : (canEmpty ? '' : DEFAULT_SEGMENT_IDENTIFIER))
   }, [])
-  const [maxChunkLength, setMaxChunkLength] = useState(DEFAULT_MAXMIMUM_CHUNK_LENGTH) // default chunk length
+  const [maxChunkLength, setMaxChunkLength] = useState(DEFAULT_MAXIMUM_CHUNK_LENGTH) // default chunk length
   const [limitMaxChunkLength, setLimitMaxChunkLength] = useState(4000)
   const [overlap, setOverlap] = useState(DEFAULT_OVERLAP)
   const [rules, setRules] = useState<PreProcessingRule[]>([])
@@ -198,7 +193,6 @@ const StepTwo = ({
   )
 
   // QA Related
-  const [isLanguageSelectDisabled, _setIsLanguageSelectDisabled] = useState(false)
   const [isQAConfirmDialogOpen, setIsQAConfirmDialogOpen] = useState(false)
   const [docForm, setDocForm] = useState<ChunkingMode>(
     (datasetId && documentDetail) ? documentDetail.doc_form as ChunkingMode : ChunkingMode.text,
@@ -348,7 +342,7 @@ const StepTwo = ({
   }
 
   const updatePreview = () => {
-    if (segmentationType === SegmentType.CUSTOM && maxChunkLength > 4000) {
+    if (segmentationType === ProcessMode.general && maxChunkLength > 4000) {
       Toast.notify({ type: 'error', message: t('datasetCreation.stepTwo.maxLengthCheck') })
       return
     }
@@ -373,13 +367,42 @@ const StepTwo = ({
         model: defaultEmbeddingModel?.model || '',
       },
   )
+  const [retrievalConfig, setRetrievalConfig] = useState(currentDataset?.retrieval_model_dict || {
+    search_method: RETRIEVE_METHOD.semantic,
+    reranking_enable: false,
+    reranking_model: {
+      reranking_provider_name: '',
+      reranking_model_name: '',
+    },
+    top_k: 3,
+    score_threshold_enabled: false,
+    score_threshold: 0.5,
+  } as RetrievalConfig)
+
+  useEffect(() => {
+    if (currentDataset?.retrieval_model_dict)
+      return
+    setRetrievalConfig({
+      search_method: RETRIEVE_METHOD.semantic,
+      reranking_enable: !!isRerankDefaultModelValid,
+      reranking_model: {
+        reranking_provider_name: isRerankDefaultModelValid ? rerankDefaultModel?.provider.provider ?? '' : '',
+        reranking_model_name: isRerankDefaultModelValid ? rerankDefaultModel?.model ?? '' : '',
+      },
+      top_k: 3,
+      score_threshold_enabled: false,
+      score_threshold: 0.5,
+    })
+  // eslint-disable-next-line react-hooks/exhaustive-deps
+  }, [rerankDefaultModel, isRerankDefaultModelValid])
+
   const getCreationParams = () => {
     let params
-    if (segmentationType === SegmentType.CUSTOM && overlap > maxChunkLength) {
+    if (segmentationType === ProcessMode.general && overlap > maxChunkLength) {
       Toast.notify({ type: 'error', message: t('datasetCreation.stepTwo.overlapCheck') })
       return
     }
-    if (segmentationType === SegmentType.CUSTOM && maxChunkLength > limitMaxChunkLength) {
+    if (segmentationType === ProcessMode.general && maxChunkLength > limitMaxChunkLength) {
       Toast.notify({ type: 'error', message: t('datasetCreation.stepTwo.maxLengthCheck', { limit: limitMaxChunkLength }) })
       return
     }
@@ -389,7 +412,6 @@ const StepTwo = ({
         doc_form: currentDocForm,
         doc_language: docLanguage,
         process_rule: getProcessRule(),
-        // eslint-disable-next-line @typescript-eslint/no-use-before-define
         retrieval_model: retrievalConfig, // Readonly. If want to changed, just go to settings page.
         embedding_model: embeddingModel.model, // Readonly
         embedding_model_provider: embeddingModel.provider, // Readonly
@@ -400,10 +422,7 @@ const StepTwo = ({
       const indexMethod = getIndexing_technique()
       if (
         !isReRankModelSelected({
-          rerankDefaultModel,
-          isRerankDefaultModelValid: !!isRerankDefaultModelValid,
           rerankModelList,
-          // eslint-disable-next-line @typescript-eslint/no-use-before-define
           retrievalConfig,
           indexMethod: indexMethod as string,
         })
@@ -411,16 +430,6 @@ const StepTwo = ({
         Toast.notify({ type: 'error', message: t('appDebug.datasetConfig.rerankModelRequired') })
         return
       }
-      const postRetrievalConfig = ensureRerankModelSelected({
-        rerankDefaultModel: rerankDefaultModel!,
-        retrievalConfig: {
-          // eslint-disable-next-line @typescript-eslint/no-use-before-define
-          ...retrievalConfig,
-          // eslint-disable-next-line @typescript-eslint/no-use-before-define
-          reranking_enable: retrievalConfig.reranking_mode === RerankingModeEnum.RerankingModel,
-        },
-        indexMethod: indexMethod as string,
-      })
       params = {
         data_source: {
           type: dataSourceType,
@@ -432,8 +441,7 @@ const StepTwo = ({
         process_rule: getProcessRule(),
         doc_form: currentDocForm,
         doc_language: docLanguage,
-
-        retrieval_model: postRetrievalConfig,
+        retrieval_model: retrievalConfig,
         embedding_model: embeddingModel.model,
         embedding_model_provider: embeddingModel.provider,
       } as CreateDocumentReq
@@ -490,7 +498,6 @@ const StepTwo = ({
 
   const getDefaultMode = () => {
     if (documentDetail)
-      // @ts-expect-error fix after api refactored
       setSegmentationType(documentDetail.dataset_process_rule.mode)
   }
 
@@ -525,7 +532,6 @@ const StepTwo = ({
           onSuccess(data) {
             updateIndexingTypeCache && updateIndexingTypeCache(indexType as string)
             updateResultCache && updateResultCache(data)
-            // eslint-disable-next-line @typescript-eslint/no-use-before-define
             updateRetrievalMethodCache && updateRetrievalMethodCache(retrievalConfig.search_method as string)
           },
         },
@@ -545,14 +551,6 @@ const StepTwo = ({
     isSetting && onSave && onSave()
   }
 
-  const changeToEconomicalType = () => {
-    if (docForm !== ChunkingMode.text)
-      return
-
-    if (!hasSetIndexType)
-      setIndexType(IndexingType.ECONOMICAL)
-  }
-
   useEffect(() => {
     // fetch rules
     if (!isSetting) {
@@ -574,18 +572,6 @@ const StepTwo = ({
       setIndexType(isAPIKeySet ? IndexingType.QUALIFIED : IndexingType.ECONOMICAL)
   }, [isAPIKeySet, indexingType, datasetId])
 
-  const [retrievalConfig, setRetrievalConfig] = useState(currentDataset?.retrieval_model_dict || {
-    search_method: RETRIEVE_METHOD.semantic,
-    reranking_enable: false,
-    reranking_model: {
-      reranking_provider_name: rerankDefaultModel?.provider.provider,
-      reranking_model_name: rerankDefaultModel?.model,
-    },
-    top_k: 3,
-    score_threshold_enabled: false,
-    score_threshold: 0.5,
-  } as RetrievalConfig)
-
   const economyDomRef = useRef<HTMLDivElement>(null)
   const isHoveringEconomy = useHover(economyDomRef)
 
@@ -984,12 +970,14 @@ const StepTwo = ({
               getIndexing_technique() === IndexingType.QUALIFIED
                 ? (
                   <RetrievalMethodConfig
+                    disabled={!!datasetId}
                     value={retrievalConfig}
                     onChange={setRetrievalConfig}
                   />
                 )
                 : (
                   <EconomicalRetrievalMethodConfig
+                    disabled={!!datasetId}
                     value={retrievalConfig}
                     onChange={setRetrievalConfig}
                   />
@@ -1010,7 +998,7 @@ const StepTwo = ({
           )
           : (
             <div className='flex items-center mt-8 py-2'>
-              <Button loading={isCreating} variant='primary' onClick={createHandle}>{t('datasetCreation.stepTwo.save')}</Button>
+              {!datasetId && <Button loading={isCreating} variant='primary' onClick={createHandle}>{t('datasetCreation.stepTwo.save')}</Button>}
               <Button className='ml-2' onClick={onCancel}>{t('datasetCreation.stepTwo.cancel')}</Button>
             </div>
           )}
@@ -1081,11 +1069,11 @@ const StepTwo = ({
               }
               {
                 currentDocForm !== ChunkingMode.qa
-                  && <Badge text={t(
-                    'datasetCreation.stepTwo.previewChunkCount', {
-                      count: estimate?.total_segments || 0,
-                    }) as string}
-                  />
+                && <Badge text={t(
+                  'datasetCreation.stepTwo.previewChunkCount', {
+                    count: estimate?.total_segments || 0,
+                  }) as string}
+                />
               }
             </div>
           </PreviewHeader>}

+ 2 - 2
web/app/components/datasets/create/step-two/option-card.tsx

@@ -4,7 +4,7 @@ import classNames from '@/utils/classnames'
 
 const TriangleArrow: FC<ComponentProps<'svg'>> = props => (
   <svg xmlns="http://www.w3.org/2000/svg" width="24" height="11" viewBox="0 0 24 11" fill="none" {...props}>
-    <path d="M9.87868 1.12132C11.0503 -0.0502525 12.9497 -0.0502525 14.1213 1.12132L23.3137 10.3137H0.686292L9.87868 1.12132Z" fill="currentColor"/>
+    <path d="M9.87868 1.12132C11.0503 -0.0502525 12.9497 -0.0502525 14.1213 1.12132L23.3137 10.3137H0.686292L9.87868 1.12132Z" fill="currentColor" />
   </svg>
 )
 
@@ -65,7 +65,7 @@ export const OptionCard: FC<OptionCardProps> = forwardRef((props, ref) => {
       (isActive && !noHighlight)
         ? 'border-[1.5px] border-components-option-card-option-selected-border'
         : 'border border-components-option-card-option-border',
-      disabled && 'opacity-50 cursor-not-allowed',
+      disabled && 'opacity-50 pointer-events-none',
       className,
     )}
     style={{

+ 16 - 8
web/app/components/datasets/documents/detail/completed/index.tsx

@@ -232,6 +232,16 @@ const Completed: FC<ICompletedProps> = ({
     setFullScreen(false)
   }, [])
 
+  const onCloseNewSegmentModal = useCallback(() => {
+    onNewSegmentModalChange(false)
+    setFullScreen(false)
+  }, [onNewSegmentModalChange])
+
+  const onCloseNewChildChunkModal = useCallback(() => {
+    setShowNewChildSegmentModal(false)
+    setFullScreen(false)
+  }, [])
+
   const { mutateAsync: enableSegment } = useEnableSegment()
   const { mutateAsync: disableSegment } = useDisableSegment()
 
@@ -623,6 +633,7 @@ const Completed: FC<ICompletedProps> = ({
       <FullScreenDrawer
         isOpen={currSegment.showModal}
         fullScreen={fullScreen}
+        onClose={onCloseSegmentDetail}
       >
         <SegmentDetail
           segInfo={currSegment.segInfo ?? { id: '' }}
@@ -636,13 +647,11 @@ const Completed: FC<ICompletedProps> = ({
       <FullScreenDrawer
         isOpen={showNewSegmentModal}
         fullScreen={fullScreen}
+        onClose={onCloseNewSegmentModal}
       >
         <NewSegment
           docForm={docForm}
-          onCancel={() => {
-            onNewSegmentModalChange(false)
-            setFullScreen(false)
-          }}
+          onCancel={onCloseNewSegmentModal}
           onSave={resetList}
           viewNewlyAddedChunk={viewNewlyAddedChunk}
         />
@@ -651,6 +660,7 @@ const Completed: FC<ICompletedProps> = ({
       <FullScreenDrawer
         isOpen={currChildChunk.showModal}
         fullScreen={fullScreen}
+        onClose={onCloseChildSegmentDetail}
       >
         <ChildSegmentDetail
           chunkId={currChunkId}
@@ -664,13 +674,11 @@ const Completed: FC<ICompletedProps> = ({
       <FullScreenDrawer
         isOpen={showNewChildSegmentModal}
         fullScreen={fullScreen}
+        onClose={onCloseNewChildChunkModal}
       >
         <NewChildSegment
           chunkId={currChunkId}
-          onCancel={() => {
-            setShowNewChildSegmentModal(false)
-            setFullScreen(false)
-          }}
+          onCancel={onCloseNewChildChunkModal}
           onSave={onSaveNewChildChunk}
           viewNewlyAddedChildChunk={viewNewlyAddedChildChunk}
         />

+ 1 - 1
web/app/components/datasets/documents/detail/completed/segment-list.tsx

@@ -80,7 +80,7 @@ ref: ForwardedRef<HTMLDivElement>,
                 checked={selectedSegmentIds.includes(segItem.id)}
                 onCheck={() => onSelected(segItem.id)}
               />
-              <div className='grow'>
+              <div className='grow min-w-0'>
                 <SegmentCard
                   key={`${segItem.id}-card`}
                   detail={segItem}

+ 14 - 4
web/app/components/datasets/documents/detail/index.tsx

@@ -22,8 +22,9 @@ import { useDatasetDetailContext } from '@/context/dataset-detail'
 import FloatRightContainer from '@/app/components/base/float-right-container'
 import useBreakpoints, { MediaType } from '@/hooks/use-breakpoints'
 import { LayoutRight2LineMod } from '@/app/components/base/icons/src/public/knowledge'
-import { useCheckSegmentBatchImportProgress, useSegmentBatchImport } from '@/service/knowledge/use-segment'
+import { useCheckSegmentBatchImportProgress, useChildSegmentListKey, useSegmentBatchImport, useSegmentListKey } from '@/service/knowledge/use-segment'
 import { useDocumentDetail, useDocumentMetadata } from '@/service/knowledge/use-document'
+import { useInvalid } from '@/service/use-base'
 
 type DocumentContextValue = {
   datasetId?: string
@@ -149,11 +150,20 @@ const DocumentDetail: FC<Props> = ({ datasetId, documentId }) => {
 
   const embedding = ['queuing', 'indexing', 'paused'].includes((documentDetail?.display_status || '').toLowerCase())
 
+  const invalidChunkList = useInvalid(useSegmentListKey)
+  const invalidChildChunkList = useInvalid(useChildSegmentListKey)
+
   const handleOperate = (operateName?: string) => {
-    if (operateName === 'delete')
+    if (operateName === 'delete') {
       backToPrev()
-    else
+    }
+    else {
       detailMutate()
+      setTimeout(() => {
+        invalidChunkList()
+        invalidChildChunkList()
+      }, 5000)
+    }
   }
 
   const mode = useMemo(() => {
@@ -245,7 +255,7 @@ const DocumentDetail: FC<Props> = ({ datasetId, documentId }) => {
         <div className='flex flex-row flex-1' style={{ height: 'calc(100% - 4rem)' }}>
           {isDetailLoading
             ? <Loading type='app' />
-            : <div className={cn('h-full w-full flex flex-col',
+            : <div className={cn('h-full grow min-w-0 flex flex-col',
               embedding ? '' : isFullDocMode ? 'relative pt-4 pr-11 pl-11' : 'relative pt-3 pr-11 pl-5',
             )}>
               {embedding

+ 16 - 2
web/app/components/datasets/documents/index.tsx

@@ -24,6 +24,10 @@ import { DataSourceType } from '@/models/datasets'
 import IndexFailed from '@/app/components/datasets/common/document-status-with-action/index-failed'
 import { useProviderContext } from '@/context/provider-context'
 import cn from '@/utils/classnames'
+import { useInvalidDocumentDetailKey } from '@/service/knowledge/use-document'
+import { useInvalid } from '@/service/use-base'
+import { useChildSegmentListKey, useSegmentListKey } from '@/service/knowledge/use-segment'
+
 const FolderPlusIcon = ({ className }: React.SVGProps<SVGElement>) => {
   return <svg width="20" height="20" viewBox="0 0 20 20" fill="none" xmlns="http://www.w3.org/2000/svg" className={className ?? ''}>
     <path d="M10.8332 5.83333L9.90355 3.9741C9.63601 3.439 9.50222 3.17144 9.30265 2.97597C9.12615 2.80311 8.91344 2.67164 8.6799 2.59109C8.41581 2.5 8.11668 2.5 7.51841 2.5H4.33317C3.39975 2.5 2.93304 2.5 2.57652 2.68166C2.26292 2.84144 2.00795 3.09641 1.84816 3.41002C1.6665 3.76654 1.6665 4.23325 1.6665 5.16667V5.83333M1.6665 5.83333H14.3332C15.7333 5.83333 16.4334 5.83333 16.9681 6.10582C17.4386 6.3455 17.821 6.72795 18.0607 7.19836C18.3332 7.73314 18.3332 8.4332 18.3332 9.83333V13.5C18.3332 14.9001 18.3332 15.6002 18.0607 16.135C17.821 16.6054 17.4386 16.9878 16.9681 17.2275C16.4334 17.5 15.7333 17.5 14.3332 17.5H5.6665C4.26637 17.5 3.56631 17.5 3.03153 17.2275C2.56112 16.9878 2.17867 16.6054 1.93899 16.135C1.6665 15.6002 1.6665 14.9001 1.6665 13.5V5.83333ZM9.99984 14.1667V9.16667M7.49984 11.6667H12.4998" stroke="#667085" strokeWidth="1.5" strokeLinecap="round" strokeLinejoin="round" />
@@ -99,7 +103,7 @@ const Documents: FC<IDocumentsProps> = ({ datasetId }) => {
     return { page: currPage + 1, limit, keyword: debouncedSearchValue, fetch: isDataSourceNotion ? true : '' }
   }, [currPage, debouncedSearchValue, isDataSourceNotion, limit])
 
-  const { data: documentsRes, error, mutate, isLoading: isListLoading } = useSWR(
+  const { data: documentsRes, mutate, isLoading: isListLoading } = useSWR(
     {
       action: 'fetchDocuments',
       datasetId,
@@ -115,10 +119,20 @@ const Documents: FC<IDocumentsProps> = ({ datasetId }) => {
       setIsMuting(false)
   }, [isListLoading, isMuting])
 
+  const invalidDocumentDetail = useInvalidDocumentDetailKey()
+  const invalidChunkList = useInvalid(useSegmentListKey)
+  const invalidChildChunkList = useInvalid(useChildSegmentListKey)
+
   const handleUpdate = useCallback(() => {
     setIsMuting(true)
     mutate()
-  }, [mutate])
+    invalidDocumentDetail()
+    setTimeout(() => {
+      invalidChunkList()
+      invalidChildChunkList()
+    }, 5000)
+    // eslint-disable-next-line react-hooks/exhaustive-deps
+  }, [])
 
   const documentsWithProgress = useMemo(() => {
     let completedNum = 0

+ 17 - 21
web/app/components/datasets/documents/list.tsx

@@ -133,6 +133,16 @@ export const StatusItem: FC<{
     <span className={cn(`${STATUS_TEXT_COLOR_MAP[DOC_INDEX_STATUS_MAP[localStatus].color as keyof typeof STATUS_TEXT_COLOR_MAP]} text-sm`, textCls)}>
       {DOC_INDEX_STATUS_MAP[localStatus]?.text}
     </span>
+    {
+      errorMessage && (
+        <Tooltip
+          popupContent={
+            <div className='max-w-[260px] break-all'>{errorMessage}</div>
+          }
+          triggerClassName='ml-1 w-4 h-4'
+        />
+      )
+    }
     {
       scene === 'detail' && (
         <div className='flex justify-between items-center ml-1.5'>
@@ -152,16 +162,6 @@ export const StatusItem: FC<{
         </div>
       )
     }
-    {
-      errorMessage && (
-        <Tooltip
-          popupContent={
-            <div className='max-w-[260px] break-all'>{errorMessage}</div>
-          }
-          triggerClassName='ml-1 w-4 h-4'
-        />
-      )
-    }
   </div>
 }
 
@@ -561,18 +561,14 @@ const DocumentList: FC<IDocumentListProps> = ({
                 </div>
               </td>
               <td>
-                <div className={'group flex items-center justify-between mr-6 hover:mr-0'}>
-                  <span className={cn(s.tdValue, 'flex items-center')}>
-                    {doc?.data_source_type === DataSourceType.NOTION && <NotionIcon className='inline-flex -mt-[3px] mr-1.5 align-middle' type='page' src={doc.data_source_info.notion_page_icon} />
-                    }
+                <div className={'group flex items-center mr-6 hover:mr-0 max-w-[460px]'}>
+                  <div className='shrink-0'>
+                    {doc?.data_source_type === DataSourceType.NOTION && <NotionIcon className='inline-flex -mt-[3px] mr-1.5 align-middle' type='page' src={doc.data_source_info.notion_page_icon} />}
                     {doc?.data_source_type === DataSourceType.FILE && <FileTypeIcon type={extensionToFileType(doc?.data_source_info?.upload_file?.extension ?? fileType)} className='mr-1.5' />}
-                    {doc?.data_source_type === DataSourceType.WEB && <Globe01 className='inline-flex -mt-[3px] mr-1.5 align-middle' />
-                    }
-                    {
-                      doc.name
-                    }
-                  </span>
-                  <div className='group-hover:flex hidden'>
+                    {doc?.data_source_type === DataSourceType.WEB && <Globe01 className='inline-flex -mt-[3px] mr-1.5 align-middle' />}
+                  </div>
+                  <span className='text-sm truncate grow-1'>{doc.name}</span>
+                  <div className='group-hover:flex group-hover:ml-auto hidden shrink-0'>
                     <Tooltip
                       popupContent={t('datasetDocuments.list.table.rename')}
                     >

+ 1 - 1
web/app/components/datasets/hit-testing/components/child-chunks-item.tsx

@@ -17,7 +17,7 @@ const ChildChunks: FC<Props> = ({
   const { id, score, content, position } = payload
   return (
     <div
-      className={!isShowAll ? 'line-clamp-2' : ''}
+      className={!isShowAll ? 'line-clamp-2 break-all' : ''}
     >
       <div className='inline-flex items-center relative top-[-2px]'>
         <div className='flex items-center h-[20.5px] bg-state-accent-solid  system-2xs-semibold-uppercase text-text-primary-on-surface px-1'>C-{position}</div>

+ 1 - 1
web/app/components/datasets/hit-testing/components/chunk-detail-modal.tsx

@@ -56,7 +56,7 @@ const ChunkDetailModal: FC<Props> = ({
             </div>
             <Score value={score} />
           </div>
-          <div className={cn('mt-2 body-md-regular text-text-secondary', heighClassName)}>
+          <div className={cn('mt-2 body-md-regular text-text-secondary break-all', heighClassName)}>
             {content}
           </div>
           {!isParentChildRetrieval && keywords && keywords.length > 0 && (

+ 2 - 7
web/app/components/datasets/hit-testing/components/result-item.tsx

@@ -43,13 +43,8 @@ const ResultItem: FC<Props> = ({
     setFalse: hideDetailModal,
   }] = useBoolean(false)
 
-  const handleClickCard = () => {
-    if (!isParentChildRetrieval)
-      showDetailModal()
-  }
-
   return (
-    <div className={cn('pt-3 bg-chat-bubble-bg rounded-xl hover:shadow-lg', !isParentChildRetrieval && 'cursor-pointer')} onClick={handleClickCard}>
+    <div className={cn('pt-3 bg-chat-bubble-bg rounded-xl hover:shadow-lg cursor-pointer')} onClick={showDetailModal}>
       {/* Meta info */}
       <div className='flex justify-between items-center px-3'>
         <div className='flex items-center space-x-2'>
@@ -66,7 +61,7 @@ const ResultItem: FC<Props> = ({
 
       {/* Main */}
       <div className='mt-1 px-3'>
-        <div className='line-clamp-2 body-md-regular'>{content}</div>
+        <div className='line-clamp-2 body-md-regular break-all'>{content}</div>
         {isParentChildRetrieval && (
           <div className='mt-1'>
             <div className={cn('inline-flex items-center h-6 space-x-0.5 text-text-secondary select-none rounded-lg cursor-pointer', isFold && 'pl-1 bg-[linear-gradient(90deg,_rgba(200,_206,_218,_0.20)_0%,_rgba(200,_206,_218,_0.04)_100%)]')} onClick={toggleFold}>

+ 4 - 4
web/app/components/datasets/hit-testing/components/score.tsx

@@ -12,15 +12,15 @@ const Score: FC<Props> = ({
   value,
   besideChunkName,
 }) => {
-  if (!value)
+  if (!value || isNaN(value))
     return null
-
   return (
-    <div className={cn('relative items-center px-[5px] border border-components-progress-bar-border overflow-hidden', besideChunkName ? 'border-l-0 h-[20.5px]' : 'h-[20px] rounded-md')}>
+    <div className={cn('relative items-center px-[5px] border border-components-progress-bar-border overflow-hidden',
+      besideChunkName ? 'border-l-0 h-[20.5px]' : 'h-[20px] rounded-md')}>
       <div className={cn('absolute top-0 left-0 h-full bg-util-colors-blue-brand-blue-brand-100 border-r-[1.5px] border-components-progress-brand-progress', value === 1 && 'border-r-0')} style={{ width: `${value * 100}%` }} />
       <div className={cn('relative flex items-center h-full space-x-0.5 text-util-colors-blue-brand-blue-brand-700')}>
         <div className='system-2xs-medium-uppercase'>score</div>
-        <div className='system-xs-semibold'>{value.toFixed(2)}</div>
+        <div className='system-xs-semibold'>{value?.toFixed(2)}</div>
       </div>
     </div>
   )

+ 1 - 1
web/app/components/datasets/hit-testing/index.tsx

@@ -192,7 +192,7 @@ const HitTesting: FC<Props> = ({ datasetId }: Props) => {
           }
         </div>
       </FloatRightContainer>
-      <Drawer isOpen={isShowModifyRetrievalModal} onClose={() => setIsShowModifyRetrievalModal(false)} footer={null} mask={isMobile} panelClassname='mt-16 mx-2 sm:mr-2 mb-3 !p-0 !max-w-[640px] rounded-xl'>
+      <Drawer unmount={true} isOpen={isShowModifyRetrievalModal} onClose={() => setIsShowModifyRetrievalModal(false)} footer={null} mask={isMobile} panelClassname='mt-16 mx-2 sm:mr-2 mb-3 !p-0 !max-w-[640px] rounded-xl'>
         <ModifyRetrievalModal
           indexMethod={currentDataset?.indexing_technique || ''}
           value={retrievalConfig}

+ 2 - 14
web/app/components/datasets/hit-testing/modify-retrieval-modal.tsx

@@ -9,9 +9,8 @@ import type { RetrievalConfig } from '@/types/app'
 import RetrievalMethodConfig from '@/app/components/datasets/common/retrieval-method-config'
 import EconomicalRetrievalMethodConfig from '@/app/components/datasets/common/economical-retrieval-method-config'
 import Button from '@/app/components/base/button'
-import { ensureRerankModelSelected, isReRankModelSelected } from '@/app/components/datasets/common/check-rerank-model'
+import { isReRankModelSelected } from '@/app/components/datasets/common/check-rerank-model'
 import { useModelListAndDefaultModelAndCurrentProviderAndModel } from '@/app/components/header/account-setting/model-provider-page/hooks'
-import { RerankingModeEnum } from '@/models/datasets'
 
 type Props = {
   indexMethod: string
@@ -39,15 +38,11 @@ const ModifyRetrievalModal: FC<Props> = ({
 
   const {
     modelList: rerankModelList,
-    defaultModel: rerankDefaultModel,
-    currentModel: isRerankDefaultModelValid,
   } = useModelListAndDefaultModelAndCurrentProviderAndModel(ModelTypeEnum.rerank)
 
   const handleSave = () => {
     if (
       !isReRankModelSelected({
-        rerankDefaultModel,
-        isRerankDefaultModelValid: !!isRerankDefaultModelValid,
         rerankModelList,
         retrievalConfig,
         indexMethod,
@@ -56,14 +51,7 @@ const ModifyRetrievalModal: FC<Props> = ({
       Toast.notify({ type: 'error', message: t('appDebug.datasetConfig.rerankModelRequired') })
       return
     }
-    onSave(ensureRerankModelSelected({
-      rerankDefaultModel: rerankDefaultModel!,
-      retrievalConfig: {
-        ...retrievalConfig,
-        reranking_enable: retrievalConfig.reranking_mode === RerankingModeEnum.RerankingModel,
-      },
-      indexMethod,
-    }))
+    onSave(retrievalConfig)
   }
 
   if (!isShow)

+ 7 - 19
web/app/components/datasets/settings/form/index.tsx

@@ -17,11 +17,11 @@ import Input from '@/app/components/base/input'
 import Textarea from '@/app/components/base/textarea'
 import { ApiConnectionMod } from '@/app/components/base/icons/src/vender/solid/development'
 import { updateDatasetSetting } from '@/service/datasets'
-import { type DataSetListResponse, RerankingModeEnum } from '@/models/datasets'
+import { type DataSetListResponse } from '@/models/datasets'
 import DatasetDetailContext from '@/context/dataset-detail'
 import { type RetrievalConfig } from '@/types/app'
 import { useAppContext } from '@/context/app-context'
-import { ensureRerankModelSelected, isReRankModelSelected } from '@/app/components/datasets/common/check-rerank-model'
+import { isReRankModelSelected } from '@/app/components/datasets/common/check-rerank-model'
 import ModelSelector from '@/app/components/header/account-setting/model-provider-page/model-selector'
 import {
   useModelList,
@@ -74,8 +74,6 @@ const Form = () => {
   )
   const {
     modelList: rerankModelList,
-    defaultModel: rerankDefaultModel,
-    currentModel: isRerankDefaultModelValid,
   } = useModelListAndDefaultModelAndCurrentProviderAndModel(ModelTypeEnum.rerank)
   const { data: embeddingModelList } = useModelList(ModelTypeEnum.textEmbedding)
 
@@ -109,8 +107,6 @@ const Form = () => {
     }
     if (
       !isReRankModelSelected({
-        rerankDefaultModel,
-        isRerankDefaultModelValid: !!isRerankDefaultModelValid,
         rerankModelList,
         retrievalConfig,
         indexMethod,
@@ -119,17 +115,9 @@ const Form = () => {
       notify({ type: 'error', message: t('appDebug.datasetConfig.rerankModelRequired') })
       return
     }
-    const postRetrievalConfig = ensureRerankModelSelected({
-      rerankDefaultModel: rerankDefaultModel!,
-      retrievalConfig: {
-        ...retrievalConfig,
-        reranking_enable: retrievalConfig.reranking_mode === RerankingModeEnum.RerankingModel,
-      },
-      indexMethod,
-    })
-    if (postRetrievalConfig.weights) {
-      postRetrievalConfig.weights.vector_setting.embedding_provider_name = currentDataset?.embedding_model_provider || ''
-      postRetrievalConfig.weights.vector_setting.embedding_model_name = currentDataset?.embedding_model || ''
+    if (retrievalConfig.weights) {
+      retrievalConfig.weights.vector_setting.embedding_provider_name = currentDataset?.embedding_model_provider || ''
+      retrievalConfig.weights.vector_setting.embedding_model_name = currentDataset?.embedding_model || ''
     }
     try {
       setLoading(true)
@@ -141,8 +129,8 @@ const Form = () => {
           permission,
           indexing_technique: indexMethod,
           retrieval_model: {
-            ...postRetrievalConfig,
-            score_threshold: postRetrievalConfig.score_threshold_enabled ? postRetrievalConfig.score_threshold : 0,
+            ...retrievalConfig,
+            score_threshold: retrievalConfig.score_threshold_enabled ? retrievalConfig.score_threshold : 0,
           },
           embedding_model: embeddingModel.model,
           embedding_model_provider: embeddingModel.provider,

+ 1 - 0
web/app/components/header/account-setting/model-provider-page/model-selector/model-trigger.tsx

@@ -36,6 +36,7 @@ const ModelTrigger: FC<ModelTriggerProps> = ({
       className={classNames(
         'group flex items-center px-2 h-8 rounded-lg bg-components-input-bg-normal',
         !readonly && 'hover:bg-components-input-bg-hover cursor-pointer',
+        !!readonly && 'opacity-50',
         className,
         open && '!bg-components-input-bg-hover',
         model.status !== ModelStatusEnum.active && '!bg-[#FFFAEB]',

+ 5 - 4
web/app/components/workflow/nodes/knowledge-retrieval/components/retrieval-config.tsx

@@ -59,7 +59,8 @@ const RetrievalConfig: FC<Props> = ({
   }, [onOpenFromPropsChange])
 
   const {
-    defaultModel: rerankDefaultModel,
+    currentProvider: validRerankDefaultProvider,
+    currentModel: validRerankDefaultModel,
   } = useModelListAndDefaultModelAndCurrentProviderAndModel(ModelTypeEnum.rerank)
 
   const { multiple_retrieval_config } = payload
@@ -75,8 +76,8 @@ const RetrievalConfig: FC<Props> = ({
         ? undefined
         : (!configs.reranking_model?.reranking_provider_name
           ? {
-            provider: rerankDefaultModel?.provider?.provider || '',
-            model: rerankDefaultModel?.model || '',
+            provider: validRerankDefaultProvider?.provider || '',
+            model: validRerankDefaultModel?.model || '',
           }
           : {
             provider: configs.reranking_model?.reranking_provider_name,
@@ -86,7 +87,7 @@ const RetrievalConfig: FC<Props> = ({
       weights: configs.weights as any,
       reranking_enable: configs.reranking_enable,
     })
-  }, [onMultipleRetrievalConfigChange, payload.retrieval_mode, rerankDefaultModel?.provider?.provider, rerankDefaultModel?.model, onRetrievalModeChange])
+  }, [onMultipleRetrievalConfigChange, payload.retrieval_mode, validRerankDefaultProvider, validRerankDefaultModel, onRetrievalModeChange])
 
   return (
     <PortalToFollowElem

+ 1 - 1
web/app/components/workflow/nodes/knowledge-retrieval/use-config.ts

@@ -156,7 +156,7 @@ const useConfig = (id: string, payload: KnowledgeRetrievalNodeType) => {
     })
     setInputs(newInput)
   // eslint-disable-next-line react-hooks/exhaustive-deps
-  }, [currentProvider?.provider, currentModel, rerankDefaultModel])
+  }, [currentProvider?.provider, currentModel, currentRerankModel, rerankDefaultModel])
   const [selectedDatasets, setSelectedDatasets] = useState<DataSet[]>([])
   const [rerankModelOpen, setRerankModelOpen] = useState(false)
   const handleRetrievalModeChange = useCallback((newMode: RETRIEVE_TYPE) => {

+ 23 - 10
web/app/components/workflow/nodes/knowledge-retrieval/utils.ts

@@ -126,7 +126,7 @@ export const getMultipleRetrievalConfig = (
     reranking_mode,
     reranking_model,
     weights,
-    reranking_enable: ((allInternal && allEconomic) || allExternal) ? reranking_enable : true,
+    reranking_enable: ((allInternal && allEconomic) || allExternal) ? reranking_enable : shouldSetWeightDefaultValue,
   }
 
   const setDefaultWeights = () => {
@@ -152,16 +152,20 @@ export const getMultipleRetrievalConfig = (
 
   if (allEconomic || mixtureHighQualityAndEconomic || inconsistentEmbeddingModel || allExternal || mixtureInternalAndExternal) {
     result.reranking_mode = RerankingModeEnum.RerankingModel
-
-    if (rerankModelIsValid) {
-      result.reranking_mode = RerankingModeEnum.RerankingModel
-      result.reranking_model = {
-        provider: validRerankModel?.provider || '',
-        model: validRerankModel?.model || '',
+    if (!result.reranking_model?.provider || !result.reranking_model?.model) {
+      if (rerankModelIsValid) {
+        result.reranking_enable = true
+        result.reranking_model = {
+          provider: validRerankModel?.provider || '',
+          model: validRerankModel?.model || '',
+        }
+      }
+      else {
+        result.reranking_model = {
+          provider: '',
+          model: '',
+        }
       }
-    }
-    else {
-      result.reranking_model = undefined
     }
   }
 
@@ -169,6 +173,7 @@ export const getMultipleRetrievalConfig = (
     if (!reranking_mode) {
       if (validRerankModel?.provider && validRerankModel?.model) {
         result.reranking_mode = RerankingModeEnum.RerankingModel
+        result.reranking_enable = true
         result.reranking_model = {
           provider: validRerankModel.provider,
           model: validRerankModel.model,
@@ -186,6 +191,7 @@ export const getMultipleRetrievalConfig = (
     if (reranking_mode === RerankingModeEnum.WeightedScore && weights && shouldSetWeightDefaultValue) {
       if (rerankModelIsValid) {
         result.reranking_mode = RerankingModeEnum.RerankingModel
+        result.reranking_enable = true
         result.reranking_model = {
           provider: validRerankModel.provider || '',
           model: validRerankModel.model || '',
@@ -199,6 +205,13 @@ export const getMultipleRetrievalConfig = (
       result.reranking_mode = RerankingModeEnum.WeightedScore
       setDefaultWeights()
     }
+    if (reranking_mode === RerankingModeEnum.RerankingModel && rerankModelIsValid) {
+      result.reranking_enable = true
+      result.reranking_model = {
+        provider: validRerankModel.provider || '',
+        model: validRerankModel.model || '',
+      }
+    }
   }
 
   return result

+ 1 - 1
web/i18n/en-US/app-debug.ts

@@ -483,7 +483,7 @@ const translation = {
       title: 'Multi-path retrieval',
       description: 'Based on user intent, queries across all Knowledge, retrieves relevant text from multi-sources, and selects the best results matching the user query after reranking. ',
     },
-    rerankModelRequired: 'Rerank model is required',
+    rerankModelRequired: 'A configured Rerank Model is required',
     params: 'Params',
     top_k: 'Top K',
     top_kTip: 'Used to filter chunks that are most similar to user questions. The system will also dynamically adjust the value of Top K, according to max_tokens of the selected model.',

+ 2 - 2
web/i18n/en-US/workflow.ts

@@ -183,7 +183,7 @@ const translation = {
   },
   errorMsg: {
     fieldRequired: '{{field}} is required',
-    rerankModelRequired: 'Before turning on the Rerank Model, please confirm that the model has been successfully configured in the settings.',
+    rerankModelRequired: 'A configured Rerank Model is required',
     authRequired: 'Authorization is required',
     invalidJson: '{{field}} is invalid JSON',
     fields: {
@@ -191,7 +191,7 @@ const translation = {
       variableValue: 'Variable Value',
       code: 'Code',
       model: 'Model',
-      rerankModel: 'Rerank Model',
+      rerankModel: 'A configured Rerank Model',
       visionVariable: 'Vision Variable',
     },
     invalidVariable: 'Invalid variable',

+ 1 - 1
web/i18n/zh-Hans/app-debug.ts

@@ -475,7 +475,7 @@ const translation = {
       title: '多路召回',
       description: '根据用户意图同时匹配所有知识库,从多路知识库查询相关文本片段,经过重排序步骤,从多路查询结果中选择匹配用户问题的最佳结果。',
     },
-    rerankModelRequired: '请选择 Rerank 模型',
+    rerankModelRequired: '未配置 Rerank 模型',
     params: '参数设置',
     top_k: 'Top K',
     top_kTip: '用于筛选与用户问题相似度最高的文本片段。系统同时会根据选用模型上下文窗口大小动态调整分段数量。',

+ 1 - 1
web/i18n/zh-Hans/workflow.ts

@@ -183,7 +183,7 @@ const translation = {
   },
   errorMsg: {
     fieldRequired: '{{field}} 不能为空',
-    rerankModelRequired: '开启 Rerank 模型前,请务必确认模型已在设置中成功配置。',
+    rerankModelRequired: '未配置 Rerank 模型',
     authRequired: '请先授权',
     invalidJson: '{{field}} 是非法的 JSON',
     fields: {

+ 6 - 2
web/service/knowledge/use-document.ts

@@ -29,6 +29,10 @@ export const useDocumentList = (payload: {
   })
 }
 
+export const useInvalidDocumentList = () => {
+  return useInvalid(useDocumentListKey)
+}
+
 const useAutoDisabledDocumentKey = [NAME_SPACE, 'autoDisabledDocument']
 export const useAutoDisabledDocuments = (datasetId: string) => {
   return useQuery({
@@ -94,7 +98,7 @@ export const useSyncWebsite = () => {
   })
 }
 
-const useDocumentDetailKey = [NAME_SPACE, 'documentDetail']
+const useDocumentDetailKey = [NAME_SPACE, 'documentDetail', 'withoutMetaData']
 export const useDocumentDetail = (payload: {
   datasetId: string
   documentId: string
@@ -114,7 +118,7 @@ export const useDocumentMetadata = (payload: {
 }) => {
   const { datasetId, documentId, params } = payload
   return useQuery<DocumentDetailResponse>({
-    queryKey: [...useDocumentDetailKey, 'withMetaData', datasetId, documentId],
+    queryKey: [...useDocumentDetailKey, 'onlyMetaData', datasetId, documentId],
     queryFn: () => get<DocumentDetailResponse>(`/datasets/${datasetId}/documents/${documentId}`, { params }),
   })
 }