Переглянути джерело

fix: Add datasets list access control and fix datasets config display issue (#12533)

Co-authored-by: nite-knite <nkCoding@gmail.com>
Wu Tianwei 3 місяців тому
батько
коміт
2e97ba5700

+ 15 - 6
web/app/(commonLayout)/datasets/Container.tsx

@@ -4,7 +4,8 @@
 import { useEffect, useMemo, useRef, useState } from 'react'
 import { useRouter } from 'next/navigation'
 import { useTranslation } from 'react-i18next'
-import { useDebounceFn } from 'ahooks'
+import { useBoolean, useDebounceFn } from 'ahooks'
+import { useQuery } from '@tanstack/react-query'
 
 // Components
 import ExternalAPIPanel from '../../components/datasets/external-api/external-api-panel'
@@ -16,7 +17,9 @@ import TabSliderNew from '@/app/components/base/tab-slider-new'
 import TagManagementModal from '@/app/components/base/tag-management'
 import TagFilter from '@/app/components/base/tag-management/filter'
 import Button from '@/app/components/base/button'
+import Input from '@/app/components/base/input'
 import { ApiConnectionMod } from '@/app/components/base/icons/src/vender/solid/development'
+import CheckboxWithLabel from '@/app/components/datasets/create/website/base/checkbox-with-label'
 
 // Services
 import { fetchDatasetApiBaseUrl } from '@/service/datasets'
@@ -26,16 +29,14 @@ import { useTabSearchParams } from '@/hooks/use-tab-searchparams'
 import { useStore as useTagStore } from '@/app/components/base/tag-management/store'
 import { useAppContext } from '@/context/app-context'
 import { useExternalApiPanel } from '@/context/external-api-panel-context'
-// eslint-disable-next-line import/order
-import { useQuery } from '@tanstack/react-query'
-import Input from '@/app/components/base/input'
 
 const Container = () => {
   const { t } = useTranslation()
   const router = useRouter()
-  const { currentWorkspace } = useAppContext()
+  const { currentWorkspace, isCurrentWorkspaceOwner } = useAppContext()
   const showTagManagementModal = useTagStore(s => s.showTagManagementModal)
   const { showExternalApiPanel, setShowExternalApiPanel } = useExternalApiPanel()
+  const [includeAll, { toggle: toggleIncludeAll }] = useBoolean(false)
 
   const options = useMemo(() => {
     return [
@@ -90,6 +91,14 @@ const Container = () => {
         />
         {activeTab === 'dataset' && (
           <div className='flex items-center justify-center gap-2'>
+            {isCurrentWorkspaceOwner && <CheckboxWithLabel
+              isChecked={includeAll}
+              onChange={toggleIncludeAll}
+              label={t('dataset.allKnowledge')}
+              labelClassName='system-md-regular text-text-secondary'
+              className='mr-2'
+              tooltip={t('dataset.allKnowledgeDescription') as string}
+            />}
             <TagFilter type='knowledge' value={tagFilterValue} onChange={handleTagsChange} />
             <Input
               showLeftIcon
@@ -113,7 +122,7 @@ const Container = () => {
       </div>
       {activeTab === 'dataset' && (
         <>
-          <Datasets containerRef={containerRef} tags={tagIDs} keywords={searchKeywords} />
+          <Datasets containerRef={containerRef} tags={tagIDs} keywords={searchKeywords} includeAll={includeAll} />
           <DatasetFooter />
           {showTagManagementModal && (
             <TagManagementModal type='knowledge' show={showTagManagementModal} />

+ 7 - 3
web/app/(commonLayout)/datasets/Datasets.tsx

@@ -6,7 +6,7 @@ import { debounce } from 'lodash-es'
 import { useTranslation } from 'react-i18next'
 import NewDatasetCard from './NewDatasetCard'
 import DatasetCard from './DatasetCard'
-import type { DataSetListResponse } from '@/models/datasets'
+import type { DataSetListResponse, FetchDatasetsParams } from '@/models/datasets'
 import { fetchDatasets } from '@/service/datasets'
 import { useAppContext } from '@/context/app-context'
 
@@ -15,13 +15,15 @@ const getKey = (
   previousPageData: DataSetListResponse,
   tags: string[],
   keyword: string,
+  includeAll: boolean,
 ) => {
   if (!pageIndex || previousPageData.has_more) {
-    const params: any = {
+    const params: FetchDatasetsParams = {
       url: 'datasets',
       params: {
         page: pageIndex + 1,
         limit: 30,
+        include_all: includeAll,
       },
     }
     if (tags.length)
@@ -37,16 +39,18 @@ type Props = {
   containerRef: React.RefObject<HTMLDivElement>
   tags: string[]
   keywords: string
+  includeAll: boolean
 }
 
 const Datasets = ({
   containerRef,
   tags,
   keywords,
+  includeAll,
 }: Props) => {
   const { isCurrentWorkspaceEditor } = useAppContext()
   const { data, isLoading, setSize, mutate } = useSWRInfinite(
-    (pageIndex: number, previousPageData: DataSetListResponse) => getKey(pageIndex, previousPageData, tags, keywords),
+    (pageIndex: number, previousPageData: DataSetListResponse) => getKey(pageIndex, previousPageData, tags, keywords, includeAll),
     fetchDatasets,
     { revalidateFirstPage: false, revalidateAll: true },
   )

+ 45 - 3
web/app/(commonLayout)/datasets/template/template.en.mdx

@@ -1,5 +1,5 @@
 import { CodeGroup } from '@/app/components/develop/code.tsx'
-import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from '@/app/components/develop/md.tsx'
+import { Row, Col, Properties, Property, Heading, SubProperty, PropertyInstruction, Paragraph } from '@/app/components/develop/md.tsx'
 
 # Knowledge API
 
@@ -80,6 +80,27 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
               - <code>max_tokens</code> The maximum length (tokens) must be validated to be shorter than the length of the parent chunk
               - <code>chunk_overlap</code> Define the overlap between adjacent chunks (optional)
       </Property>
+      <PropertyInstruction>If the knowledge base has no parameters configured yet, provide the following parameters with the first upload; any parameter that is omitted falls back to its default value.</PropertyInstruction>
+      <Property name='retrieval_model' type='object' key='retrieval_model'>
+        Retrieval model
+          - <code>search_method</code> (string) Search method
+            - <code>hybrid_search</code> Hybrid search
+            - <code>semantic_search</code> Semantic search
+            - <code>full_text_search</code> Full-text search
+          - <code>reranking_enable</code> (bool) Whether to enable reranking
+          - <code>reranking_model</code> (object) Rerank model configuration
+            - <code>reranking_provider_name</code> (string) Rerank model provider
+            - <code>reranking_model_name</code> (string) Rerank model name
+          - <code>top_k</code> (int) Number of results to return
+          - <code>score_threshold_enabled</code> (bool) Whether to enable score threshold
+          - <code>score_threshold</code> (float) Score threshold
+      </Property>
+      <Property name='embedding_model' type='string' key='embedding_model'>
+        Embedding model name
+      </Property>
+      <Property name='embedding_model_provider' type='string' key='embedding_model_provider'>
+        Embedding model provider
+      </Property>
     </Properties>
   </Col>
   <Col sticky>
@@ -197,6 +218,27 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
       <Property name='file' type='multipart/form-data' key='file'>
         Files that need to be uploaded.
       </Property>
+      <PropertyInstruction>If the knowledge base has no parameters configured yet, provide the following parameters with the first upload; any parameter that is omitted falls back to its default value.</PropertyInstruction>
+      <Property name='retrieval_model' type='object' key='retrieval_model'>
+        Retrieval model
+          - <code>search_method</code> (string) Search method
+            - <code>hybrid_search</code> Hybrid search
+            - <code>semantic_search</code> Semantic search
+            - <code>full_text_search</code> Full-text search
+          - <code>reranking_enable</code> (bool) Whether to enable reranking
+          - <code>reranking_model</code> (object) Rerank model configuration
+            - <code>reranking_provider_name</code> (string) Rerank model provider
+            - <code>reranking_model_name</code> (string) Rerank model name
+          - <code>top_k</code> (int) Number of results to return
+          - <code>score_threshold_enabled</code> (bool) Whether to enable score threshold
+          - <code>score_threshold</code> (float) Score threshold
+      </Property>
+      <Property name='embedding_model' type='string' key='embedding_model'>
+        Embedding model name
+      </Property>
+      <Property name='embedding_model_provider' type='string' key='embedding_model_provider'>
+        Embedding model provider
+      </Property>
     </Properties>
   </Col>
   <Col sticky>
@@ -1188,10 +1230,10 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
         - <code>reranking_mode</code> (object) Rerank model configuration, required if reranking is enabled
             - <code>reranking_provider_name</code> (string) Rerank model provider
             - <code>reranking_model_name</code> (string) Rerank model name
-        - <code>weights</code> (double) Semantic search weight setting in hybrid search mode
+        - <code>weights</code> (float) Semantic search weight setting in hybrid search mode
         - <code>top_k</code> (integer) Number of results to return (optional)
         - <code>score_threshold_enabled</code> (bool) Whether to enable score threshold
-        - <code>score_threshold</code> (double) Score threshold
+        - <code>score_threshold</code> (float) Score threshold
       </Property>
       <Property name='external_retrieval_model' type='object' key='external_retrieval_model'>
           Unused field

+ 46 - 4
web/app/(commonLayout)/datasets/template/template.zh.mdx

@@ -1,5 +1,5 @@
 import { CodeGroup } from '@/app/components/develop/code.tsx'
-import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from '@/app/components/develop/md.tsx'
+import { Row, Col, Properties, Property, Heading, SubProperty, PropertyInstruction, Paragraph } from '@/app/components/develop/md.tsx'
 
 # 知识库 API
 
@@ -80,6 +80,27 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
               - <code>max_tokens</code> 最大长度 (token) 需要校验小于父级的长度
               - <code>chunk_overlap</code> 分段重叠指的是在对数据进行分段时,段与段之间存在一定的重叠部分(选填)
       </Property>
+      <PropertyInstruction>当知识库未设置任何参数的时候,首次上传需要提供以下参数,未提供则使用默认选项:</PropertyInstruction>
+      <Property name='retrieval_model' type='object' key='retrieval_model'>
+        检索模式
+          - <code>search_method</code> (string) 检索方法
+            - <code>hybrid_search</code> 混合检索
+            - <code>semantic_search</code> 语义检索
+            - <code>full_text_search</code> 全文检索
+          - <code>reranking_enable</code> (bool) 是否开启 rerank
+          - <code>reranking_model</code> (object) Rerank 模型配置
+            - <code>reranking_provider_name</code> (string) Rerank 模型的提供商
+            - <code>reranking_model_name</code> (string) Rerank 模型的名称
+          - <code>top_k</code> (int) 召回条数
+          - <code>score_threshold_enabled</code> (bool) 是否开启召回分数限制
+          - <code>score_threshold</code> (float) 召回分数限制
+      </Property>
+      <Property name='embedding_model' type='string' key='embedding_model'>
+        Embedding 模型名称
+      </Property>
+      <Property name='embedding_model_provider' type='string' key='embedding_model_provider'>
+        Embedding 模型供应商
+      </Property>
     </Properties>
   </Col>
   <Col sticky>
@@ -197,6 +218,27 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
       <Property name='file' type='multipart/form-data' key='file'>
         需要上传的文件。
       </Property>
+      <PropertyInstruction>当知识库未设置任何参数的时候,首次上传需要提供以下参数,未提供则使用默认选项:</PropertyInstruction>
+      <Property name='retrieval_model' type='object' key='retrieval_model'>
+        检索模式
+          - <code>search_method</code> (string) 检索方法
+            - <code>hybrid_search</code> 混合检索
+            - <code>semantic_search</code> 语义检索
+            - <code>full_text_search</code> 全文检索
+          - <code>reranking_enable</code> (bool) 是否开启 rerank
+          - <code>reranking_model</code> (object) Rerank 模型配置
+            - <code>reranking_provider_name</code> (string) Rerank 模型的提供商
+            - <code>reranking_model_name</code> (string) Rerank 模型的名称
+          - <code>top_k</code> (int) 召回条数
+          - <code>score_threshold_enabled</code> (bool) 是否开启召回分数限制
+          - <code>score_threshold</code> (float) 召回分数限制
+      </Property>
+      <Property name='embedding_model' type='string' key='embedding_model'>
+        Embedding 模型名称
+      </Property>
+      <Property name='embedding_model_provider' type='string' key='embedding_model_provider'>
+        Embedding 模型供应商
+      </Property>
     </Properties>
   </Col>
   <Col sticky>
@@ -1186,13 +1228,13 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
           - <code>full_text_search</code> 全文检索
           - <code>hybrid_search</code> 混合检索
         - <code>reranking_enable</code> (bool) 是否启用 Reranking,非必填,如果检索模式为 semantic_search 模式或者 hybrid_search 则传值
-        - <code>reranking_mode</code> (object) Rerank模型配置,非必填,如果启用了 reranking 则传值
+        - <code>reranking_mode</code> (object) Rerank 模型配置,非必填,如果启用了 reranking 则传值
             - <code>reranking_provider_name</code> (string) Rerank 模型提供商
             - <code>reranking_model_name</code> (string) Rerank 模型名称
-        - <code>weights</code> (double) 混合检索模式下语意检索的权重设置
+        - <code>weights</code> (float) 混合检索模式下语意检索的权重设置
         - <code>top_k</code> (integer) 返回结果数量,非必填
         - <code>score_threshold_enabled</code> (bool) 是否开启 score 阈值
-        - <code>score_threshold</code> (double) Score 阈值
+        - <code>score_threshold</code> (float) Score 阈值
       </Property>
       <Property name='external_retrieval_model' type='object' key='external_retrieval_model'>
           未启用字段

+ 8 - 6
web/app/components/datasets/create/step-two/index.tsx

@@ -575,6 +575,8 @@ const StepTwo = ({
   const economyDomRef = useRef<HTMLDivElement>(null)
   const isHoveringEconomy = useHover(economyDomRef)
 
+  const isModelAndRetrievalConfigDisabled = !!datasetId && !!currentDataset?.data_source_type
+
   return (
     <div className='flex w-full h-full'>
       <div className={cn('relative h-full w-1/2 py-6 overflow-y-auto', isMobile ? 'px-4' : 'px-12')}>
@@ -931,15 +933,15 @@ const StepTwo = ({
           <div className='mt-5'>
             <div className={cn('system-md-semibold mb-1', datasetId && 'flex justify-between items-center')}>{t('datasetSettings.form.embeddingModel')}</div>
             <ModelSelector
-              readonly={!!datasetId}
-              triggerClassName={datasetId ? 'opacity-50' : ''}
+              readonly={isModelAndRetrievalConfigDisabled}
+              triggerClassName={isModelAndRetrievalConfigDisabled ? 'opacity-50' : ''}
               defaultModel={embeddingModel}
               modelList={embeddingModelList}
               onSelect={(model: DefaultModel) => {
                 setEmbeddingModel(model)
               }}
             />
-            {!!datasetId && (
+            {isModelAndRetrievalConfigDisabled && (
               <div className='mt-2 system-xs-medium'>
                 {t('datasetCreation.stepTwo.indexSettingTip')}
                 <Link className='text-text-accent' href={`/datasets/${datasetId}/settings`}>{t('datasetCreation.stepTwo.datasetSettingLink')}</Link>
@@ -950,7 +952,7 @@ const StepTwo = ({
         <Divider className='my-5' />
         {/* Retrieval Method Config */}
         <div>
-          {!datasetId
+          {!isModelAndRetrievalConfigDisabled
             ? (
               <div className={'mb-1'}>
                 <div className='system-md-semibold mb-0.5'>{t('datasetSettings.form.retrievalSetting.title')}</div>
@@ -971,14 +973,14 @@ const StepTwo = ({
               getIndexing_technique() === IndexingType.QUALIFIED
                 ? (
                   <RetrievalMethodConfig
-                    disabled={!!datasetId}
+                    disabled={isModelAndRetrievalConfigDisabled}
                     value={retrievalConfig}
                     onChange={setRetrievalConfig}
                   />
                 )
                 : (
                   <EconomicalRetrievalMethodConfig
-                    disabled={!!datasetId}
+                    disabled={isModelAndRetrievalConfigDisabled}
                     value={retrievalConfig}
                     onChange={setRetrievalConfig}
                   />

+ 29 - 27
web/app/components/datasets/settings/form/index.tsx

@@ -223,7 +223,7 @@ const Form = () => {
               <IndexMethodRadio
                 disable={!currentDataset?.embedding_available}
                 value={indexMethod}
-                onChange={v => setIndexMethod(v)}
+                onChange={v => setIndexMethod(v!)}
                 docForm={currentDataset.doc_form}
                 currentValue={currentDataset.indexing_technique}
               />
@@ -300,35 +300,37 @@ const Form = () => {
             </div>
           </div>
         </>
-        : <>
-          <div className='w-full h-0 border-b border-divider-subtle my-1' />
-          <div className={rowClass}>
-            <div className={labelClass}>
-              <div>
-                <div className='text-text-secondary system-sm-semibold'>{t('datasetSettings.form.retrievalSetting.title')}</div>
-                <div className='body-xs-regular text-text-tertiary'>
-                  <a target='_blank' rel='noopener noreferrer' href='https://docs.dify.ai/guides/knowledge-base/create-knowledge-and-upload-documents#id-4-retrieval-settings' className='text-text-accent'>{t('datasetSettings.form.retrievalSetting.learnMore')}</a>
-                  {t('datasetSettings.form.retrievalSetting.description')}
+        : indexMethod
+          ? <>
+            <div className='w-full h-0 border-b border-divider-subtle my-1' />
+            <div className={rowClass}>
+              <div className={labelClass}>
+                <div>
+                  <div className='text-text-secondary system-sm-semibold'>{t('datasetSettings.form.retrievalSetting.title')}</div>
+                  <div className='body-xs-regular text-text-tertiary'>
+                    <a target='_blank' rel='noopener noreferrer' href='https://docs.dify.ai/guides/knowledge-base/create-knowledge-and-upload-documents#id-4-retrieval-settings' className='text-text-accent'>{t('datasetSettings.form.retrievalSetting.learnMore')}</a>
+                    {t('datasetSettings.form.retrievalSetting.description')}
+                  </div>
                 </div>
               </div>
+              <div className='grow'>
+                {indexMethod === IndexingType.QUALIFIED
+                  ? (
+                    <RetrievalMethodConfig
+                      value={retrievalConfig}
+                      onChange={setRetrievalConfig}
+                    />
+                  )
+                  : (
+                    <EconomicalRetrievalMethodConfig
+                      value={retrievalConfig}
+                      onChange={setRetrievalConfig}
+                    />
+                  )}
+              </div>
             </div>
-            <div className='grow'>
-              {indexMethod === 'high_quality'
-                ? (
-                  <RetrievalMethodConfig
-                    value={retrievalConfig}
-                    onChange={setRetrievalConfig}
-                  />
-                )
-                : (
-                  <EconomicalRetrievalMethodConfig
-                    value={retrievalConfig}
-                    onChange={setRetrievalConfig}
-                  />
-                )}
-            </div>
-          </div>
-        </>
+          </>
+          : null
       }
       <div className='w-full h-0 border-b border-divider-subtle my-1' />
       <div className={rowClass}>

+ 7 - 0
web/app/components/develop/md.tsx

@@ -1,4 +1,5 @@
 'use client'
+import type { PropsWithChildren } from 'react'
 import classNames from '@/utils/classnames'
 
 type IChildrenProps = {
@@ -139,3 +140,9 @@ export function SubProperty({ name, type, children }: ISubProperty) {
     </li>
   )
 }
+
+export function PropertyInstruction({ children }: PropsWithChildren<{}>) {
+  return (
+    <li className="m-0 px-0 py-4 first:pt-0 italic">{children}</li>
+  )
+}

+ 2 - 0
web/i18n/en-US/dataset.ts

@@ -166,6 +166,8 @@ const translation = {
     cancel: 'Cancel',
   },
   preprocessDocument: '{{num}} Preprocess Documents',
+  allKnowledge: 'All Knowledge',
+  allKnowledgeDescription: 'Select to display all knowledge in this workspace. Only the Workspace Owner can manage all knowledge.',
 }
 
 export default translation

+ 2 - 0
web/i18n/zh-Hans/dataset.ts

@@ -166,6 +166,8 @@ const translation = {
     cancel: '取消',
   },
   preprocessDocument: '{{num}} 个预处理文档',
+  allKnowledge: '所有知识库',
+  allKnowledgeDescription: '选择以显示该工作区内所有知识库。只有工作区所有者才能管理所有知识库。',
 }
 
 export default translation

+ 11 - 0
web/models/datasets.ts

@@ -132,6 +132,17 @@ export type FileItem = {
   progress: number
 }
 
+export type FetchDatasetsParams = {
+  url: string
+  params: {
+    page: number
+    tag_ids?: string[]
+    limit: number
+    include_all: boolean
+    keyword?: string
+  }
+}
+
 export type DataSetListResponse = {
   data: DataSet[]
   has_more: boolean

+ 2 - 1
web/service/datasets.ts

@@ -13,6 +13,7 @@ import type {
   ExternalAPIUsage,
   ExternalKnowledgeBaseHitTestingResponse,
   ExternalKnowledgeItem,
+  FetchDatasetsParams,
   FileIndexingEstimateResponse,
   HitTestingRecordsResponse,
   HitTestingResponse,
@@ -67,7 +68,7 @@ export const fetchDatasetRelatedApps: Fetcher<RelatedAppResponse, string> = (dat
   return get<RelatedAppResponse>(`/datasets/${datasetId}/related-apps`)
 }
 
-export const fetchDatasets: Fetcher<DataSetListResponse, { url: string; params: { page: number; ids?: string[]; limit?: number } }> = ({ url, params }) => {
+export const fetchDatasets: Fetcher<DataSetListResponse, FetchDatasetsParams> = ({ url, params }) => {
   const urlParams = qs.stringify(params, { indices: false })
   return get<DataSetListResponse>(`${url}?${urlParams}`)
 }