Parcourir la source

feat: support firecrawl frontend code (#5226)

Joel il y a 10 mois
Parent
commit
28554350de
51 fichiers modifiés avec 1979 ajouts et 145 suppressions
  1. 3 0
      web/app/(commonLayout)/app/(appDetailLayout)/[appId]/layout.tsx
  2. 4 4
      web/app/(commonLayout)/apps/Apps.tsx
  3. 5 0
      web/app/components/base/icons/assets/public/other/row-struct.svg
  4. 5 0
      web/app/components/base/icons/assets/vender/line/others/icon-3-dots.svg
  5. 56 0
      web/app/components/base/icons/src/public/other/RowStruct.json
  6. 16 0
      web/app/components/base/icons/src/public/other/RowStruct.tsx
  7. 1 0
      web/app/components/base/icons/src/public/other/index.ts
  8. 39 0
      web/app/components/base/icons/src/vender/line/others/Icon3Dots.json
  9. 16 0
      web/app/components/base/icons/src/vender/line/others/Icon3Dots.tsx
  10. 1 0
      web/app/components/base/icons/src/vender/line/others/index.ts
  11. 38 15
      web/app/components/datasets/create/index.tsx
  12. 48 6
      web/app/components/datasets/create/step-one/index.tsx
  13. 1 0
      web/app/components/datasets/create/step-two/index.module.css
  14. 52 2
      web/app/components/datasets/create/step-two/index.tsx
  15. 29 0
      web/app/components/datasets/create/website/firecrawl/base/checkbox-with-label.tsx
  16. 30 0
      web/app/components/datasets/create/website/firecrawl/base/error-message.tsx
  17. 54 0
      web/app/components/datasets/create/website/firecrawl/base/field.tsx
  18. 58 0
      web/app/components/datasets/create/website/firecrawl/base/input.tsx
  19. 55 0
      web/app/components/datasets/create/website/firecrawl/base/options-wrap.tsx
  20. 48 0
      web/app/components/datasets/create/website/firecrawl/base/url-input.tsx
  21. 40 0
      web/app/components/datasets/create/website/firecrawl/crawled-result-item.tsx
  22. 87 0
      web/app/components/datasets/create/website/firecrawl/crawled-result.tsx
  23. 37 0
      web/app/components/datasets/create/website/firecrawl/crawling.tsx
  24. 42 0
      web/app/components/datasets/create/website/firecrawl/header.tsx
  25. 216 0
      web/app/components/datasets/create/website/firecrawl/index.tsx
  26. 24 0
      web/app/components/datasets/create/website/firecrawl/mock-crawl-result.ts
  27. 83 0
      web/app/components/datasets/create/website/firecrawl/options.tsx
  28. 72 0
      web/app/components/datasets/create/website/index.tsx
  29. 36 0
      web/app/components/datasets/create/website/no-data.tsx
  30. 41 0
      web/app/components/datasets/create/website/preview.tsx
  31. 10 0
      web/app/components/datasets/documents/detail/settings/index.tsx
  32. 4 1
      web/app/components/datasets/documents/index.tsx
  33. 14 8
      web/app/components/datasets/documents/list.tsx
  34. 49 101
      web/app/components/header/account-setting/data-source-page/data-source-notion/index.tsx
  35. 8 6
      web/app/components/header/account-setting/data-source-page/data-source-notion/operate/index.tsx
  36. 163 0
      web/app/components/header/account-setting/data-source-page/data-source-website/config-firecrawl-modal.tsx
  37. 82 0
      web/app/components/header/account-setting/data-source-page/data-source-website/index.tsx
  38. 2 0
      web/app/components/header/account-setting/data-source-page/index.tsx
  39. 78 0
      web/app/components/header/account-setting/data-source-page/panel/config-item.tsx
  40. 138 0
      web/app/components/header/account-setting/data-source-page/panel/index.tsx
  41. 5 0
      web/app/components/header/account-setting/data-source-page/panel/style.module.css
  42. 4 0
      web/app/components/header/account-setting/data-source-page/panel/types.ts
  43. 13 0
      web/i18n/en-US/common.ts
  44. 32 0
      web/i18n/en-US/dataset-creation.ts
  45. 2 1
      web/i18n/en-US/dataset-documents.ts
  46. 13 0
      web/i18n/zh-Hans/common.ts
  47. 32 0
      web/i18n/zh-Hans/dataset-creation.ts
  48. 1 0
      web/i18n/zh-Hans/dataset-documents.ts
  49. 33 0
      web/models/common.ts
  50. 24 1
      web/models/datasets.ts
  51. 35 0
      web/service/datasets.ts

+ 3 - 0
web/app/(commonLayout)/app/(appDetailLayout)/[appId]/layout.tsx

@@ -109,6 +109,9 @@ const AppDetailLayout: FC<IAppDetailLayoutProps> = (props) => {
         setAppDetail(res)
         setNavigation(getNavigations(appId, isCurrentWorkspaceManager, isCurrentWorkspaceEditor, res.mode))
       }
+    }).catch((e: any) => {
+      if (e.status === 404)
+        router.replace('/apps')
     })
   }, [appId, isCurrentWorkspaceManager, isCurrentWorkspaceEditor])
 

+ 4 - 4
web/app/(commonLayout)/apps/Apps.tsx

@@ -73,10 +73,10 @@ const Apps = () => {
 
   const anchorRef = useRef<HTMLDivElement>(null)
   const options = [
-    { value: 'all', text: t('app.types.all'), icon: <DotsGrid className='w-[14px] h-[14px] mr-1'/> },
-    { value: 'chat', text: t('app.types.chatbot'), icon: <ChatBot className='w-[14px] h-[14px] mr-1'/> },
-    { value: 'agent-chat', text: t('app.types.agent'), icon: <CuteRobot className='w-[14px] h-[14px] mr-1'/> },
-    { value: 'workflow', text: t('app.types.workflow'), icon: <Route className='w-[14px] h-[14px] mr-1'/> },
+    { value: 'all', text: t('app.types.all'), icon: <DotsGrid className='w-[14px] h-[14px] mr-1' /> },
+    { value: 'chat', text: t('app.types.chatbot'), icon: <ChatBot className='w-[14px] h-[14px] mr-1' /> },
+    { value: 'agent-chat', text: t('app.types.agent'), icon: <CuteRobot className='w-[14px] h-[14px] mr-1' /> },
+    { value: 'workflow', text: t('app.types.workflow'), icon: <Route className='w-[14px] h-[14px] mr-1' /> },
   ]
 
   useEffect(() => {

+ 5 - 0
web/app/components/base/icons/assets/public/other/row-struct.svg

@@ -0,0 +1,5 @@
+<svg width="624" height="48" viewBox="0 0 624 48" fill="none" xmlns="http://www.w3.org/2000/svg">
+<rect x="8" y="7" width="16" height="16" rx="5" fill="#F2F4F7"/>
+<rect x="32" y="10" width="233" height="10" rx="3" fill="#EAECF0"/>
+<rect x="32" y="31" width="345" height="6" rx="3" fill="#F2F4F7"/>
+</svg>

+ 5 - 0
web/app/components/base/icons/assets/vender/line/others/icon-3-dots.svg

@@ -0,0 +1,5 @@
+<svg width="16" height="16" viewBox="0 0 16 16" fill="none" xmlns="http://www.w3.org/2000/svg">
+<g id="Icon-3-dots">
+<path id="Icon" d="M5 6.5V5M8.93934 7.56066L10 6.5M10.0103 11.5H11.5103" stroke="#374151" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"/>
+</g>
+</svg>

+ 56 - 0
web/app/components/base/icons/src/public/other/RowStruct.json

@@ -0,0 +1,56 @@
+{
+	"icon": {
+		"type": "element",
+		"isRootNode": true,
+		"name": "svg",
+		"attributes": {
+			"width": "624",
+			"height": "48",
+			"viewBox": "0 0 624 48",
+			"fill": "none",
+			"xmlns": "http://www.w3.org/2000/svg"
+		},
+		"children": [
+			{
+				"type": "element",
+				"name": "rect",
+				"attributes": {
+					"x": "8",
+					"y": "7",
+					"width": "16",
+					"height": "16",
+					"rx": "5",
+					"fill": "#F2F4F7"
+				},
+				"children": []
+			},
+			{
+				"type": "element",
+				"name": "rect",
+				"attributes": {
+					"x": "32",
+					"y": "10",
+					"width": "233",
+					"height": "10",
+					"rx": "3",
+					"fill": "#EAECF0"
+				},
+				"children": []
+			},
+			{
+				"type": "element",
+				"name": "rect",
+				"attributes": {
+					"x": "32",
+					"y": "31",
+					"width": "345",
+					"height": "6",
+					"rx": "3",
+					"fill": "#F2F4F7"
+				},
+				"children": []
+			}
+		]
+	},
+	"name": "RowStruct"
+}

+ 16 - 0
web/app/components/base/icons/src/public/other/RowStruct.tsx

@@ -0,0 +1,16 @@
+// GENERATED BY script
+// DO NOT EDIT IT MANUALLY
+
+import * as React from 'react'
+import data from './RowStruct.json'
+import IconBase from '@/app/components/base/icons/IconBase'
+import type { IconBaseProps, IconData } from '@/app/components/base/icons/IconBase'
+
+const Icon = React.forwardRef<React.MutableRefObject<SVGElement>, Omit<IconBaseProps, 'data'>>((
+  props,
+  ref,
+) => <IconBase {...props} ref={ref} data={data as IconData} />)
+
+Icon.displayName = 'RowStruct'
+
+export default Icon

+ 1 - 0
web/app/components/base/icons/src/public/other/index.ts

@@ -1,2 +1,3 @@
 export { default as Icon3Dots } from './Icon3Dots'
 export { default as DefaultToolIcon } from './DefaultToolIcon'
+export { default as RowStruct } from './RowStruct'

+ 39 - 0
web/app/components/base/icons/src/vender/line/others/Icon3Dots.json

@@ -0,0 +1,39 @@
+{
+	"icon": {
+		"type": "element",
+		"isRootNode": true,
+		"name": "svg",
+		"attributes": {
+			"width": "16",
+			"height": "16",
+			"viewBox": "0 0 16 16",
+			"fill": "none",
+			"xmlns": "http://www.w3.org/2000/svg"
+		},
+		"children": [
+			{
+				"type": "element",
+				"name": "g",
+				"attributes": {
+					"id": "Icon-3-dots"
+				},
+				"children": [
+					{
+						"type": "element",
+						"name": "path",
+						"attributes": {
+							"id": "Icon",
+							"d": "M5 6.5V5M8.93934 7.56066L10 6.5M10.0103 11.5H11.5103",
+							"stroke": "currentColor",
+							"stroke-width": "2",
+							"stroke-linecap": "round",
+							"stroke-linejoin": "round"
+						},
+						"children": []
+					}
+				]
+			}
+		]
+	},
+	"name": "Icon3Dots"
+}

+ 16 - 0
web/app/components/base/icons/src/vender/line/others/Icon3Dots.tsx

@@ -0,0 +1,16 @@
+// GENERATED BY script
+// DO NOT EDIT IT MANUALLY
+
+import * as React from 'react'
+import data from './Icon3Dots.json'
+import IconBase from '@/app/components/base/icons/IconBase'
+import type { IconBaseProps, IconData } from '@/app/components/base/icons/IconBase'
+
+const Icon = React.forwardRef<React.MutableRefObject<SVGElement>, Omit<IconBaseProps, 'data'>>((
+  props,
+  ref,
+) => <IconBase {...props} ref={ref} data={data as IconData} />)
+
+Icon.displayName = 'Icon3Dots'
+
+export default Icon

+ 1 - 0
web/app/components/base/icons/src/vender/line/others/index.ts

@@ -3,4 +3,5 @@ export { default as Colors } from './Colors'
 export { default as DragHandle } from './DragHandle'
 export { default as Exchange02 } from './Exchange02'
 export { default as FileCode } from './FileCode'
+export { default as Icon3Dots } from './Icon3Dots'
 export { default as Tools } from './Tools'

+ 38 - 15
web/app/components/datasets/create/index.tsx

@@ -8,7 +8,7 @@ import StepOne from './step-one'
 import StepTwo from './step-two'
 import StepThree from './step-three'
 import { DataSourceType } from '@/models/datasets'
-import type { DataSet, FileItem, createDocumentResponse } from '@/models/datasets'
+import type { CrawlOptions, CrawlResultItem, DataSet, FileItem, createDocumentResponse } from '@/models/datasets'
 import { fetchDataSource } from '@/service/common'
 import { fetchDatasetDetail } from '@/service/datasets'
 import type { NotionPage } from '@/models/common'
@@ -19,6 +19,15 @@ type DatasetUpdateFormProps = {
   datasetId?: string
 }
 
+const DEFAULT_CRAWL_OPTIONS: CrawlOptions = { // initial value of the crawlOptions state held by DatasetUpdateForm
+  crawl_sub_pages: true,
+  only_main_content: true,
+  includes: '',
+  excludes: '',
+  limit: 10,
+  max_depth: '', // an empty string rather than a number; presumably means "no depth limit" — TODO confirm against the crawl API
+}
+
 const DatasetUpdateForm = ({ datasetId }: DatasetUpdateFormProps) => {
   const { t } = useTranslation()
   const { setShowAccountSettingModal } = useModalContext()
@@ -36,9 +45,13 @@ const DatasetUpdateForm = ({ datasetId }: DatasetUpdateFormProps) => {
     setNotionPages(value)
   }
 
+  const [websitePages, setWebsitePages] = useState<CrawlResultItem[]>([])
+  const [crawlOptions, setCrawlOptions] = useState<CrawlOptions>(DEFAULT_CRAWL_OPTIONS)
+
   const updateFileList = (preparedFiles: FileItem[]) => {
     setFiles(preparedFiles)
   }
+  const [fireCrawlJobId, setFireCrawlJobId] = useState('')
 
   const updateFile = (fileItem: FileItem, progress: number, list: FileItem[]) => {
     const targetIndex = list.findIndex(file => file.fileID === fileItem.fileID)
@@ -108,20 +121,27 @@ const DatasetUpdateForm = ({ datasetId }: DatasetUpdateFormProps) => {
         <StepsNavBar step={step} datasetId={datasetId} />
       </div>
       <div className="grow bg-white">
-        {step === 1 && <StepOne
-          hasConnection={hasConnection}
-          onSetting={() => setShowAccountSettingModal({ payload: 'data-source' })}
-          datasetId={datasetId}
-          dataSourceType={dataSourceType}
-          dataSourceTypeDisable={!!detail?.data_source_type}
-          changeType={setDataSourceType}
-          files={fileList}
-          updateFile={updateFile}
-          updateFileList={updateFileList}
-          notionPages={notionPages}
-          updateNotionPages={updateNotionPages}
-          onStepChange={nextStep}
-        />}
+        <div className={step === 1 ? 'block h-full' : 'hidden'}>
+          <StepOne
+            hasConnection={hasConnection}
+            onSetting={() => setShowAccountSettingModal({ payload: 'data-source' })}
+            datasetId={datasetId}
+            dataSourceType={dataSourceType}
+            dataSourceTypeDisable={!!detail?.data_source_type}
+            changeType={setDataSourceType}
+            files={fileList}
+            updateFile={updateFile}
+            updateFileList={updateFileList}
+            notionPages={notionPages}
+            updateNotionPages={updateNotionPages}
+            onStepChange={nextStep}
+            websitePages={websitePages}
+            updateWebsitePages={setWebsitePages}
+            onFireCrawlJobIdChange={setFireCrawlJobId}
+            crawlOptions={crawlOptions}
+            onCrawlOptionsChange={setCrawlOptions}
+          />
+        </div>
         {(step === 2 && (!datasetId || (datasetId && !!detail))) && <StepTwo
           isAPIKeySet={!!embeddingsDefaultModel}
           onSetting={() => setShowAccountSettingModal({ payload: 'provider' })}
@@ -130,9 +150,12 @@ const DatasetUpdateForm = ({ datasetId }: DatasetUpdateFormProps) => {
           dataSourceType={dataSourceType}
           files={fileList.map(file => file.file)}
           notionPages={notionPages}
+          websitePages={websitePages}
+          fireCrawlJobId={fireCrawlJobId}
           onStepChange={changeStep}
           updateIndexingTypeCache={updateIndexingTypeCache}
           updateResultCache={updateResultCache}
+          crawlOptions={crawlOptions}
         />}
         {step === 3 && <StepThree
           datasetId={datasetId}

+ 48 - 6
web/app/components/datasets/create/step-one/index.tsx

@@ -6,8 +6,10 @@ import FilePreview from '../file-preview'
 import FileUploader from '../file-uploader'
 import NotionPagePreview from '../notion-page-preview'
 import EmptyDatasetCreationModal from '../empty-dataset-creation-modal'
+import Website from '../website'
+import WebsitePreview from '../website/preview'
 import s from './index.module.css'
-import type { FileItem } from '@/models/datasets'
+import type { CrawlOptions, CrawlResultItem, FileItem } from '@/models/datasets'
 import type { NotionPage } from '@/models/common'
 import { DataSourceType } from '@/models/datasets'
 import Button from '@/app/components/base/button'
@@ -29,6 +31,11 @@ type IStepOneProps = {
   updateNotionPages: (value: NotionPage[]) => void
   onStepChange: () => void
   changeType: (type: DataSourceType) => void
+  websitePages?: CrawlResultItem[]
+  updateWebsitePages: (value: CrawlResultItem[]) => void
+  onFireCrawlJobIdChange: (jobId: string) => void
+  crawlOptions: CrawlOptions
+  onCrawlOptionsChange: (payload: CrawlOptions) => void
 }
 
 type NotionConnectorProps = {
@@ -49,7 +56,7 @@ export const NotionConnector = ({ onSetting }: NotionConnectorProps) => {
 
 const StepOne = ({
   datasetId,
-  dataSourceType,
+  dataSourceType: inCreatePageDataSourceType,
   dataSourceTypeDisable,
   changeType,
   hasConnection,
@@ -60,11 +67,17 @@ const StepOne = ({
   updateFile,
   notionPages = [],
   updateNotionPages,
+  websitePages = [],
+  updateWebsitePages,
+  onFireCrawlJobIdChange,
+  crawlOptions,
+  onCrawlOptionsChange,
 }: IStepOneProps) => {
   const { dataset } = useDatasetDetailContext()
   const [showModal, setShowModal] = useState(false)
   const [currentFile, setCurrentFile] = useState<File | undefined>()
   const [currentNotionPage, setCurrentNotionPage] = useState<NotionPage | undefined>()
+  const [currentWebsite, setCurrentWebsite] = useState<CrawlResultItem | undefined>()
   const { t } = useTranslation()
 
   const modalShowHandle = () => setShowModal(true)
@@ -85,8 +98,13 @@ const StepOne = ({
     setCurrentNotionPage(undefined)
   }
 
-  const shouldShowDataSourceTypeList = !datasetId || (datasetId && !dataset?.data_source_type)
+  const hideWebsitePreview = () => { // handed to WebsitePreview as hidePreview; clearing currentWebsite stops the preview from rendering
+    setCurrentWebsite(undefined)
+  }
 
+  const shouldShowDataSourceTypeList = !datasetId || (datasetId && !dataset?.data_source_type)
+  const isInCreatePage = shouldShowDataSourceTypeList
+  const dataSourceType = isInCreatePage ? inCreatePageDataSourceType : dataset?.data_source_type
   const { plan, enableBilling } = useProviderContext()
   const allFileLoaded = (files.length > 0 && files.every(file => file.file.id))
   const hasNotin = notionPages.length > 0
@@ -150,10 +168,13 @@ const StepOne = ({
                   {t('datasetCreation.stepOne.dataSourceType.notion')}
                 </div>
                 <div
-                  className={cn(s.dataSourceItem, s.disabled, dataSourceType === DataSourceType.WEB && s.active)}
-                // onClick={() => changeType(DataSourceType.WEB)}
+                  className={cn(
+                    s.dataSourceItem,
+                    dataSourceType === DataSourceType.WEB && s.active,
+                    dataSourceTypeDisable && dataSourceType !== DataSourceType.WEB && s.disabled,
+                  )}
+                  onClick={() => changeType(DataSourceType.WEB)}
                 >
-                  <span className={s.comingTag}>Coming soon</span>
                   <span className={cn(s.datasetIcon, s.web)} />
                   {t('datasetCreation.stepOne.dataSourceType.web')}
                 </div>
@@ -201,6 +222,26 @@ const StepOne = ({
               )}
             </>
           )}
+          {dataSourceType === DataSourceType.WEB && (
+            <>
+              <div className={cn('mb-8 w-[640px]', !shouldShowDataSourceTypeList && 'mt-12')}>
+                <Website
+                  onPreview={setCurrentWebsite}
+                  checkedCrawlResult={websitePages}
+                  onCheckedCrawlResultChange={updateWebsitePages}
+                  onJobIdChange={onFireCrawlJobIdChange}
+                  crawlOptions={crawlOptions}
+                  onCrawlOptionsChange={onCrawlOptionsChange}
+                />
+              </div>
+              {isShowVectorSpaceFull && (
+                <div className='max-w-[640px] mb-4'>
+                  <VectorSpaceFull />
+                </div>
+              )}
+              <Button disabled={isShowVectorSpaceFull || !websitePages.length} className={s.submitButton} type='primary' onClick={onStepChange}>{t('datasetCreation.stepOne.button')}</Button>
+            </>
+          )}
           {!datasetId && (
             <>
               <div className={s.dividerLine} />
@@ -212,6 +253,7 @@ const StepOne = ({
       </div>
       {currentFile && <FilePreview file={currentFile} hidePreview={hideFilePreview} />}
       {currentNotionPage && <NotionPagePreview currentPage={currentNotionPage} hidePreview={hideNotionPagePreview} />}
+      {currentWebsite && <WebsitePreview payload={currentWebsite} hidePreview={hideWebsitePreview} />}
     </div>
   )
 }

+ 1 - 0
web/app/components/datasets/create/step-two/index.module.css

@@ -323,6 +323,7 @@
 }
 
 .sourceContent {
+  width: 0;
   flex: 1 1 auto;
 }
 

+ 52 - 2
web/app/components/datasets/create/step-two/index.tsx

@@ -12,7 +12,7 @@ import RetrievalMethodInfo from '../../common/retrieval-method-info'
 import PreviewItem, { PreviewType } from './preview-item'
 import LanguageSelect from './language-select'
 import s from './index.module.css'
-import type { CreateDocumentReq, CustomFile, FileIndexingEstimateResponse, FullDocumentDetail, IndexingEstimateParams, IndexingEstimateResponse, NotionInfo, PreProcessingRule, ProcessRule, Rules, createDocumentResponse } from '@/models/datasets'
+import type { CrawlOptions, CrawlResultItem, CreateDocumentReq, CustomFile, FileIndexingEstimateResponse, FullDocumentDetail, IndexingEstimateParams, IndexingEstimateResponse, NotionInfo, PreProcessingRule, ProcessRule, Rules, createDocumentResponse } from '@/models/datasets'
 import {
   createDocument,
   createFirstDocument,
@@ -44,6 +44,7 @@ import TooltipPlus from '@/app/components/base/tooltip-plus'
 import { useModelListAndDefaultModelAndCurrentProviderAndModel } from '@/app/components/header/account-setting/model-provider-page/hooks'
 import { LanguagesSupported } from '@/i18n/language'
 import { ModelTypeEnum } from '@/app/components/header/account-setting/model-provider-page/declarations'
+import { Globe01 } from '@/app/components/base/icons/src/vender/line/mapsAndTravel'
 
 type ValueOf<T> = T[keyof T]
 type StepTwoProps = {
@@ -56,6 +57,9 @@ type StepTwoProps = {
   dataSourceType: DataSourceType
   files: CustomFile[]
   notionPages?: NotionPage[]
+  websitePages?: CrawlResultItem[]
+  crawlOptions?: CrawlOptions
+  fireCrawlJobId?: string
   onStepChange?: (delta: number) => void
   updateIndexingTypeCache?: (type: string) => void
   updateResultCache?: (res: createDocumentResponse) => void
@@ -79,9 +83,12 @@ const StepTwo = ({
   onSetting,
   datasetId,
   indexingType,
-  dataSourceType,
+  dataSourceType: inCreatePageDataSourceType,
   files,
   notionPages = [],
+  websitePages = [],
+  crawlOptions,
+  fireCrawlJobId = '',
   onStepChange,
   updateIndexingTypeCache,
   updateResultCache,
@@ -94,6 +101,8 @@ const StepTwo = ({
   const isMobile = media === MediaType.mobile
 
   const { dataset: currentDataset, mutateDatasetRes } = useDatasetDetailContext()
+  const isInCreatePage = !datasetId || (datasetId && !currentDataset?.data_source_type)
+  const dataSourceType = isInCreatePage ? inCreatePageDataSourceType : currentDataset?.data_source_type
   const scrollRef = useRef<HTMLDivElement>(null)
   const [scrolled, setScrolled] = useState(false)
   const previewScrollRef = useRef<HTMLDivElement>(null)
@@ -242,6 +251,15 @@ const StepTwo = ({
     }) as NotionInfo[]
   }
 
+  const getWebsiteInfo = () => { // builds the website_info_list payload used when the data source type is WEB
+    return {
+      provider: 'firecrawl', // provider name is hard-coded here
+      job_id: fireCrawlJobId, // falls back to '' when the fireCrawlJobId prop is omitted
+      urls: websitePages.map(page => page.source_url), // source URL of every page received through websitePages
+      only_main_content: crawlOptions?.only_main_content, // undefined when crawlOptions is not supplied
+    }
+  }
+
   const getFileIndexingEstimateParams = (docForm: DocForm): IndexingEstimateParams | undefined => {
     if (dataSourceType === DataSourceType.FILE) {
       return {
@@ -271,6 +289,19 @@ const StepTwo = ({
         dataset_id: datasetId as string,
       }
     }
+    if (dataSourceType === DataSourceType.WEB) {
+      return {
+        info_list: {
+          data_source_type: dataSourceType,
+          website_info_list: getWebsiteInfo(),
+        },
+        indexing_technique: getIndexing_technique() as string,
+        process_rule: getProcessRule(),
+        doc_form: docForm,
+        doc_language: docLanguage,
+        dataset_id: datasetId as string,
+      }
+    }
   }
   const {
     modelList: rerankModelList,
@@ -335,6 +366,9 @@ const StepTwo = ({
       }
       if (dataSourceType === DataSourceType.NOTION)
         params.data_source.info_list.notion_info_list = getNotionInfo()
+
+      if (dataSourceType === DataSourceType.WEB)
+        params.data_source.info_list.website_info_list = getWebsiteInfo()
     }
     return params
   }
@@ -819,6 +853,22 @@ const StepTwo = ({
                     </div>
                   </>
                 )}
+                {dataSourceType === DataSourceType.WEB && (
+                  <>
+                    <div className='mb-2 text-xs font-medium text-gray-500'>{t('datasetCreation.stepTwo.websiteSource')}</div>
+                    <div className='flex items-center text-sm leading-6 font-medium text-gray-800'>
+                      <Globe01 className='shrink-0 mr-1' />
+                      <span className='grow w-0 truncate'>{websitePages[0].source_url}</span>
+                      {websitePages.length > 1 && (
+                        <span className={s.sourceCount}>
+                          <span>{t('datasetCreation.stepTwo.other')}</span>
+                          <span>{websitePages.length - 1}</span>
+                          <span>{t('datasetCreation.stepTwo.webpageUnit')}</span>
+                        </span>
+                      )}
+                    </div>
+                  </>
+                )}
               </div>
               <div className={s.divider} />
               <div className={s.segmentCount}>

+ 29 - 0
web/app/components/datasets/create/website/firecrawl/base/checkbox-with-label.tsx

@@ -0,0 +1,29 @@
+'use client'
+import type { FC } from 'react'
+import React from 'react'
+import cn from 'classnames'
+import Checkbox from '@/app/components/base/checkbox'
+
+type Props = { // props accepted by CheckboxWithLabel
+  className?: string // extra classes merged onto the wrapping label element
+  isChecked: boolean // current checked state, forwarded to Checkbox
+  onChange: (isChecked: boolean) => void // invoked with the negation of isChecked whenever Checkbox fires onCheck
+  label: string // text rendered beside the checkbox
+  labelClassName?: string // extra classes merged onto the label text
+}
+
+const CheckboxWithLabel: FC<Props> = ({ // renders a Checkbox and its text side by side inside one label element
+  className = '',
+  isChecked,
+  onChange,
+  label,
+  labelClassName,
+}) => {
+  return (
+    <label className={cn(className, 'flex items-center h-7 space-x-2')}>
+      <Checkbox checked={isChecked} onCheck={() => onChange(!isChecked)} />
+      <div className={cn(labelClassName, 'text-sm font-normal text-gray-800')}>{label}</div>
+    </label>
+  )
+}
+export default React.memo(CheckboxWithLabel)

+ 30 - 0
web/app/components/datasets/create/website/firecrawl/base/error-message.tsx

@@ -0,0 +1,30 @@
+'use client'
+import type { FC } from 'react'
+import React from 'react'
+import cn from 'classnames'
+import { AlertTriangle } from '@/app/components/base/icons/src/vender/solid/alertsAndFeedback'
+
+type Props = { // props accepted by ErrorMessage
+  className?: string // extra classes merged onto the outer container
+  title: string // headline shown next to the warning icon
+  errorMsg?: string // optional detail text; its row is rendered only when this is truthy
+}
+
+const ErrorMessage: FC<Props> = ({ // banner with an AlertTriangle icon, a title and an optional detail line
+  className,
+  title,
+  errorMsg,
+}) => {
+  return (
+    <div className={cn(className, 'py-2 px-4 border-t border-gray-200 bg-[#FFFAEB]')}>
+      <div className='flex items-center h-5'>
+        <AlertTriangle className='mr-2 w-4 h-4 text-[#F79009]' />
+        <div className='text-sm font-medium text-[#DC6803]'>{title}</div>
+      </div>
+      {errorMsg && (
+        <div className='mt-1 pl-6 leading-[18px] text-xs font-normal text-gray-700'>{errorMsg}</div>
+      )}
+    </div>
+  )
+}
+export default React.memo(ErrorMessage)

+ 54 - 0
web/app/components/datasets/create/website/firecrawl/base/field.tsx

@@ -0,0 +1,54 @@
+'use client'
+import type { FC } from 'react'
+import React from 'react'
+import cn from 'classnames'
+import Input from './input'
+import TooltipPlus from '@/app/components/base/tooltip-plus'
+import { HelpCircle } from '@/app/components/base/icons/src/vender/line/general'
+
+type Props = { // props accepted by Field
+  className?: string // classes applied to the outer container
+  label: string // text shown above the input
+  labelClassName?: string // extra classes merged onto the label text
+  value: string | number // forwarded unchanged to Input
+  onChange: (value: string | number) => void // forwarded unchanged to Input
+  isRequired?: boolean // when true, a red asterisk is rendered after the label
+  placeholder?: string // forwarded to Input
+  isNumber?: boolean // forwarded to Input, which switches to numeric handling
+  tooltip?: string // when truthy, a HelpCircle icon with this text as popup content is rendered
+}
+
+const Field: FC<Props> = ({ // labelled Input with optional required marker and help tooltip
+  className,
+  label,
+  labelClassName,
+  value,
+  onChange,
+  isRequired = false,
+  placeholder = '',
+  isNumber = false,
+  tooltip,
+}) => {
+  return (
+    <div className={cn(className)}>
+      <div className='flex py-[7px]'>
+        <div className={cn(labelClassName, 'flex items-center h-[18px] text-[13px] font-medium text-gray-900')}>{label} </div>
+        {isRequired && <span className='ml-0.5 text-xs font-semibold text-[#D92D20]'>*</span>}
+        {tooltip && (
+          <TooltipPlus popupContent={
+            <div className='w-[200px]'>{tooltip}</div>
+          }>
+            <HelpCircle className='relative top-[3px] w-3 h-3 ml-1 text-gray-500' />
+          </TooltipPlus>
+        )}
+      </div>
+      <Input
+        value={value}
+        onChange={onChange}
+        placeholder={placeholder}
+        isNumber={isNumber}
+      />
+    </div>
+  )
+}
+export default React.memo(Field)

+ 58 - 0
web/app/components/datasets/create/website/firecrawl/base/input.tsx

@@ -0,0 +1,58 @@
+'use client'
+import type { FC } from 'react'
+import React, { useCallback } from 'react'
+
+type Props = { // props accepted by Input
+  value: string | number // controlled value of the underlying input element
+  onChange: (value: string | number) => void // receives a number (or '' when unparsable) if isNumber, otherwise the raw string
+  placeholder?: string // placeholder text of the input element
+  isNumber?: boolean // switches the element to type='number' and enables integer parsing and clamping
+}
+
+const MIN_VALUE = 1 // lower bound enforced on numeric input, both in handleChange and via the min attribute
+
+const Input: FC<Props> = ({ // controlled text/number input used by the firecrawl form fields
+  value,
+  onChange,
+  placeholder = '',
+  isNumber = false,
+}) => {
+  const handleChange = useCallback((e: React.ChangeEvent<HTMLInputElement>) => {
+    const value = e.target.value // raw text from the DOM event; shadows the value prop inside this callback
+    if (isNumber) {
+      let numberValue = parseInt(value, 10) // integer only
+      if (isNaN(numberValue)) { // nothing parsable (e.g. the field was cleared): report '' instead of NaN
+        onChange('')
+        return
+      }
+      if (numberValue < MIN_VALUE) // clamp anything below the minimum up to MIN_VALUE
+        numberValue = MIN_VALUE
+
+      onChange(numberValue) // NOTE(review): parseInt truncates inputs such as '1.5' or '1e5' to 1 — confirm that is intended
+      return
+    }
+    onChange(value)
+  }, [isNumber, onChange])
+
+  const otherOption = (() => { // extra attributes spread onto the input: min for numeric mode, none otherwise
+    if (isNumber) {
+      return {
+        min: MIN_VALUE,
+      }
+    }
+    return {
+
+    }
+  })()
+  return (
+    <input
+      type={isNumber ? 'number' : 'text'}
+      {...otherOption}
+      value={value}
+      onChange={handleChange}
+      className='flex h-9 w-full py-1 px-2 rounded-lg text-xs leading-normal bg-gray-100 caret-primary-600 hover:bg-gray-100 focus:ring-1 focus:ring-inset focus:ring-gray-200 focus-visible:outline-none focus:bg-gray-50 placeholder:text-gray-400'
+      placeholder={placeholder}
+    />
+  )
+}
+export default React.memo(Input)

+ 55 - 0
web/app/components/datasets/create/website/firecrawl/base/options-wrap.tsx

@@ -0,0 +1,55 @@
+'use client'
+import { useBoolean } from 'ahooks'
+import type { FC } from 'react'
+import React, { useEffect } from 'react'
+import { useTranslation } from 'react-i18next'
+import cn from 'classnames'
+import { Settings04 } from '@/app/components/base/icons/src/vender/line/general'
+import { ChevronRight } from '@/app/components/base/icons/src/vender/line/arrows'
+const I18N_PREFIX = 'datasetCreation.stepOne.website'
+
+type Props = { // props accepted by OptionsWrap
+  className?: string // extra classes merged onto the outer container
+  children: React.ReactNode // content rendered only while the section is expanded
+  controlFoldOptions?: number // the section is collapsed when this is truthy on mount or changes to a truthy value
+}
+
+const OptionsWrap: FC<Props> = ({ // collapsible "options" section with a clickable header row
+  className = '',
+  children,
+  controlFoldOptions,
+}) => {
+  const { t } = useTranslation()
+
+  const [fold, { // fold === true means collapsed; the section starts expanded
+    toggle: foldToggle,
+    setTrue: foldHide,
+  }] = useBoolean(false)
+
+  useEffect(() => { // lets the parent collapse the section through controlFoldOptions
+    if (controlFoldOptions)
+      foldHide()
+    // eslint-disable-next-line react-hooks/exhaustive-deps
+  }, [controlFoldOptions])
+  return (
+    <div className={cn(className, !fold ? 'mb-0' : 'mb-3')}>
+      <div
+        className='flex justify-between items-center h-[26px] py-1 cursor-pointer select-none'
+        onClick={foldToggle}
+      >
+        <div className='flex items-center text-gray-700'>
+          <Settings04 className='mr-1 w-4 h-4' />
+          <div className='text-[13px] font-semibold text-gray-800 uppercase'>{t(`${I18N_PREFIX}.options`)}</div>
+        </div>
+        <ChevronRight className={cn(!fold && 'rotate-90', 'w-4 h-4 text-gray-500')} />
+      </div>
+      {!fold && (
+        <div className='mb-4'>
+          {children}
+        </div>
+      )}
+
+    </div>
+  )
+}
+export default React.memo(OptionsWrap)

+ 48 - 0
web/app/components/datasets/create/website/firecrawl/base/url-input.tsx

@@ -0,0 +1,48 @@
+'use client'
+import type { FC } from 'react'
+import React, { useCallback, useState } from 'react'
+import { useTranslation } from 'react-i18next'
+import Input from './input'
+import Button from '@/app/components/base/button'
+
+const I18N_PREFIX = 'datasetCreation.stepOne.website'
+
+type Props = {
+  isRunning: boolean
+  onRun: (url: string) => void
+}
+
+/**
+ * URL text box paired with a primary "run" button.
+ *
+ * @param isRunning - While true the button shows its loading state, its label
+ *   is blank, and clicks are ignored.
+ * @param onRun - Called with the current text of the input when the button is
+ *   clicked and no run is in progress.
+ */
+const UrlInput: FC<Props> = ({
+  isRunning,
+  onRun,
+}) => {
+  const { t } = useTranslation()
+  const [url, setUrl] = useState('')
+
+  // The shared Input reports `string | number`; the value is stored unchanged.
+  const handleUrlChange = useCallback((value: string | number) => {
+    setUrl(value as string)
+  }, [])
+
+  const handleOnRun = useCallback(() => {
+    if (!isRunning)
+      onRun(url)
+  }, [isRunning, onRun, url])
+
+  const buttonLabel = isRunning ? '' : t(`${I18N_PREFIX}.run`)
+
+  return (
+    <div className='flex items-center justify-between'>
+      <Input
+        placeholder='https://docs.dify.ai'
+        value={url}
+        onChange={handleUrlChange}
+      />
+      <Button
+        type='primary'
+        className='ml-2 !h-8 text-[13px] font-medium'
+        loading={isRunning}
+        onClick={handleOnRun}
+      >
+        {buttonLabel}
+      </Button>
+    </div>
+  )
+}
+export default React.memo(UrlInput)

+ 40 - 0
web/app/components/datasets/create/website/firecrawl/crawled-result-item.tsx

@@ -0,0 +1,40 @@
+'use client'
+import type { FC } from 'react'
+import React, { useCallback } from 'react'
+import { useTranslation } from 'react-i18next'
+import cn from 'classnames'
+import type { CrawlResultItem as CrawlResultItemType } from '@/models/datasets'
+import Checkbox from '@/app/components/base/checkbox'
+
+type Props = {
+  payload: CrawlResultItemType
+  isChecked: boolean
+  isPreview: boolean
+  onCheckChange: (checked: boolean) => void
+  onPreview: () => void
+}
+
+/**
+ * One row of the crawl result list: a checkbox, the page title, a "preview"
+ * action that appears on hover, and the page's source URL underneath.
+ *
+ * @param payload - Crawled page whose `title` and `source_url` are displayed.
+ * @param isChecked - Whether the row's checkbox is ticked.
+ * @param isPreview - Whether this row is the one currently being previewed
+ *   (switches the row to its highlighted style).
+ * @param onCheckChange - Called with the inverted checked state when the
+ *   checkbox is toggled.
+ * @param onPreview - Called when the "preview" action is clicked.
+ */
+const CrawledResultItem: FC<Props> = ({
+  isPreview,
+  payload,
+  isChecked,
+  onCheckChange,
+  onPreview,
+}) => {
+  const { t } = useTranslation()
+  const { title, source_url: sourceUrl } = payload
+
+  const handleCheckChange = useCallback(() => {
+    onCheckChange(!isChecked)
+  }, [isChecked, onCheckChange])
+
+  // The hover affordances (`group`) are only enabled for rows that are not being previewed.
+  const stateClassName = isPreview
+    ? 'border-[#D1E0FF] bg-primary-50 shadow-xs'
+    : 'group hover:bg-gray-100'
+
+  return (
+    <div className={cn(stateClassName, 'rounded-md px-2 py-[5px] cursor-pointer border border-transparent')}>
+      <div className='flex items-center h-5'>
+        <Checkbox
+          className='group-hover:border-2 group-hover:border-primary-600 mr-2 shrink-0'
+          checked={isChecked}
+          onCheck={handleCheckChange}
+        />
+        <div className='grow w-0 truncate text-sm font-medium text-gray-700' title={title}>{title}</div>
+        <div
+          className='hidden group-hover:flex items-center h-6 px-2 text-xs rounded-md font-medium text-gray-500 uppercase hover:bg-gray-50'
+          onClick={onPreview}
+        >
+          {t('datasetCreation.stepOne.website.preview')}
+        </div>
+      </div>
+      <div className='mt-0.5 truncate pl-6 leading-[18px] text-xs font-normal text-gray-500' title={sourceUrl}>{sourceUrl}</div>
+    </div>
+  )
+}
+export default React.memo(CrawledResultItem)

+ 87 - 0
web/app/components/datasets/create/website/firecrawl/crawled-result.tsx

@@ -0,0 +1,87 @@
+'use client'
+import type { FC } from 'react'
+import React, { useCallback } from 'react'
+import { useTranslation } from 'react-i18next'
+import cn from 'classnames'
+import CheckboxWithLabel from './base/checkbox-with-label'
+import CrawledResultItem from './crawled-result-item'
+import type { CrawlResultItem } from '@/models/datasets'
+
+const I18N_PREFIX = 'datasetCreation.stepOne.website'
+
+type Props = {
+  className?: string
+  list: CrawlResultItem[]
+  checkedList: CrawlResultItem[]
+  onSelectedChange: (selected: CrawlResultItem[]) => void
+  onPreview: (payload: CrawlResultItem) => void
+  usedTime: number
+}
+
+/**
+ * List of crawled pages with a "select all / reset all" header and per-row
+ * selection and preview.
+ *
+ * Rows are matched against `checkedList` by `source_url`.
+ *
+ * @param className - Extra classes merged onto the outer wrapper.
+ * @param list - Crawled pages to display.
+ * @param checkedList - Currently selected pages (controlled by the parent).
+ * @param onSelectedChange - Called with the next selection whenever a row or
+ *   the header checkbox is toggled.
+ * @param onPreview - Called with the page whose "preview" action was clicked.
+ * @param usedTime - Crawl duration shown in the header, formatted to one decimal.
+ */
+const CrawledResult: FC<Props> = ({
+  className = '',
+  list,
+  checkedList,
+  onSelectedChange,
+  onPreview,
+  usedTime,
+}) => {
+  const { t } = useTranslation()
+
+  // URL lookup set so each row's checked state is an O(1) membership test
+  // instead of a scan over `checkedList`.
+  const checkedUrls = React.useMemo(
+    () => new Set(checkedList.map(checkedItem => checkedItem.source_url)),
+    [checkedList],
+  )
+
+  // "All selected" means every listed page is in the selection. Comparing only
+  // the two lengths would report an empty list as fully selected and would be
+  // fooled by entries in `checkedList` that are not part of `list`.
+  const isCheckAll = list.length > 0 && list.every(item => checkedUrls.has(item.source_url))
+
+  const handleCheckedAll = useCallback(() => {
+    if (!isCheckAll)
+      onSelectedChange(list)
+
+    else
+      onSelectedChange([])
+  }, [isCheckAll, list, onSelectedChange])
+
+  // Returns the toggle handler for one row.
+  const handleItemCheckChange = useCallback((item: CrawlResultItem) => {
+    return (checked: boolean) => {
+      if (checked)
+        onSelectedChange([...checkedList, item])
+
+      else
+        onSelectedChange(checkedList.filter(checkedItem => checkedItem.source_url !== item.source_url))
+    }
+  }, [checkedList, onSelectedChange])
+
+  // Index of the row being previewed; -1 while nothing is previewed.
+  const [previewIndex, setPreviewIndex] = React.useState<number>(-1)
+  const handlePreview = useCallback((index: number) => {
+    return () => {
+      setPreviewIndex(index)
+      onPreview(list[index])
+    }
+  }, [list, onPreview])
+
+  return (
+    <div className={cn(className, 'border-t border-gray-200')}>
+      <div className='flex items-center justify-between h-[34px] px-4 bg-gray-50 shadow-xs border-b-[0.5px] border-black/8 text-xs font-normal text-gray-700'>
+        <CheckboxWithLabel
+          isChecked={isCheckAll}
+          onChange={handleCheckedAll} label={isCheckAll ? t(`${I18N_PREFIX}.resetAll`) : t(`${I18N_PREFIX}.selectAll`)}
+          labelClassName='!font-medium'
+        />
+        <div>{t(`${I18N_PREFIX}.scrapTimeInfo`, {
+          total: list.length,
+          time: usedTime.toFixed(1),
+        })}</div>
+      </div>
+      <div className='p-2'>
+        {list.map((item, index) => (
+          <CrawledResultItem
+            key={item.source_url}
+            isPreview={index === previewIndex}
+            onPreview={handlePreview(index)}
+            payload={item}
+            isChecked={checkedUrls.has(item.source_url)}
+            onCheckChange={handleItemCheckChange(item)}
+          />
+        ))}
+      </div>
+    </div>
+  )
+}
+export default React.memo(CrawledResult)

+ 37 - 0
web/app/components/datasets/create/website/firecrawl/crawling.tsx

@@ -0,0 +1,37 @@
+'use client'
+import type { FC } from 'react'
+import React from 'react'
+import cn from 'classnames'
+import { useTranslation } from 'react-i18next'
+import { RowStruct } from '@/app/components/base/icons/src/public/other'
+
+type Props = {
+  className?: string
+  crawledNum: number
+  totalNum: number
+}
+
+/**
+ * In-progress view for a crawl: a header showing "crawled/total" followed by
+ * four skeleton rows.
+ *
+ * @param className - Extra classes merged onto the outer wrapper.
+ * @param crawledNum - Number of pages crawled so far.
+ * @param totalNum - Total number of pages expected.
+ */
+const Crawling: FC<Props> = ({
+  className = '',
+  crawledNum,
+  totalNum,
+}) => {
+  const { t } = useTranslation()
+
+  // Fixed number of skeleton rows; keyed by position since they are identical.
+  const placeholderRows = Array.from({ length: 4 }, (_, index) => (
+    <div className='py-[5px]' key={index}>
+      <RowStruct />
+    </div>
+  ))
+
+  return (
+    <div className={cn(className, 'border-t border-gray-200')}>
+      <div className='flex items-center h-[34px] px-4 bg-gray-50 shadow-xs border-b-[0.5px] border-black/8 text-xs font-normal text-gray-700'>
+        {t('datasetCreation.stepOne.website.totalPageScraped')} {crawledNum}/{totalNum}
+      </div>
+
+      <div className='p-2'>
+        {placeholderRows}
+      </div>
+    </div>
+  )
+}
+export default React.memo(Crawling)

+ 42 - 0
web/app/components/datasets/create/website/firecrawl/header.tsx

@@ -0,0 +1,42 @@
+'use client'
+import type { FC } from 'react'
+import React from 'react'
+import { useTranslation } from 'react-i18next'
+import { Settings01 } from '@/app/components/base/icons/src/vender/line/general'
+import { BookOpen01 } from '@/app/components/base/icons/src/vender/line/education'
+
+const I18N_PREFIX = 'datasetCreation.stepOne.website'
+
+type Props = {
+  onSetting: () => void
+}
+
+/**
+ * Title bar of the Firecrawl panel: the panel title, a settings icon button,
+ * and a link to the Firecrawl documentation that opens in a new tab.
+ *
+ * @param onSetting - Called when the settings icon is clicked.
+ */
+const Header: FC<Props> = ({
+  onSetting,
+}) => {
+  const { t } = useTranslation()
+  const title = t(`${I18N_PREFIX}.firecrawlTitle`)
+  const docLabel = t(`${I18N_PREFIX}.firecrawlDoc`)
+
+  return (
+    <div className='flex h-6 items-center justify-between'>
+      <div className='flex items-center'>
+        <div className='text-base font-medium text-gray-700'>{title}</div>
+        {/* thin vertical divider between the title and the settings button */}
+        <div className='ml-2 mr-1 w-px h-3.5 bg-gray-200' />
+        <div
+          onClick={onSetting}
+          className='p-1 rounded-md hover:bg-black/5 cursor-pointer'
+        >
+          <Settings01 className='w-3.5 h-3.5 text-gray-500' />
+        </div>
+      </div>
+      <a
+        className='flex items-center text-xs text-primary-600'
+        href='https://docs.firecrawl.dev/introduction'
+        target='_blank'
+        rel='noopener noreferrer'
+      >
+        <BookOpen01 className='mr-1 w-3.5 h-3.5 text-primary-600' />
+        {docLabel}
+      </a>
+    </div>
+  )
+}
+export default React.memo(Header)

+ 216 - 0
web/app/components/datasets/create/website/firecrawl/index.tsx

@@ -0,0 +1,216 @@
+'use client'
+import type { FC } from 'react'
+import React, { useCallback, useEffect, useState } from 'react'
+import { useTranslation } from 'react-i18next'
+import cn from 'classnames'
+import Header from './header'
+import UrlInput from './base/url-input'
+import OptionsWrap from './base/options-wrap'
+import Options from './options'
+import CrawledResult from './crawled-result'
+import Crawling from './crawling'
+import ErrorMessage from './base/error-message'
+import { useModalContext } from '@/context/modal-context'
+import type { CrawlOptions, CrawlResultItem } from '@/models/datasets'
+import Toast from '@/app/components/base/toast'
+import { checkFirecrawlTaskStatus, createFirecrawlTask } from '@/service/datasets'
+import { sleep } from '@/utils'
+
+const ERROR_I18N_PREFIX = 'common.errorMsg'
+const I18N_PREFIX = 'datasetCreation.stepOne.website'
+
+type Props = {
+  onPreview: (payload: CrawlResultItem) => void
+  checkedCrawlResult: CrawlResultItem[]
+  onCheckedCrawlResultChange: (payload: CrawlResultItem[]) => void
+  onJobIdChange: (jobId: string) => void
+  crawlOptions: CrawlOptions
+  onCrawlOptionsChange: (payload: CrawlOptions) => void
+}
+
+enum Step {
+  init = 'init',
+  running = 'running',
+  finished = 'finished',
+}
+
+/**
+ * Firecrawl website-crawl panel.
+ *
+ * Lets the user enter a URL and crawl options, starts a Firecrawl task, polls
+ * its status until it completes or fails, and then shows either the crawled
+ * pages (selectable / previewable) or an error message.
+ *
+ * @param onPreview - Called with the page the user wants to preview.
+ * @param checkedCrawlResult - Currently selected crawled pages (controlled).
+ * @param onCheckedCrawlResultChange - Called with the next selection.
+ * @param onJobIdChange - Called with the job id returned when a task is created.
+ * @param crawlOptions - Current crawl options (controlled).
+ * @param onCrawlOptionsChange - Called with the next crawl options.
+ */
+const FireCrawl: FC<Props> = ({
+  onPreview,
+  checkedCrawlResult,
+  onCheckedCrawlResultChange,
+  onJobIdChange,
+  crawlOptions,
+  onCrawlOptionsChange,
+}) => {
+  const { t } = useTranslation()
+  const [step, setStep] = useState<Step>(Step.init)
+  // Signal passed to OptionsWrap: set to a fresh timestamp every time the step
+  // changes to something other than `init`.
+  const [controlFoldOptions, setControlFoldOptions] = useState<number>(0)
+  useEffect(() => {
+    if (step !== Step.init)
+      setControlFoldOptions(Date.now())
+  }, [step])
+  const { setShowAccountSettingModal } = useModalContext()
+  const handleSetting = useCallback(() => {
+    setShowAccountSettingModal({
+      payload: 'data-source',
+    })
+  }, [setShowAccountSettingModal])
+
+  // Validates the URL (required, http/https prefix) and the required `limit`
+  // option; only the first failure is reported.
+  const checkValid = useCallback((url: string) => {
+    let errorMsg = ''
+    if (!url) {
+      errorMsg = t(`${ERROR_I18N_PREFIX}.fieldRequired`, {
+        field: 'url',
+      })
+    }
+
+    if (!errorMsg && !((url.startsWith('http://') || url.startsWith('https://'))))
+      errorMsg = t(`${ERROR_I18N_PREFIX}.urlError`)
+
+    if (!errorMsg && (crawlOptions.limit === null || crawlOptions.limit === undefined || crawlOptions.limit === '')) {
+      errorMsg = t(`${ERROR_I18N_PREFIX}.fieldRequired`, {
+        field: t(`${I18N_PREFIX}.limit`),
+      })
+    }
+
+    return {
+      isValid: !errorMsg,
+      errorMsg,
+    }
+  }, [crawlOptions, t])
+
+  const isInit = step === Step.init
+  const isCrawlFinished = step === Step.finished
+  const isRunning = step === Step.running
+  const [crawlResult, setCrawlResult] = useState<{
+    current: number
+    total: number
+    data: CrawlResultItem[]
+    time_consuming: number | string
+  } | undefined>(undefined)
+  const [crawlErrorMessage, setCrawlErrorMessage] = useState('')
+  const showError = isCrawlFinished && crawlErrorMessage
+
+  // Polls the task status every 2.5s until it is `completed` or failed.
+  // While the task is still running, the latest progress is pushed into
+  // `crawlResult`. The reported total is capped at the configured limit.
+  const waitForCrawlFinished = useCallback(async (jobId: string) => {
+    try {
+      const res = await checkFirecrawlTaskStatus(jobId) as any
+      if (res.status === 'completed') {
+        return {
+          isError: false,
+          data: {
+            ...res,
+            total: Math.min(res.total, parseFloat(crawlOptions.limit as string)),
+          },
+        }
+      }
+      if (res.status === 'error' || !res.status) {
+        // can't get the error message from the firecrawl api
+        return {
+          isError: true,
+          errorMessage: res.message,
+          data: {
+            data: [],
+          },
+        }
+      }
+      // update the progress
+      setCrawlResult({
+        ...res,
+        total: Math.min(res.total, parseFloat(crawlOptions.limit as string)),
+      })
+      await sleep(2500)
+      return await waitForCrawlFinished(jobId)
+    }
+    catch (e: any) {
+      // The rejection is presumably a Response-like object carrying a JSON
+      // body (TODO confirm against the service layer). Anything else, such as
+      // a network failure or a plain Error, has no usable `json()`; in that
+      // case report the failure without a message so the caller falls back to
+      // its generic error text.
+      let errorMessage: string | undefined
+      try {
+        errorMessage = (await e.json()).message
+      }
+      catch {
+        console.error(e)
+      }
+      return {
+        isError: true,
+        errorMessage,
+        data: {
+          data: [],
+        },
+      }
+    }
+  }, [crawlOptions.limit])
+
+  // Validates the input, creates the crawl task, waits for it to finish and
+  // stores either the result or an error message.
+  const handleRun = useCallback(async (url: string) => {
+    const { isValid, errorMsg } = checkValid(url)
+    if (!isValid) {
+      Toast.notify({
+        message: errorMsg!,
+        type: 'error',
+      })
+      return
+    }
+    // Drop the previous run's result so the progress counter of the new run
+    // does not start out showing stale numbers.
+    setCrawlResult(undefined)
+    setStep(Step.running)
+    try {
+      const passToServerCrawlOptions: any = {
+        ...crawlOptions,
+      }
+      // An empty max depth means "not set": omit it from the request.
+      if (crawlOptions.max_depth === '')
+        delete passToServerCrawlOptions.max_depth
+
+      const res = await createFirecrawlTask({
+        url,
+        options: passToServerCrawlOptions,
+      }) as any
+      const jobId = res.job_id
+      onJobIdChange(jobId)
+      const { isError, data, errorMessage } = await waitForCrawlFinished(jobId)
+      if (isError) {
+        setCrawlErrorMessage(errorMessage || t(`${I18N_PREFIX}.unknownError`))
+      }
+      else {
+        setCrawlResult(data)
+        setCrawlErrorMessage('')
+      }
+    }
+    catch (e) {
+      setCrawlErrorMessage(t(`${I18N_PREFIX}.unknownError`)!)
+      console.error(e)
+    }
+    finally {
+      setStep(Step.finished)
+    }
+  }, [checkValid, crawlOptions, onJobIdChange, t, waitForCrawlFinished])
+
+  return (
+    <div>
+      <Header onSetting={handleSetting} />
+      <div className={cn('mt-2 p-4 pb-0 rounded-xl border border-gray-200')}>
+        <UrlInput onRun={handleRun} isRunning={isRunning} />
+        <OptionsWrap
+          className={cn('mt-4')}
+          controlFoldOptions={controlFoldOptions}
+        >
+          <Options className='mt-2' payload={crawlOptions} onChange={onCrawlOptionsChange} />
+        </OptionsWrap>
+
+        {!isInit && (
+          <div className='mt-3 relative left-[-16px] w-[calc(100%_+_32px)] rounded-b-xl'>
+            {isRunning
+              && <Crawling
+                className='mt-2'
+                crawledNum={crawlResult?.current || 0}
+                totalNum={crawlResult?.total || parseFloat(crawlOptions.limit as string) || 0}
+              />}
+            {showError && (
+              <ErrorMessage className='rounded-b-xl' title={t(`${I18N_PREFIX}.exceptionErrorTitle`)} errorMsg={crawlErrorMessage} />
+            )}
+            {isCrawlFinished && !showError
+              && <CrawledResult
+                className='mb-2'
+                list={crawlResult?.data || []}
+                checkedList={checkedCrawlResult}
+                onSelectedChange={onCheckedCrawlResultChange}
+                onPreview={onPreview}
+                usedTime={parseFloat(crawlResult?.time_consuming as string) || 0}
+              />
+            }
+          </div>
+        )}
+      </div>
+    </div>
+  )
+}
+export default React.memo(FireCrawl)

+ 24 - 0
web/app/components/datasets/create/website/firecrawl/mock-crawl-result.ts

@@ -0,0 +1,24 @@
+import type { CrawlResultItem } from '@/models/datasets'
+
+// Static sample list of three crawled pages in the CrawlResultItem shape
+// (title, markdown, description, source_url) with placeholder content.
+// NOTE(review): presumably intended for local development/testing; no usage is
+// visible in this part of the change, so confirm before relying on it.
+const result: CrawlResultItem[] = [
+  {
+    title: 'Start the frontend Docker container separately',
+    markdown: 'Markdown 1',
+    description: 'Description 1',
+    source_url: 'https://example.com/1',
+  },
+  {
+    title: 'Advanced Tool Integration',
+    markdown: 'Markdown 2',
+    description: 'Description 2',
+    source_url: 'https://example.com/2',
+  },
+  {
+    title: 'Local Source Code Start | English | Dify',
+    markdown: 'Markdown 3',
+    description: 'Description 3',
+    source_url: 'https://example.com/3',
+  },
+]
+
+export default result

+ 83 - 0
web/app/components/datasets/create/website/firecrawl/options.tsx

@@ -0,0 +1,83 @@
+'use client'
+import type { FC } from 'react'
+import React, { useCallback } from 'react'
+import cn from 'classnames'
+import { useTranslation } from 'react-i18next'
+import CheckboxWithLabel from './base/checkbox-with-label'
+import Field from './base/field'
+import type { CrawlOptions } from '@/models/datasets'
+
+const I18N_PREFIX = 'datasetCreation.stepOne.website'
+
+type Props = {
+  className?: string
+  payload: CrawlOptions
+  onChange: (payload: CrawlOptions) => void
+}
+
+/**
+ * Form for the crawl options: two checkboxes (crawl sub-pages, extract only
+ * main content) and four fields (limit, max depth, exclude paths, include-only
+ * paths).
+ *
+ * The form is controlled: every edit calls `onChange` with a copy of `payload`
+ * in which the edited key is replaced.
+ *
+ * @param className - Extra classes merged onto the wrapper.
+ * @param payload - Current option values.
+ * @param onChange - Called with the next option values.
+ */
+const Options: FC<Props> = ({
+  className = '',
+  payload,
+  onChange,
+}) => {
+  const { t } = useTranslation()
+
+  // Builds the change handler for a single option key.
+  const handleChange = useCallback(
+    (key: keyof CrawlOptions) => (value: any) => {
+      const nextPayload = {
+        ...payload,
+        [key]: value,
+      }
+      onChange(nextPayload)
+    },
+    [payload, onChange],
+  )
+
+  return (
+    <div className={cn(className, ' space-y-2')}>
+      <CheckboxWithLabel
+        isChecked={payload.crawl_sub_pages}
+        label={t(`${I18N_PREFIX}.crawlSubPage`)}
+        onChange={handleChange('crawl_sub_pages')}
+      />
+
+      {/* numeric options */}
+      <div className='flex justify-between space-x-4'>
+        <Field
+          isNumber
+          isRequired
+          className='grow shrink-0'
+          label={t(`${I18N_PREFIX}.limit`)}
+          value={payload.limit}
+          onChange={handleChange('limit')}
+        />
+        <Field
+          isNumber
+          className='grow shrink-0'
+          label={t(`${I18N_PREFIX}.maxDepth`)}
+          value={payload.max_depth}
+          onChange={handleChange('max_depth')}
+          tooltip={t(`${I18N_PREFIX}.maxDepthTooltip`)!}
+        />
+      </div>
+
+      {/* path filters */}
+      <div className='flex justify-between space-x-4'>
+        <Field
+          className='grow shrink-0'
+          placeholder='blog/*, /about/*'
+          label={t(`${I18N_PREFIX}.excludePaths`)}
+          value={payload.excludes}
+          onChange={handleChange('excludes')}
+        />
+        <Field
+          className='grow shrink-0'
+          placeholder='articles/*'
+          label={t(`${I18N_PREFIX}.includeOnlyPaths`)}
+          value={payload.includes}
+          onChange={handleChange('includes')}
+        />
+      </div>
+
+      <CheckboxWithLabel
+        isChecked={payload.only_main_content}
+        label={t(`${I18N_PREFIX}.extractOnlyMainContent`)}
+        onChange={handleChange('only_main_content')}
+      />
+    </div>
+  )
+}
+export default React.memo(Options)

+ 72 - 0
web/app/components/datasets/create/website/index.tsx

@@ -0,0 +1,72 @@
+'use client'
+import type { FC } from 'react'
+import React, { useCallback, useEffect, useState } from 'react'
+import NoData from './no-data'
+import Firecrawl from './firecrawl'
+import { useModalContext } from '@/context/modal-context'
+import type { CrawlOptions, CrawlResultItem } from '@/models/datasets'
+import { fetchFirecrawlApiKey } from '@/service/datasets'
+import { type DataSourceWebsiteItem, WebsiteProvider } from '@/models/common'
+
+type Props = {
+  onPreview: (payload: CrawlResultItem) => void
+  checkedCrawlResult: CrawlResultItem[]
+  onCheckedCrawlResultChange: (payload: CrawlResultItem[]) => void
+  onJobIdChange: (jobId: string) => void
+  crawlOptions: CrawlOptions
+  onCrawlOptionsChange: (payload: CrawlOptions) => void
+}
+
+/**
+ * Entry point of the "website" data source in dataset creation.
+ *
+ * On mount it checks whether an enabled Firecrawl provider is configured.
+ * Until that check settles nothing is rendered; afterwards it shows the
+ * Firecrawl panel when a provider is available, or the "not configured"
+ * placeholder otherwise. All props are forwarded to the Firecrawl panel.
+ */
+const Website: FC<Props> = ({
+  onPreview,
+  checkedCrawlResult,
+  onCheckedCrawlResultChange,
+  onJobIdChange,
+  crawlOptions,
+  onCrawlOptionsChange,
+}) => {
+  const { setShowAccountSettingModal } = useModalContext()
+  const [isLoaded, setIsLoaded] = useState(false)
+  const [isSetFirecrawlApiKey, setIsSetFirecrawlApiKey] = useState(false)
+
+  // Refreshes `isSetFirecrawlApiKey` from the server. Never rejects: if the
+  // request fails the previous value is kept, so a failed check can neither
+  // leave the component stuck in its loading state nor surface as an unhandled
+  // rejection when used as a modal callback.
+  const checkSetApiKey = useCallback(async () => {
+    try {
+      const res = await fetchFirecrawlApiKey() as any
+      const settings: DataSourceWebsiteItem[] = res?.settings ?? []
+      const hasEnabledFirecrawl = settings.some(item => item.provider === WebsiteProvider.fireCrawl && !item.disabled)
+      setIsSetFirecrawlApiKey(hasEnabledFirecrawl)
+    }
+    catch (e) {
+      console.error(e)
+    }
+  }, [])
+
+  useEffect(() => {
+    checkSetApiKey().then(() => {
+      setIsLoaded(true)
+    })
+    // eslint-disable-next-line react-hooks/exhaustive-deps
+  }, [])
+
+  // Opens the account settings on the data-source tab and re-checks the
+  // configuration when the modal is cancelled.
+  const handleOnConfig = useCallback(() => {
+    setShowAccountSettingModal({
+      payload: 'data-source',
+      onCancelCallback: checkSetApiKey,
+    })
+  }, [checkSetApiKey, setShowAccountSettingModal])
+
+  if (!isLoaded)
+    return null
+
+  return (
+    <div>
+      {isSetFirecrawlApiKey
+        ? (
+          <Firecrawl
+            onPreview={onPreview}
+            checkedCrawlResult={checkedCrawlResult}
+            onCheckedCrawlResultChange={onCheckedCrawlResultChange}
+            onJobIdChange={onJobIdChange}
+            crawlOptions={crawlOptions}
+            onCrawlOptionsChange={onCrawlOptionsChange}
+          />
+        )
+        : (
+          <NoData onConfig={handleOnConfig} />
+        )}
+    </div>
+  )
+}
+export default React.memo(Website)

+ 36 - 0
web/app/components/datasets/create/website/no-data.tsx

@@ -0,0 +1,36 @@
+'use client'
+import type { FC } from 'react'
+import React from 'react'
+import { useTranslation } from 'react-i18next'
+import { Icon3Dots } from '@/app/components/base/icons/src/vender/line/others'
+import Button from '@/app/components/base/button'
+
+const I18N_PREFIX = 'datasetCreation.stepOne.website'
+
+type Props = {
+  onConfig: () => void
+}
+
+/**
+ * Placeholder card shown when Firecrawl has not been configured: an icon
+ * tile, a title with description, and a "configure" button.
+ *
+ * @param onConfig - Called when the configure button is clicked.
+ */
+const NoData: FC<Props> = ({
+  onConfig,
+}) => {
+  const { t } = useTranslation()
+  const heading = t(`${I18N_PREFIX}.fireCrawlNotConfigured`)
+  const description = t(`${I18N_PREFIX}.fireCrawlNotConfiguredDescription`)
+  const buttonLabel = t(`${I18N_PREFIX}.configure`)
+
+  return (
+    <div className='max-w-[640px] p-6 rounded-2xl bg-gray-50'>
+      <div className='flex w-11 h-11 items-center justify-center bg-gray-50 rounded-xl border-[0.5px] border-gray-100 shadow-lg'>
+        🔥
+      </div>
+      <div className='my-2'>
+        <span className='text-gray-700 font-semibold'>
+          {heading}
+          <Icon3Dots className='inline relative -top-3 -left-1.5' />
+        </span>
+        <div className='mt-1 pb-3 text-gray-500 text-[13px] font-normal'>
+          {description}
+        </div>
+      </div>
+      <Button
+        type='primary'
+        className='!h-8 text-[13px] font-medium '
+        onClick={onConfig}
+      >
+        {buttonLabel}
+      </Button>
+    </div>
+  )
+}
+export default React.memo(NoData)

+ 41 - 0
web/app/components/datasets/create/website/preview.tsx

@@ -0,0 +1,41 @@
+'use client'
+import React from 'react'
+import { useTranslation } from 'react-i18next'
+import cn from 'classnames'
+import { XMarkIcon } from '@heroicons/react/20/solid'
+import s from '../file-preview/index.module.css'
+import type { CrawlResultItem } from '@/models/datasets'
+
+type IProps = {
+  payload: CrawlResultItem
+  hidePreview: () => void
+}
+
+/**
+ * Preview pane for a single crawled page: a header with a close button, the
+ * page title and source URL, followed by the page's markdown rendered as
+ * plain text. Styling is reused from the file preview stylesheet.
+ *
+ * @param payload - Crawled page to display.
+ * @param hidePreview - Called when the close icon is clicked.
+ */
+const WebsitePreview = ({
+  payload,
+  hidePreview,
+}: IProps) => {
+  const { t } = useTranslation()
+  const { title, source_url: sourceUrl, markdown } = payload
+
+  return (
+    <div className={cn(s.filePreview)}>
+      <div className={cn(s.previewHeader)}>
+        <div className={cn(s.title)}>
+          <span>{t('datasetCreation.stepOne.pagePreview')}</span>
+          <div
+            className='flex items-center justify-center w-6 h-6 cursor-pointer'
+            onClick={hidePreview}
+          >
+            <XMarkIcon className='h-4 w-4' />
+          </div>
+        </div>
+        <div className='leading-5 text-sm font-medium text-gray-900 break-words'>
+          {title}
+        </div>
+        <div className='truncate leading-[18px] text-xs font-normal text-gray-500' title={sourceUrl}>{sourceUrl}</div>
+      </div>
+      <div className={cn(s.previewContent)}>
+        <div className={cn(s.fileContent)}>{markdown}</div>
+      </div>
+    </div>
+  )
+}
+
+export default WebsitePreview

+ 10 - 0
web/app/components/datasets/documents/detail/settings/index.tsx

@@ -73,6 +73,16 @@ const DocumentSettings = ({ datasetId, documentId }: DocumentSettingsProps) => {
             datasetId={datasetId}
             dataSourceType={documentDetail.data_source_type}
             notionPages={[currentPage]}
+            websitePages={[
+              {
+                title: documentDetail.name,
+                source_url: documentDetail.data_source_info?.url,
+                markdown: '',
+                description: '',
+              },
+            ]}
+            fireCrawlJobId={documentDetail.data_source_info?.job_id}
+            crawlOptions={documentDetail.data_source_info}
             indexingType={indexingTechnique || ''}
             isSetting
             documentDetail={documentDetail}

+ 4 - 1
web/app/components/datasets/documents/index.tsx

@@ -83,6 +83,8 @@ const Documents: FC<IDocumentsProps> = ({ datasetId }) => {
   const [notionPageSelectorModalVisible, setNotionPageSelectorModalVisible] = useState(false)
   const [timerCanRun, setTimerCanRun] = useState(true)
   const isDataSourceNotion = dataset?.data_source_type === DataSourceType.NOTION
+  const isDataSourceWeb = dataset?.data_source_type === DataSourceType.WEB
+  const isDataSourceFile = dataset?.data_source_type === DataSourceType.FILE
   const embeddingAvailable = !!dataset?.embedding_available
 
   const query = useMemo(() => {
@@ -211,7 +213,8 @@ const Documents: FC<IDocumentsProps> = ({ datasetId }) => {
               <Button type='primary' onClick={routeToDocCreate} className='!h-8 !text-[13px] !shrink-0'>
                 <PlusIcon className='h-4 w-4 mr-2 stroke-current' />
                 {isDataSourceNotion && t('datasetDocuments.list.addPages')}
-                {!isDataSourceNotion && t('datasetDocuments.list.addFile')}
+                {isDataSourceWeb && t('datasetDocuments.list.addUrl')}
+                {isDataSourceFile && t('datasetDocuments.list.addFile')}
               </Button>
             )}
           </div>

+ 14 - 8
web/app/components/datasets/documents/list.tsx

@@ -13,6 +13,7 @@ import cn from 'classnames'
 import dayjs from 'dayjs'
 import { Edit03 } from '../../base/icons/src/vender/solid/general'
 import TooltipPlus from '../../base/tooltip-plus'
+import { Globe01 } from '../../base/icons/src/vender/line/mapsAndTravel'
 import s from './style.module.css'
 import RenameModal from './rename-modal'
 import Switch from '@/app/components/base/switch'
@@ -26,7 +27,7 @@ import type { IndicatorProps } from '@/app/components/header/indicator'
 import Indicator from '@/app/components/header/indicator'
 import { asyncRunSafe } from '@/utils'
 import { formatNumber } from '@/utils/format'
-import { archiveDocument, deleteDocument, disableDocument, enableDocument, syncDocument, unArchiveDocument } from '@/service/datasets'
+import { archiveDocument, deleteDocument, disableDocument, enableDocument, syncDocument, syncWebsite, unArchiveDocument } from '@/service/datasets'
 import NotionIcon from '@/app/components/base/notion-icon'
 import ProgressBar from '@/app/components/base/progress-bar'
 import { DataSourceType, type DocumentDisplayStatus, type SimpleDocumentDetail } from '@/models/datasets'
@@ -146,7 +147,12 @@ export const OperationAction: FC<{
         opApi = disableDocument
         break
       case 'sync':
-        opApi = syncDocument
+        if (data_source_type === 'notion_import')
+          opApi = syncDocument
+
+        else
+          opApi = syncWebsite
+
         break
       default:
         opApi = deleteDocument
@@ -249,7 +255,7 @@ export const OperationAction: FC<{
                   <SettingsIcon />
                   <span className={s.actionName}>{t('datasetDocuments.list.action.settings')}</span>
                 </div>
-                {data_source_type === 'notion_import' && (
+                {['notion_import', DataSourceType.WEB].includes(data_source_type) && (
                   <div className={s.actionItem} onClick={() => onOperate('sync')}>
                     <SyncIcon />
                     <span className={s.actionName}>{t('datasetDocuments.list.action.sync')}</span>
@@ -282,7 +288,7 @@ export const OperationAction: FC<{
           </div>
         }
         btnClassName={open => cn(isListScene ? s.actionIconWrapperList : s.actionIconWrapperDetail, open ? '!bg-gray-100 !shadow-none' : '!bg-transparent')}
-        className={`!w-[200px] h-fit !z-20 ${className}`}
+        className={`flex justify-end !w-[200px] h-fit !z-20 ${className}`}
       />
     )}
     {showModal && <Modal isShow={showModal} onClose={() => setShowModal(false)} className={s.delModal} closable>
@@ -418,10 +424,10 @@ const DocumentList: FC<IDocumentListProps> = ({ embeddingAvailable, documents =
               <td>
                 <div className='group flex items-center justify-between'>
                   <span className={s.tdValue}>
-                    {
-                      doc?.data_source_type === DataSourceType.NOTION
-                        ? <NotionIcon className='inline-flex -mt-[3px] mr-1.5 align-middle' type='page' src={doc.data_source_info.notion_page_icon} />
-                        : <div className={cn(s[`${doc?.data_source_info?.upload_file?.extension ?? fileType}Icon`], s.commonIcon, 'mr-1.5')}></div>
+                    {doc?.data_source_type === DataSourceType.NOTION && <NotionIcon className='inline-flex -mt-[3px] mr-1.5 align-middle' type='page' src={doc.data_source_info.notion_page_icon} />
+                    }
+                    {doc?.data_source_type === DataSourceType.FILE && <div className={cn(s[`${doc?.data_source_info?.upload_file?.extension ?? fileType}Icon`], s.commonIcon, 'mr-1.5')}></div>}
+                    {doc?.data_source_type === DataSourceType.WEB && <Globe01 className='inline-flex -mt-[3px] mr-1.5 align-middle' />
                     }
                     {
                       doc.name

+ 49 - 101
web/app/components/header/account-setting/data-source-page/data-source-notion/index.tsx

@@ -1,23 +1,34 @@
-import { useEffect, useState } from 'react'
+'use client'
+import type { FC } from 'react'
+import React, { useEffect, useState } from 'react'
 import useSWR from 'swr'
-import { useTranslation } from 'react-i18next'
-import { PlusIcon } from '@heroicons/react/24/solid'
-import cn from 'classnames'
-import Indicator from '../../../indicator'
-import Operate from './operate'
-import s from './style.module.css'
-import NotionIcon from '@/app/components/base/notion-icon'
+import Panel from '../panel'
+import { DataSourceType } from '../panel/types'
 import type { DataSourceNotion as TDataSourceNotion } from '@/models/common'
 import { useAppContext } from '@/context/app-context'
 import { fetchNotionConnection } from '@/service/common'
+import NotionIcon from '@/app/components/base/notion-icon'
 
-type DataSourceNotionProps = {
+const Icon: FC<{
+  src: string
+  name: string
+  className: string
+}> = ({ src, name, className }) => {
+  return (
+    <NotionIcon
+      src={src}
+      name={name}
+      className={className}
+    />
+  )
+}
+type Props = {
   workspaces: TDataSourceNotion[]
 }
-const DataSourceNotion = ({
+
+const DataSourceNotion: FC<Props> = ({
   workspaces,
-}: DataSourceNotionProps) => {
-  const { t } = useTranslation()
+}) => {
   const { isCurrentWorkspaceManager } = useAppContext()
   const [canConnectNotion, setCanConnectNotion] = useState(false)
   const { data } = useSWR(canConnectNotion ? '/oauth/data-source/notion' : null, fetchNotionConnection)
@@ -42,95 +53,32 @@ const DataSourceNotion = ({
     if (data?.data)
       window.location.href = data.data
   }, [data])
-
   return (
-    <div className='mb-2 border-[0.5px] border-gray-200 bg-gray-50 rounded-xl'>
-      <div className='flex items-center px-3 py-[9px]'>
-        <div className={cn(s['notion-icon'], 'w-8 h-8 mr-3 border border-gray-100 rounded-lg')} />
-        <div className='grow'>
-          <div className='leading-5 text-sm font-medium text-gray-800'>
-            {t('common.dataSource.notion.title')}
-          </div>
-          {
-            !connected && (
-              <div className='leading-5 text-xs text-gray-500'>
-                {t('common.dataSource.notion.description')}
-              </div>
-            )
-          }
-        </div>
-        {
-          connected
-            ? (
-              <div
-                className={
-                  `flex items-center ml-3 px-3 h-7 bg-white border border-gray-200
-                  rounded-md text-xs font-medium text-gray-700
-                  ${isCurrentWorkspaceManager ? 'cursor-pointer' : 'grayscale opacity-50 cursor-default'}`
-                }
-                onClick={handleConnectNotion}
-              >
-                {t('common.dataSource.connect')}
-              </div>
-            )
-            : (
-              <div
-                className={
-                  `flex items-center px-3 py-1 min-h-7 bg-white border-[0.5px] border-gray-200 text-xs font-medium text-primary-600 rounded-md
-                  ${isCurrentWorkspaceManager ? 'cursor-pointer' : 'grayscale opacity-50 cursor-default'}`
-                }
-                onClick={handleConnectNotion}
-              >
-                <PlusIcon className='w-[14px] h-[14px] mr-[5px]' />
-                {t('common.dataSource.notion.addWorkspace')}
-              </div>
-            )
-        }
-      </div>
-      {
-        connected && (
-          <div className='flex items-center px-3 h-[18px]'>
-            <div className='text-xs font-medium text-gray-500'>
-              {t('common.dataSource.notion.connectedWorkspace')}
-            </div>
-            <div className='grow ml-3 border-t border-t-gray-100' />
-          </div>
-        )
-      }
-      {
-        connected && (
-          <div className='px-3 pt-2 pb-3'>
-            {
-              workspaces.map(workspace => (
-                <div className={cn(s['workspace-item'], 'flex items-center mb-1 py-1 pr-1 bg-white rounded-lg')} key={workspace.id}>
-                  <NotionIcon
-                    className='ml-3 mr-[6px]'
-                    src={workspace.source_info.workspace_icon}
-                    name={workspace.source_info.workspace_name}
-                  />
-                  <div className='grow py-[7px] leading-[18px] text-[13px] font-medium text-gray-700 truncate' title={workspace.source_info.workspace_name}>{workspace.source_info.workspace_name}</div>
-                  {
-                    workspace.is_bound
-                      ? <Indicator className='shrink-0 mr-[6px]' />
-                      : <Indicator className='shrink-0 mr-[6px]' color='yellow' />
-                  }
-                  <div className='shrink-0 mr-3 text-xs font-medium'>
-                    {
-                      workspace.is_bound
-                        ? t('common.dataSource.notion.connected')
-                        : t('common.dataSource.notion.disconnected')
-                    }
-                  </div>
-                  <div className='mr-2 w-[1px] h-3 bg-gray-100' />
-                  <Operate workspace={workspace} onAuthAgain={handleAuthAgain} />
-                </div>
-              ))
-            }
-          </div>
-        )
-      }
-    </div>
+    <Panel
+      type={DataSourceType.notion}
+      isConfigured={connected}
+      onConfigure={handleConnectNotion}
+      readonly={!isCurrentWorkspaceManager}
+      isSupportList
+      configuredList={workspaces.map(workspace => ({
+        id: workspace.id,
+        logo: ({ className }: { className: string }) => (
+          <Icon
+            src={workspace.source_info.workspace_icon!}
+            name={workspace.source_info.workspace_name}
+            className={className}
+          />),
+        name: workspace.source_info.workspace_name,
+        isActive: workspace.is_bound,
+        notionConfig: {
+          total: workspace.source_info.total || 0,
+        },
+      }))}
+      onRemove={() => { }} // handled in operation/index.tsx
+      notionActions={{
+        onChangeAuthorizedPage: handleAuthAgain,
+      }}
+    />
   )
 }
-
-export default DataSourceNotion
+export default React.memo(DataSourceNotion)

+ 8 - 6
web/app/components/header/account-setting/data-source-page/data-source-notion/operate/index.tsx

@@ -6,17 +6,19 @@ import { EllipsisHorizontalIcon } from '@heroicons/react/24/solid'
 import { Menu, Transition } from '@headlessui/react'
 import { syncDataSourceNotion, updateDataSourceNotionAction } from '@/service/common'
 import Toast from '@/app/components/base/toast'
-import type { DataSourceNotion } from '@/models/common'
 import { FilePlus02 } from '@/app/components/base/icons/src/vender/line/files'
 import { RefreshCw05 } from '@/app/components/base/icons/src/vender/line/arrows'
 import { Trash03 } from '@/app/components/base/icons/src/vender/line/general'
 
 type OperateProps = {
-  workspace: DataSourceNotion
+  payload: {
+    id: string
+    total: number
+  }
   onAuthAgain: () => void
 }
 export default function Operate({
-  workspace,
+  payload,
   onAuthAgain,
 }: OperateProps) {
   const itemClassName = `
@@ -37,11 +39,11 @@ export default function Operate({
     mutate({ url: 'data-source/integrates' })
   }
   const handleSync = async () => {
-    await syncDataSourceNotion({ url: `/oauth/data-source/notion/${workspace.id}/sync` })
+    await syncDataSourceNotion({ url: `/oauth/data-source/notion/${payload.id}/sync` })
     updateIntegrates()
   }
   const handleRemove = async () => {
-    await updateDataSourceNotionAction({ url: `/data-source/integrates/${workspace.id}/disable` })
+    await updateDataSourceNotionAction({ url: `/data-source/integrates/${payload.id}/disable` })
     updateIntegrates()
   }
 
@@ -79,7 +81,7 @@ export default function Operate({
                       <div>
                         <div className='leading-5'>{t('common.dataSource.notion.changeAuthorizedPages')}</div>
                         <div className='leading-5 text-xs text-gray-500'>
-                          {workspace.source_info.total} {t('common.dataSource.notion.pagesAuthorized')}
+                          {payload.total} {t('common.dataSource.notion.pagesAuthorized')}
                         </div>
                       </div>
                     </div>

+ 163 - 0
web/app/components/header/account-setting/data-source-page/data-source-website/config-firecrawl-modal.tsx

@@ -0,0 +1,163 @@
+'use client'
+import type { FC } from 'react'
+import React, { useCallback, useState } from 'react'
+import { useTranslation } from 'react-i18next'
+import {
+  PortalToFollowElem,
+  PortalToFollowElemContent,
+} from '@/app/components/base/portal-to-follow-elem'
+import { Lock01 } from '@/app/components/base/icons/src/vender/solid/security'
+import Button from '@/app/components/base/button'
+import type { FirecrawlConfig } from '@/models/common'
+import Field from '@/app/components/datasets/create/website/firecrawl/base/field'
+import Toast from '@/app/components/base/toast'
+import { createFirecrawlApiKey } from '@/service/datasets'
+import { LinkExternal02 } from '@/app/components/base/icons/src/vender/line/general'
+/** Callbacks supplied by the component that mounts the modal. */
+type Props = {
+  // Called when the cancel button is pressed.
+  onCancel: () => void
+  // Called after the credentials request has resolved.
+  onSaved: () => void
+}
+
+const I18N_PREFIX = 'datasetCreation.firecrawl'
+
+// Sent as `base_url` when the field is left empty; also shown as that field's placeholder.
+const DEFAULT_BASE_URL = 'https://api.firecrawl.dev'
+
+/**
+ * Modal form that collects a Firecrawl API key (required) and base URL (optional),
+ * validates them client-side and posts them through `createFirecrawlApiKey`.
+ */
+const ConfigFirecrawlModal: FC<Props> = ({
+  onCancel,
+  onSaved,
+}) => {
+  const { t } = useTranslation()
+  const [isSaving, setIsSaving] = useState(false)
+  const [config, setConfig] = useState<FirecrawlConfig>({
+    api_key: '',
+    base_url: '',
+  })
+
+  // Builds the change handler for one config field. The handler signature accepts
+  // numbers, so the value is converted to a string before it is stored; the
+  // validation below relies on string methods.
+  const handleConfigChange = useCallback((key: keyof FirecrawlConfig) => {
+    return (value: string | number) => {
+      setConfig(prev => ({ ...prev, [key]: String(value) }))
+    }
+  }, [])
+
+  // Validates the form, then posts the credentials. If the request rejects, the
+  // saving flag is reset and the error propagates, so `onSaved` only runs on success.
+  const handleSave = useCallback(async () => {
+    // Ignore repeated clicks while a request is in flight.
+    if (isSaving)
+      return
+    let errorMsg = ''
+    // The base URL is optional, but when given it must be an http(s) URL.
+    if (config.base_url && !((config.base_url.startsWith('http://') || config.base_url.startsWith('https://'))))
+      errorMsg = t('common.errorMsg.urlError')
+    if (!errorMsg) {
+      if (!config.api_key) {
+        errorMsg = t('common.errorMsg.fieldRequired', {
+          field: 'API Key',
+        })
+      }
+      else if (!config.api_key.startsWith('fc-')) {
+        errorMsg = t(`${I18N_PREFIX}.apiKeyFormatError`)
+      }
+    }
+
+    if (errorMsg) {
+      Toast.notify({
+        type: 'error',
+        message: errorMsg,
+      })
+      return
+    }
+    const postData = {
+      category: 'website',
+      provider: 'firecrawl',
+      credentials: {
+        auth_type: 'bearer',
+        config: {
+          api_key: config.api_key,
+          base_url: config.base_url || DEFAULT_BASE_URL,
+        },
+      },
+    }
+    try {
+      setIsSaving(true)
+      await createFirecrawlApiKey(postData)
+      Toast.notify({
+        type: 'success',
+        message: t('common.api.success'),
+      })
+    }
+    finally {
+      setIsSaving(false)
+    }
+
+    onSaved()
+  }, [config.api_key, config.base_url, onSaved, t, isSaving])
+
+  return (
+    <PortalToFollowElem open>
+      <PortalToFollowElemContent className='w-full h-full z-[60]'>
+        <div className='fixed inset-0 flex items-center justify-center bg-black/[.25]'>
+          <div className='mx-2 w-[640px] max-h-[calc(100vh-120px)] bg-white shadow-xl rounded-2xl overflow-y-auto'>
+            <div className='px-8 pt-8'>
+              <div className='flex justify-between items-center mb-4'>
+                <div className='text-xl font-semibold text-gray-900'>{t(`${I18N_PREFIX}.configFirecrawl`)}</div>
+              </div>
+
+              <div className='space-y-4'>
+                <Field
+                  label='API Key'
+                  labelClassName='!text-sm'
+                  isRequired
+                  value={config.api_key}
+                  onChange={handleConfigChange('api_key')}
+                  placeholder={t(`${I18N_PREFIX}.apiKeyPlaceholder`)!}
+                />
+                <Field
+                  label='Base URL'
+                  labelClassName='!text-sm'
+                  value={config.base_url}
+                  onChange={handleConfigChange('base_url')}
+                  placeholder={DEFAULT_BASE_URL}
+                />
+              </div>
+              <div className='my-8 flex justify-between items-center h-8'>
+                {/* rel='noopener noreferrer' keeps the opened page from reaching window.opener */}
+                <a
+                  className='flex items-center space-x-1 leading-[18px] text-xs font-normal text-[#155EEF]'
+                  target='_blank' rel='noopener noreferrer'
+                  href='https://www.firecrawl.dev/account'
+                >
+                  <span>{t(`${I18N_PREFIX}.getApiKeyLinkText`)}</span>
+                  <LinkExternal02 className='w-3 h-3' />
+                </a>
+                <div className='flex'>
+                  <Button
+                    className='mr-2 h-9 text-sm font-medium text-gray-700'
+                    onClick={onCancel}
+                  >
+                    {t('common.operation.cancel')}
+                  </Button>
+                  <Button
+                    className='h-9 text-sm font-medium'
+                    type='primary'
+                    onClick={handleSave}
+                    loading={isSaving}
+                  >
+                    {t('common.operation.save')}
+                  </Button>
+                </div>
+
+              </div>
+            </div>
+            <div className='border-t-[0.5px] border-t-black/5'>
+              <div className='flex justify-center items-center py-3 bg-gray-50 text-xs text-gray-500'>
+                <Lock01 className='mr-1 w-3 h-3 text-gray-500' />
+                {t('common.modelProvider.encrypted.front')}
+                <a
+                  className='text-primary-600 mx-1'
+                  target='_blank' rel='noopener noreferrer'
+                  href='https://pycryptodome.readthedocs.io/en/latest/src/cipher/oaep.html'
+                >
+                  PKCS1_OAEP
+                </a>
+                {t('common.modelProvider.encrypted.back')}
+              </div>
+            </div>
+          </div>
+        </div>
+      </PortalToFollowElemContent>
+    </PortalToFollowElem>
+  )
+}
+export default React.memo(ConfigFirecrawlModal)

+ 82 - 0
web/app/components/header/account-setting/data-source-page/data-source-website/index.tsx

@@ -0,0 +1,82 @@
+'use client'
+import type { FC } from 'react'
+import React, { useCallback, useEffect, useState } from 'react'
+import { useTranslation } from 'react-i18next'
+import { useBoolean } from 'ahooks'
+import cn from 'classnames'
+import Panel from '../panel'
+import { DataSourceType } from '../panel/types'
+import ConfigFirecrawlModal from './config-firecrawl-modal'
+import { fetchFirecrawlApiKey, removeFirecrawlApiKey } from '@/service/datasets'
+
+import type {
+  DataSourceWebsiteItem,
+} from '@/models/common'
+import { useAppContext } from '@/context/app-context'
+
+import {
+  WebsiteProvider,
+} from '@/models/common'
+import Toast from '@/app/components/base/toast'
+
+type Props = {}
+
+/**
+ * Settings entry for the website data source. Loads the configured Firecrawl
+ * credentials on mount, renders them through `Panel`, and opens
+ * `ConfigFirecrawlModal` to add new ones.
+ */
+const DataSourceWebsite: FC<Props> = () => {
+  const { t } = useTranslation()
+  const { isCurrentWorkspaceManager } = useAppContext()
+  const [list, setList] = useState<DataSourceWebsiteItem[]>([])
+
+  // Fetches the stored credentials and keeps only enabled Firecrawl entries.
+  const checkSetApiKey = useCallback(async () => {
+    // The service is typed as CommonResponse, but the field read here is `settings`.
+    const res = await fetchFirecrawlApiKey() as any
+    // Fall back to an empty list when the response carries no `settings`.
+    const settings: DataSourceWebsiteItem[] = res?.settings ?? []
+    setList(settings.filter(item => item.provider === WebsiteProvider.fireCrawl && !item.disabled))
+  }, [])
+
+  useEffect(() => {
+    checkSetApiKey()
+    // eslint-disable-next-line react-hooks/exhaustive-deps
+  }, [])
+
+  const [isShowConfig, {
+    setTrue: showConfig,
+    setFalse: hideConfig,
+  }] = useBoolean(false)
+
+  // After a successful save: reload the list and close the modal.
+  const handleAdded = useCallback(() => {
+    checkSetApiKey()
+    hideConfig()
+  }, [checkSetApiKey, hideConfig])
+
+  // `onRemove` carries no item id, so the first configured entry is the one removed.
+  const handleRemove = useCallback(async () => {
+    const target = list[0]
+    // Nothing configured: avoid reading `.id` of undefined.
+    if (!target)
+      return
+    await removeFirecrawlApiKey(target.id)
+    setList([])
+    Toast.notify({
+      type: 'success',
+      message: t('common.api.remove'),
+    })
+  }, [list, t])
+
+  return (
+    <>
+      <Panel
+        type={DataSourceType.website}
+        isConfigured={list.length > 0}
+        onConfigure={showConfig}
+        readonly={!isCurrentWorkspaceManager}
+        configuredList={list.map(item => ({
+          id: item.id,
+          logo: ({ className }: { className: string }) => (
+            <div className={cn(className, 'flex items-center justify-center w-5 h-5 bg-white border border-gray-100 text-xs font-medium text-gray-500 rounded ml-3')}>🔥</div>
+          ),
+          name: 'FireCrawl',
+          isActive: true,
+        }))}
+        onRemove={handleRemove}
+      />
+      {isShowConfig && (
+        <ConfigFirecrawlModal onSaved={handleAdded} onCancel={hideConfig} />
+      )}
+    </>
+
+  )
+}
+export default React.memo(DataSourceWebsite)

+ 2 - 0
web/app/components/header/account-setting/data-source-page/index.tsx

@@ -1,6 +1,7 @@
 import useSWR from 'swr'
 import { useTranslation } from 'react-i18next'
 import DataSourceNotion from './data-source-notion'
+import DataSourceWebsite from './data-source-website'
 import { fetchDataSource } from '@/service/common'
 
 export default function DataSourcePage() {
@@ -12,6 +13,7 @@ export default function DataSourcePage() {
     <div className='mb-8'>
       <div className='mb-2 text-sm font-medium text-gray-900'>{t('common.dataSource.add')}</div>
       <DataSourceNotion workspaces={notionWorkspaces} />
+      <DataSourceWebsite />
     </div>
   )
 }

+ 78 - 0
web/app/components/header/account-setting/data-source-page/panel/config-item.tsx

@@ -0,0 +1,78 @@
+'use client'
+import type { FC } from 'react'
+import React from 'react'
+import { useTranslation } from 'react-i18next'
+import cn from 'classnames'
+import Indicator from '../../../indicator'
+import Operate from '../data-source-notion/operate'
+import { DataSourceType } from './types'
+import s from './style.module.css'
+import { Trash03 } from '@/app/components/base/icons/src/vender/line/general'
+
+/** One configured entry (a Notion workspace or a website crawler) shown as a row. */
+export type ConfigItemType = {
+  id: string
+  // Component rendered as the row's logo; it receives the spacing classes via `className`.
+  logo: FC<{ className: string }>
+  name: string
+  // Selects the default indicator and the "connected"/"active" label; otherwise yellow and "disconnected"/"inactive".
+  isActive: boolean
+  // Only read for Notion rows: `total` is forwarded to the Operate menu.
+  notionConfig?: {
+    total: number
+  }
+}
+
+type Props = {
+  type: DataSourceType
+  payload: ConfigItemType
+  // Wired to the trash button, which is rendered for website rows only.
+  onRemove: () => void
+  // Notion-only callbacks; `onChangeAuthorizedPage` is passed to Operate as `onAuthAgain`.
+  notionActions?: {
+    onChangeAuthorizedPage: () => void
+  }
+}
+
+/**
+ * Row for a single configured data source: logo, name, status indicator with
+ * label, and a type-specific action (Operate menu for Notion, trash button for website).
+ */
+const ConfigItem: FC<Props> = ({
+  type,
+  payload,
+  onRemove,
+  notionActions,
+}) => {
+  const { t } = useTranslation()
+  const isNotion = type === DataSourceType.notion
+  const isWebsite = type === DataSourceType.website
+  // Operate requires a callback, so fall back to a no-op when none is supplied.
+  const onChangeAuthorizedPage = notionActions?.onChangeAuthorizedPage || (() => { })
+
+  // No `key` on the root element: the list that renders ConfigItem keys each row itself.
+  return (
+    <div className={cn(s['workspace-item'], 'flex items-center mb-1 py-1 pr-1 bg-white rounded-lg')}>
+      <payload.logo className='ml-3 mr-1.5' />
+      <div className='grow py-[7px] leading-[18px] text-[13px] font-medium text-gray-700 truncate' title={payload.name}>{payload.name}</div>
+      {
+        payload.isActive
+          ? <Indicator className='shrink-0 mr-[6px]' />
+          : <Indicator className='shrink-0 mr-[6px]' color='yellow' />
+      }
+      <div className='shrink-0 mr-3 text-xs font-medium uppercase'>
+        {
+          payload.isActive
+            ? t(isNotion ? 'common.dataSource.notion.connected' : 'common.dataSource.website.active')
+            : t(isNotion ? 'common.dataSource.notion.disconnected' : 'common.dataSource.website.inactive')
+        }
+      </div>
+      <div className='mr-2 w-[1px] h-3 bg-gray-100' />
+      {isNotion && (
+        <Operate payload={{
+          id: payload.id,
+          total: payload.notionConfig?.total || 0,
+        }} onAuthAgain={onChangeAuthorizedPage}
+        />
+      )}
+
+      {
+        isWebsite && (
+          <div className='p-2 text-gray-500 cursor-pointer rounded-md hover:bg-black/5' onClick={onRemove} >
+            <Trash03 className='w-4 h-4 ' />
+          </div>
+        )
+      }
+
+    </div>
+  )
+}
+export default React.memo(ConfigItem)

+ 138 - 0
web/app/components/header/account-setting/data-source-page/panel/index.tsx

@@ -0,0 +1,138 @@
+'use client'
+import type { FC } from 'react'
+import React from 'react'
+import { useTranslation } from 'react-i18next'
+import { PlusIcon } from '@heroicons/react/24/solid'
+import cn from 'classnames'
+import type { ConfigItemType } from './config-item'
+import ConfigItem from './config-item'
+
+import s from './style.module.css'
+import { DataSourceType } from './types'
+
+type Props = {
+  // Selects the icon class, the i18n keys and which action buttons are rendered.
+  type: DataSourceType
+  // When true the description is hidden and the configured list is shown.
+  isConfigured: boolean
+  // Click handler of the configure / add-workspace buttons.
+  onConfigure: () => void
+  // Greys out the action buttons and disables their click handler.
+  readonly: boolean
+  // Notion only: enables the "add workspace" button while nothing is configured.
+  isSupportList?: boolean
+  configuredList: ConfigItemType[]
+  // Forwarded unchanged to every ConfigItem.
+  onRemove: () => void
+  // Forwarded unchanged to every ConfigItem.
+  notionActions?: {
+    onChangeAuthorizedPage: () => void
+  }
+}
+
+// Shared look of the "Configure" button used by both data source types.
+const CONFIGURE_BUTTON_CLASS_NAME = 'flex items-center ml-3 px-3 h-7 bg-white border border-gray-200 rounded-md text-xs font-medium text-gray-700'
+
+/**
+ * Card for one data source type: header with icon, title, description and action
+ * button, followed by the list of configured entries once `isConfigured` is true.
+ */
+const Panel: FC<Props> = ({
+  type,
+  isConfigured,
+  onConfigure,
+  readonly,
+  configuredList,
+  isSupportList,
+  onRemove,
+  notionActions,
+}) => {
+  const { t } = useTranslation()
+  const isNotion = type === DataSourceType.notion
+  const isWebsite = type === DataSourceType.website
+
+  // A read-only button must not only look disabled: drop the click handler too,
+  // so callers that pass an unguarded `onConfigure` cannot be triggered.
+  const handleConfigure = readonly ? undefined : onConfigure
+  const interactionClassName = !readonly ? 'cursor-pointer' : 'grayscale opacity-50 cursor-default'
+
+  return (
+    <div className='mb-2 border-[0.5px] border-gray-200 bg-gray-50 rounded-xl'>
+      <div className='flex items-center px-3 py-[9px]'>
+        <div className={cn(s[`${type}-icon`], 'w-8 h-8 mr-3 border border-gray-100 rounded-lg')} />
+        <div className='grow'>
+          <div className='flex items-center h-5'>
+            <div className='text-sm font-medium text-gray-800'>{t(`common.dataSource.${type}.title`)}</div>
+            {isWebsite && (
+              <div className='ml-1 leading-[18px] px-1.5 rounded-md bg-white border border-gray-100 text-xs font-medium text-gray-700'>
+                <span className='text-gray-500'>{t('common.dataSource.website.with')}</span> 🔥 FireCrawl
+              </div>
+            )}
+          </div>
+          {
+            !isConfigured && (
+              <div className='leading-5 text-xs text-gray-500'>
+                {t(`common.dataSource.${type}.description`)}
+              </div>
+            )
+          }
+        </div>
+        {/* Notion: "Configure" once connected, otherwise "add workspace" when lists are supported */}
+        {isNotion && isConfigured && (
+          <div
+            className={cn(CONFIGURE_BUTTON_CLASS_NAME, interactionClassName)}
+            onClick={handleConfigure}
+          >
+            {t('common.dataSource.configure')}
+          </div>
+        )}
+        {isNotion && !isConfigured && isSupportList && (
+          <div
+            className={cn('flex items-center px-3 py-1 min-h-7 bg-white border-[0.5px] border-gray-200 text-xs font-medium text-primary-600 rounded-md', interactionClassName)}
+            onClick={handleConfigure}
+          >
+            <PlusIcon className='w-[14px] h-[14px] mr-[5px]' />
+            {t('common.dataSource.notion.addWorkspace')}
+          </div>
+        )}
+
+        {/* Website: the button is only offered while nothing is configured */}
+        {isWebsite && !isConfigured && (
+          <div
+            className={cn(CONFIGURE_BUTTON_CLASS_NAME, interactionClassName)}
+            onClick={handleConfigure}
+          >
+            {t('common.dataSource.configure')}
+          </div>
+        )}
+
+      </div>
+      {
+        isConfigured && (
+          <div className='flex items-center px-3 h-[18px]'>
+            <div className='text-xs font-medium text-gray-500'>
+              {isNotion ? t('common.dataSource.notion.connectedWorkspace') : t('common.dataSource.website.configuredCrawlers')}
+            </div>
+            <div className='grow ml-3 border-t border-t-gray-100' />
+          </div>
+        )
+      }
+      {
+        isConfigured && (
+          <div className='px-3 pt-2 pb-3'>
+            {
+              configuredList.map(item => (
+                <ConfigItem
+                  key={item.id}
+                  type={type}
+                  payload={item}
+                  onRemove={onRemove}
+                  notionActions={notionActions} />
+              ))
+            }
+          </div>
+        )
+      }
+    </div>
+  )
+}
+export default React.memo(Panel)

+ 5 - 0
web/app/components/header/account-setting/data-source-page/data-source-notion/style.module.css → web/app/components/header/account-setting/data-source-page/panel/style.module.css

@@ -3,6 +3,11 @@
   background-size: 20px 20px;
 }
 
+.website-icon {
+  background: #ffffff url(../../../../datasets/create/assets/web.svg) center center no-repeat;
+  background-size: 20px 20px;
+}
+
 .workspace-item {
   box-shadow: 0px 1px 2px rgba(16, 24, 40, 0.05);
 }

+ 4 - 0
web/app/components/header/account-setting/data-source-page/panel/types.ts

@@ -0,0 +1,4 @@
+// Data source kinds rendered by the settings panel. The string values are
+// interpolated into i18n keys (`common.dataSource.${type}.title`) and into the
+// CSS module class name (`${type}-icon`), so they must match both.
+export enum DataSourceType {
+  notion = 'notion',
+  website = 'website',
+}

+ 13 - 0
web/i18n/en-US/common.ts

@@ -37,6 +37,10 @@ const translation = {
     duplicate: 'Duplicate',
     rename: 'Rename',
   },
+  errorMsg: {
+    fieldRequired: '{{field}} is required',
+    urlError: 'url should start with http:// or https://',
+  },
   placeholder: {
     input: 'Please enter',
     select: 'Please select',
@@ -360,6 +364,7 @@ const translation = {
   dataSource: {
     add: 'Add a data source',
     connect: 'Connect',
+    configure: 'Configure',
     notion: {
       title: 'Notion',
       description: 'Using Notion as a data source for the Knowledge.',
@@ -379,6 +384,14 @@ const translation = {
         preview: 'PREVIEW',
       },
     },
+    website: {
+      title: 'Website',
+      description: 'Import content from websites using web crawler.',
+      with: 'With',
+      configuredCrawlers: 'Configured crawlers',
+      active: 'Active',
+      inactive: 'Inactive',
+    },
   },
   plugin: {
     serpapi: {

+ 32 - 0
web/i18n/en-US/dataset-creation.ts

@@ -11,6 +11,12 @@ const translation = {
   error: {
     unavailable: 'This Knowledge is not available',
   },
+  firecrawl: {
+    configFirecrawl: 'Configure 🔥Firecrawl',
+    apiKeyPlaceholder: 'API key from firecrawl.dev, starting with "fc-"',
+    apiKeyFormatError: 'API key should start with "fc-"',
+    getApiKeyLinkText: 'Get your API key from firecrawl.dev',
+  },
   stepOne: {
     filePreview: 'File Preview',
     pagePreview: 'Page Preview',
@@ -50,6 +56,30 @@ const translation = {
       confirmButton: 'Create',
       failed: 'Creation failed',
     },
+    website: {
+      fireCrawlNotConfigured: 'Firecrawl is not configured',
+      fireCrawlNotConfiguredDescription: 'Configure Firecrawl with API key to use it.',
+      configure: 'Configure',
+      run: 'Run',
+      firecrawlTitle: 'Extract web content with 🔥Firecrawl',
+      firecrawlDoc: 'Firecrawl docs',
+      firecrawlDocLink: 'https://docs.dify.ai/guides/knowledge-base/sync_from_website',
+      options: 'Options',
+      crawlSubPage: 'Crawl sub-pages',
+      limit: 'Limit',
+      maxDepth: 'Max depth',
+      excludePaths: 'Exclude paths',
+      includeOnlyPaths: 'Include only paths',
+      extractOnlyMainContent: 'Extract only main content (no headers, navs, footers, etc.)',
+      exceptionErrorTitle: 'An exception occurred while running Firecrawl job:',
+      unknownError: 'Unknown error',
+      totalPageScraped: 'Total pages scraped:',
+      selectAll: 'Select All',
+      resetAll: 'Reset All',
+      scrapTimeInfo: 'Scraped {{total}} pages in total within {{time}}s',
+      preview: 'Preview',
+      maxDepthTooltip: 'Maximum depth to crawl. Depth 1 is the base URL, depth 2 includes the base URL and its direct children, and so on.',
+    },
   },
   stepTwo: {
     segmentation: 'Chunk settings',
@@ -86,9 +116,11 @@ const translation = {
     calculating: 'Calculating...',
     fileSource: 'Preprocess documents',
     notionSource: 'Preprocess pages',
+    websiteSource: 'Preprocess website',
     other: 'and other ',
     fileUnit: ' files',
     notionUnit: ' pages',
+    webpageUnit: ' pages',
     previousStep: 'Previous step',
     nextStep: 'Save & Process',
     save: 'Save & Process',

+ 2 - 1
web/i18n/en-US/dataset-documents.ts

@@ -2,8 +2,9 @@ const translation = {
   list: {
     title: 'Documents',
     desc: 'All files of the Knowledge are shown here, and the entire Knowledge can be linked to Dify citations or indexed via the Chat plugin.',
-    addFile: 'add file',
+    addFile: 'Add file',
     addPages: 'Add Pages',
+    addUrl: 'Add URL',
     table: {
       header: {
         fileName: 'FILE NAME',

+ 13 - 0
web/i18n/zh-Hans/common.ts

@@ -37,6 +37,10 @@ const translation = {
     duplicate: '复制',
     rename: '重命名',
   },
+  errorMsg: {
+    fieldRequired: '{{field}} 为必填项',
+    urlError: 'url 应该以 http:// 或 https:// 开头',
+  },
   placeholder: {
     input: '请输入',
     select: '请选择',
@@ -356,6 +360,7 @@ const translation = {
   dataSource: {
     add: '添加数据源',
     connect: '绑定',
+    configure: '配置',
     notion: {
       title: 'Notion',
       description: '使用 Notion 作为知识库的数据源。',
@@ -375,6 +380,14 @@ const translation = {
         preview: '预览',
       },
     },
+    website: {
+      title: '网站',
+      description: '使用网络爬虫从网站导入内容。',
+      with: '使用',
+      configuredCrawlers: '已配置的爬虫',
+      active: '可用',
+      inactive: '不可用',
+    },
   },
   plugin: {
     serpapi: {

+ 32 - 0
web/i18n/zh-Hans/dataset-creation.ts

@@ -11,6 +11,12 @@ const translation = {
   error: {
     unavailable: '该知识库不可用',
   },
+  firecrawl: {
+    configFirecrawl: '配置 🔥Firecrawl',
+    apiKeyPlaceholder: '从 firecrawl.dev 获取 API Key,以 "fc-" 开头',
+    apiKeyFormatError: 'API Key 应以 "fc-" 开头',
+    getApiKeyLinkText: '从 firecrawl.dev 获取您的 API Key',
+  },
   stepOne: {
     filePreview: '文件预览',
     pagePreview: '页面预览',
@@ -50,6 +56,30 @@ const translation = {
       confirmButton: '创建',
       failed: '创建失败',
     },
+    website: {
+      fireCrawlNotConfigured: 'Firecrawl 未配置',
+      fireCrawlNotConfiguredDescription: '请配置 Firecrawl 的 API 密钥以使用它。',
+      configure: '配置',
+      run: '运行',
+      firecrawlTitle: '使用 🔥Firecrawl 提取网页内容',
+      firecrawlDoc: 'Firecrawl 文档',
+      firecrawlDocLink: 'https://docs.dify.ai/v/zh-hans/guides/knowledge-base/sync_from_website',
+      options: '选项',
+      crawlSubPage: '爬取子页面',
+      limit: '限制数量',
+      maxDepth: '最大深度',
+      excludePaths: '排除路径',
+      includeOnlyPaths: '仅包含路径',
+      extractOnlyMainContent: '仅提取主要内容(无标题、导航、页脚等)',
+      exceptionErrorTitle: '运行 Firecrawl 时发生异常:',
+      unknownError: '未知错误',
+      totalPageScraped: '抓取页面总数:',
+      selectAll: '全选',
+      resetAll: '重置全部',
+      scrapTimeInfo: '总共在 {{time}}秒 内抓取了 {{total}} 个页面',
+      preview: '预览',
+      maxDepthTooltip: '最大抓取深度。深度 1 表示 Base URL,深度 2 表示 Base URL及其直接子页面,依此类推。',
+    },
   },
   stepTwo: {
     segmentation: '分段设置',
@@ -86,9 +116,11 @@ const translation = {
     calculating: '计算中...',
     fileSource: '预处理文档',
     notionSource: '预处理页面',
+    websiteSource: '预处理页面',
     other: '和其他 ',
     fileUnit: ' 个文件',
     notionUnit: ' 个页面',
+    webpageUnit: ' 个页面',
     previousStep: '上一步',
     nextStep: '保存并处理',
     save: '保存并处理',

+ 1 - 0
web/i18n/zh-Hans/dataset-documents.ts

@@ -4,6 +4,7 @@ const translation = {
     desc: '知识库的所有文件都在这里显示,整个知识库都可以链接到 Dify 引用或通过 Chat 插件进行索引。',
     addFile: '添加文件',
     addPages: '添加页面',
+    addUrl: '添加 URL',
     table: {
       header: {
         fileName: '文件名',

+ 33 - 0
web/models/common.ts

@@ -172,6 +172,39 @@ export type DataSourceNotion = {
   source_info: DataSourceNotionWorkspace
 }
 
+// Data source categories; `website` is the only member.
+export enum DataSourceCategory {
+  website = 'website',
+}
+// Website crawl providers. `fireCrawl` is compared against an item's `provider`
+// when the configured website data sources are listed.
+export enum WebsiteProvider {
+  fireCrawl = 'firecrawl',
+}
+
+// Credentials of a website data source: a fixed `bearer` auth type plus the
+// provider endpoint (`base_url`) and its API key.
+export type WebsiteCredentials = {
+  auth_type: 'bearer'
+  config: {
+    base_url: string
+    api_key: string
+  }
+}
+
+// Form state of the Firecrawl configuration modal; both fields start out as empty strings.
+export type FirecrawlConfig = {
+  api_key: string
+  base_url: string
+}
+
+// One stored website data source binding. `id` is what the remove request is
+// addressed to; entries with `disabled` set are filtered out before display.
+export type DataSourceWebsiteItem = {
+  id: string
+  category: DataSourceCategory.website
+  provider: WebsiteProvider
+  credentials: WebsiteCredentials
+  disabled: boolean
+  // NOTE(review): timestamp unit (seconds vs. milliseconds) is not visible here - confirm.
+  created_at: number
+  updated_at: number
+}
+// Wrapper holding the list of website data source bindings under `settings`.
+export type DataSourceWebsite = {
+  settings: DataSourceWebsiteItem[]
+}
+
 export type GithubRepo = {
   stargazers_count: number
 }

+ 24 - 1
web/models/datasets.ts

@@ -5,7 +5,7 @@ import type { Tag } from '@/app/components/base/tag-management/constant'
 export enum DataSourceType {
   FILE = 'upload_file',
   NOTION = 'notion_import',
-  WEB = 'web_import',
+  WEB = 'website_crawl',
 }
 
 export type DataSet = {
@@ -39,6 +39,22 @@ export type CustomFile = File & {
   created_at?: number
 }
 
+// Options of a website crawl job.
+// NOTE(review): presumably backs the crawl options form (sub-pages, main content
+// only, include/exclude paths, limit, max depth) - confirm against the form component.
+export type CrawlOptions = {
+  crawl_sub_pages: boolean
+  only_main_content: boolean
+  includes: string
+  excludes: string
+  // Typed as number | string; presumably because raw input text is stored - TODO confirm.
+  limit: number | string
+  max_depth: number | string
+}
+
+// NOTE(review): presumably one crawled page (title, description, markdown body
+// and the URL it came from) - confirm against the crawl result consumer.
+export type CrawlResultItem = {
+  title: string
+  markdown: string
+  description: string
+  source_url: string
+}
+
 export type FileItem = {
   fileID: string
   file: CustomFile
@@ -149,6 +165,8 @@ export type DataSourceInfo = {
     extension: string
   }
   notion_page_icon?: string
+  job_id: string
+  url: string
 }
 
 export type InitialDocumentDetail = {
@@ -219,6 +237,11 @@ export type DataSource = {
     file_info_list?: {
       file_ids: string[]
     }
+    website_info_list?: {
+      provider: string
+      job_id: string
+      urls: string[]
+    }
   }
 }
 

+ 35 - 0
web/service/datasets.ts

@@ -152,6 +152,10 @@ export const syncDocument: Fetcher<CommonResponse, CommonDocReq> = ({ datasetId,
   return get<CommonResponse>(`/datasets/${datasetId}/documents/${documentId}/notion/sync`)
 }
 
+export const syncWebsite: Fetcher<CommonResponse, CommonDocReq> = ({ datasetId, documentId }) => { // issues a get request to the website-sync path of one document
+  return get<CommonResponse>(`/datasets/${datasetId}/documents/${documentId}/website-sync`)
+}
+
 export const preImportNotionPages: Fetcher<{ notion_info: DataSourceNotionWorkspace[] }, { url: string; datasetId?: string }> = ({ url, datasetId }) => { // issues a get request to the caller-supplied url
   return get<{ notion_info: DataSourceNotionWorkspace[] }>(url, { params: { dataset_id: datasetId } }) // datasetId is forwarded as params.dataset_id and may be undefined
 }
@@ -227,6 +231,37 @@ export const fetchDatasetApiBaseUrl: Fetcher<{ api_base_url: string }, string> =
   return get<{ api_base_url: string }>(url)
 }
 
+export const fetchFirecrawlApiKey = () => { // takes no arguments; issues a get request to api-key-auth/data-source
+  return get<CommonResponse>('api-key-auth/data-source') // NOTE(review): path has no leading slash, unlike syncWebsite — presumably normalized by the request helper; confirm
+}
+
+export const createFirecrawlApiKey: Fetcher<CommonResponse, Record<string, any>> = (body) => { // posts the caller-supplied body unchanged
+  return post<CommonResponse>('api-key-auth/data-source/binding', { body })
+}
+
+export const removeFirecrawlApiKey: Fetcher<CommonResponse, string> = (id: string) => { // calls del on the api-key-auth/data-source path for the given id
+  return del<CommonResponse>(`api-key-auth/data-source/${id}`) // id is interpolated into the path as-is
+}
+
+export const createFirecrawlTask: Fetcher<CommonResponse, Record<string, any>> = (body) => { // posts to website/crawl with the caller's body plus a fixed provider
+  return post<CommonResponse>('website/crawl', {
+    body: {
+      ...body,
+      provider: 'firecrawl', // listed after the spread, so it overrides any provider key in the caller's body
+    },
+  })
+}
+
+export const checkFirecrawlTaskStatus: Fetcher<CommonResponse, string> = (jobId: string) => { // issues a get request to the crawl status path for one job id
+  return get<CommonResponse>(`website/crawl/status/${jobId}`, {
+    params: {
+      provider: 'firecrawl', // provider is fixed here, not supplied by the caller
+    },
+  }, {
+    silent: true, // passed as a separate third argument to get; NOTE(review): presumably suppresses default error reporting — confirm in the request helper
+  })
+}
+
 type FileTypesRes = { // not exported; NOTE(review): presumably a response shape, judging by the Res suffix — confirm at its use site
   allowed_extensions: string[]
 }