// utils.ts (4.6 KB)
  1. import { uniq } from 'lodash-es'
  2. import type { MultipleRetrievalConfig } from './types'
  3. import type {
  4. DataSet,
  5. SelectedDatasetsMode,
  6. } from '@/models/datasets'
  7. import {
  8. DEFAULT_WEIGHTED_SCORE,
  9. RerankingModeEnum,
  10. } from '@/models/datasets'
  11. import { RETRIEVE_METHOD } from '@/types/app'
  12. import { DATASET_DEFAULT } from '@/config'
  /**
   * Validity check for the node.
   *
   * @returns Always `true`; the function takes no input and performs no checks.
   */
  export const checkNodeValid = (): boolean => true
  /**
   * Summarises a selection of datasets as a set of boolean mode flags.
   *
   * Flags, as computed below:
   * - `allHighQuality`: no dataset has `indexing_technique === 'economy'` and none
   *   has `provider === 'external'`.
   * - `allHighQualityVectorSearch` / `allHighQualityFullTextSearch`: as above, and
   *   every `'high_quality'` dataset uses `RETRIEVE_METHOD.semantic` /
   *   `RETRIEVE_METHOD.fullText` respectively as its search method.
   * - `allEconomic`: no dataset has `indexing_technique === 'high_quality'`.
   * - `mixtureHighQualityAndEconomic`: no external dataset is present and neither
   *   `allHighQuality` nor `allEconomic` holds.
   * - `allExternal` / `allInternal`: every dataset has / no dataset has
   *   `provider === 'external'`.
   * - `mixtureInternalAndExternal`: neither `allExternal` nor `allInternal` holds.
   * - `inconsistentEmbeddingModel`: only evaluated when `allHighQuality` holds;
   *   true when the datasets name more than one distinct `embedding_model`.
   *
   * For an empty selection every flag is `false`.
   *
   * @param datasets - The currently selected datasets.
   * @returns The flag set, typed as `SelectedDatasetsMode`.
   */
  export const getSelectedDatasetsMode = (datasets: DataSet[]) => {
    // An empty selection satisfies none of the "all ..." / "mixture ..." modes,
    // so every flag that would otherwise start optimistic starts false instead.
    const hasDatasets = datasets.length > 0

    let allHighQuality = hasDatasets
    let allHighQualityVectorSearch = hasDatasets
    let allHighQualityFullTextSearch = hasDatasets
    let allEconomic = hasDatasets
    let mixtureHighQualityAndEconomic = hasDatasets
    let allExternal = hasDatasets
    let allInternal = hasDatasets
    let mixtureInternalAndExternal = hasDatasets
    let inconsistentEmbeddingModel = false

    // Each dataset can only knock flags down; nothing is ever switched back on.
    for (const dataset of datasets) {
      if (dataset.indexing_technique === 'economy') {
        allHighQuality = false
        allHighQualityVectorSearch = false
        allHighQualityFullTextSearch = false
      }

      if (dataset.indexing_technique === 'high_quality') {
        allEconomic = false

        const searchMethod = dataset.retrieval_model_dict.search_method
        if (searchMethod !== RETRIEVE_METHOD.semantic)
          allHighQualityVectorSearch = false
        if (searchMethod !== RETRIEVE_METHOD.fullText)
          allHighQualityFullTextSearch = false
      }

      if (dataset.provider !== 'external') {
        allExternal = false
      }
      else {
        // An external dataset rules out every internal-only classification.
        allInternal = false
        allHighQuality = false
        allHighQualityVectorSearch = false
        allHighQualityFullTextSearch = false
        mixtureHighQualityAndEconomic = false
      }
    }

    // A "mixture" only holds when neither of its homogeneous counterparts does.
    if (allExternal || allInternal)
      mixtureInternalAndExternal = false

    if (allHighQuality || allEconomic)
      mixtureHighQualityAndEconomic = false

    // Embedding models are only compared for a purely high-quality selection.
    if (allHighQuality)
      inconsistentEmbeddingModel = uniq(datasets.map(item => item.embedding_model)).length > 1

    return {
      allHighQuality,
      allHighQualityVectorSearch,
      allHighQualityFullTextSearch,
      allEconomic,
      mixtureHighQualityAndEconomic,
      allInternal,
      allExternal,
      mixtureInternalAndExternal,
      inconsistentEmbeddingModel,
    } as SelectedDatasetsMode
  }
  /**
   * Derives the multiple-retrieval settings to use for the selected datasets.
   *
   * The incoming config is copied field by field (with `top_k` defaulting to
   * `DATASET_DEFAULT.top_k`) and then adjusted from the dataset mode flags:
   * - `reranking_enable` keeps the incoming value only when the selection is all
   *   internal and all economic, or all external; otherwise it is forced to `true`.
   * - `reranking_mode` becomes `RerankingModeEnum.RerankingModel` for economic,
   *   mixed, external or inconsistent-embedding selections.
   * - For an all-internal, all-high-quality selection with a consistent embedding
   *   model, an unset `reranking_mode` becomes `RerankingModeEnum.WeightedScore`,
   *   and missing `weights` are filled from `DEFAULT_WEIGHTED_SCORE` using the
   *   first dataset's embedding provider and model.
   *
   * @param multipleRetrievalConfig - Current config; a falsy value is treated as
   *   a config containing only the default `top_k`.
   * @param selectedDatasets - The datasets the config applies to.
   * @returns A new config object; the input object is not mutated.
   */
  export const getMultipleRetrievalConfig = (multipleRetrievalConfig: MultipleRetrievalConfig, selectedDatasets: DataSet[]) => {
    const {
      allHighQuality,
      allHighQualityVectorSearch,
      allHighQualityFullTextSearch,
      allEconomic,
      mixtureHighQualityAndEconomic,
      allInternal,
      allExternal,
      mixtureInternalAndExternal,
      inconsistentEmbeddingModel,
    } = getSelectedDatasetsMode(selectedDatasets)

    const {
      top_k = DATASET_DEFAULT.top_k,
      score_threshold,
      reranking_mode,
      reranking_model,
      weights,
      reranking_enable,
    } = multipleRetrievalConfig || { top_k: DATASET_DEFAULT.top_k }

    // Only these selections may keep the caller's on/off choice for reranking.
    const keepsRerankingChoice = (allInternal && allEconomic) || allExternal

    const result = {
      top_k,
      score_threshold,
      reranking_mode,
      reranking_model,
      weights,
      reranking_enable: keepsRerankingChoice ? reranking_enable : true,
    }

    if (allEconomic || mixtureHighQualityAndEconomic || inconsistentEmbeddingModel || allExternal || mixtureInternalAndExternal)
      result.reranking_mode = RerankingModeEnum.RerankingModel

    // Weighted score is only offered for a homogeneous internal high-quality
    // selection. This runs after the rule above, so it wins if both apply.
    const weightedScoreEligible = allHighQuality && !inconsistentEmbeddingModel && allInternal

    if (weightedScoreEligible && reranking_mode === undefined)
      result.reranking_mode = RerankingModeEnum.WeightedScore

    // Deliberately tests the incoming `reranking_mode`, not `result.reranking_mode`.
    const weightedScoreRequested = reranking_mode === RerankingModeEnum.WeightedScore || reranking_mode === undefined
    const firstDataset = selectedDatasets[0]

    if (weightedScoreEligible && weightedScoreRequested && !weights && firstDataset) {
      // Pick the default weight preset that matches how the datasets are searched.
      const preset = allHighQualityVectorSearch
        ? DEFAULT_WEIGHTED_SCORE.allHighQualityVectorSearch
        : allHighQualityFullTextSearch
          ? DEFAULT_WEIGHTED_SCORE.allHighQualityFullTextSearch
          : DEFAULT_WEIGHTED_SCORE.other

      result.weights = {
        vector_setting: {
          vector_weight: preset.semantic,
          embedding_provider_name: firstDataset.embedding_model_provider,
          embedding_model_name: firstDataset.embedding_model,
        },
        keyword_setting: {
          keyword_weight: preset.keyword,
        },
      }
    }

    return result
  }