utils.ts 7.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246
  1. import {
  2. uniq,
  3. xorBy,
  4. } from 'lodash-es'
  5. import type { MultipleRetrievalConfig } from './types'
  6. import type {
  7. DataSet,
  8. SelectedDatasetsMode,
  9. } from '@/models/datasets'
  10. import {
  11. DEFAULT_WEIGHTED_SCORE,
  12. RerankingModeEnum,
  13. } from '@/models/datasets'
  14. import { RETRIEVE_METHOD } from '@/types/app'
  15. import { DATASET_DEFAULT } from '@/config'
  16. export const checkNodeValid = () => {
  17. return true
  18. }
  19. export const getSelectedDatasetsMode = (datasets: DataSet[] = []) => {
  20. if (datasets === null)
  21. datasets = []
  22. let allHighQuality = true
  23. let allHighQualityVectorSearch = true
  24. let allHighQualityFullTextSearch = true
  25. let allEconomic = true
  26. let mixtureHighQualityAndEconomic = true
  27. let allExternal = true
  28. let allInternal = true
  29. let mixtureInternalAndExternal = true
  30. let inconsistentEmbeddingModel = false
  31. if (!datasets.length) {
  32. allHighQuality = false
  33. allHighQualityVectorSearch = false
  34. allHighQualityFullTextSearch = false
  35. allEconomic = false
  36. mixtureHighQualityAndEconomic = false
  37. inconsistentEmbeddingModel = false
  38. allExternal = false
  39. allInternal = false
  40. mixtureInternalAndExternal = false
  41. }
  42. datasets.forEach((dataset) => {
  43. if (dataset.indexing_technique === 'economy') {
  44. allHighQuality = false
  45. allHighQualityVectorSearch = false
  46. allHighQualityFullTextSearch = false
  47. }
  48. if (dataset.indexing_technique === 'high_quality') {
  49. allEconomic = false
  50. if (dataset.retrieval_model_dict.search_method !== RETRIEVE_METHOD.semantic)
  51. allHighQualityVectorSearch = false
  52. if (dataset.retrieval_model_dict.search_method !== RETRIEVE_METHOD.fullText)
  53. allHighQualityFullTextSearch = false
  54. }
  55. if (dataset.provider !== 'external') {
  56. allExternal = false
  57. }
  58. else {
  59. allInternal = false
  60. allHighQuality = false
  61. allHighQualityVectorSearch = false
  62. allHighQualityFullTextSearch = false
  63. mixtureHighQualityAndEconomic = false
  64. }
  65. })
  66. if (allExternal || allInternal)
  67. mixtureInternalAndExternal = false
  68. if (allHighQuality || allEconomic)
  69. mixtureHighQualityAndEconomic = false
  70. if (allHighQuality)
  71. inconsistentEmbeddingModel = uniq(datasets.map(item => item.embedding_model)).length > 1
  72. return {
  73. allHighQuality,
  74. allHighQualityVectorSearch,
  75. allHighQualityFullTextSearch,
  76. allEconomic,
  77. mixtureHighQualityAndEconomic,
  78. allInternal,
  79. allExternal,
  80. mixtureInternalAndExternal,
  81. inconsistentEmbeddingModel,
  82. } as SelectedDatasetsMode
  83. }
  84. export const getMultipleRetrievalConfig = (
  85. multipleRetrievalConfig: MultipleRetrievalConfig,
  86. selectedDatasets: DataSet[],
  87. originalDatasets: DataSet[],
  88. validRerankModel?: { provider?: string; model?: string },
  89. ) => {
  90. const shouldSetWeightDefaultValue = xorBy(selectedDatasets, originalDatasets, 'id').length > 0
  91. const rerankModelIsValid = validRerankModel?.provider && validRerankModel?.model
  92. const {
  93. allHighQuality,
  94. allHighQualityVectorSearch,
  95. allHighQualityFullTextSearch,
  96. allEconomic,
  97. mixtureHighQualityAndEconomic,
  98. allInternal,
  99. allExternal,
  100. mixtureInternalAndExternal,
  101. inconsistentEmbeddingModel,
  102. } = getSelectedDatasetsMode(selectedDatasets)
  103. const {
  104. top_k = DATASET_DEFAULT.top_k,
  105. score_threshold,
  106. reranking_mode,
  107. reranking_model,
  108. weights,
  109. reranking_enable,
  110. } = multipleRetrievalConfig || { top_k: DATASET_DEFAULT.top_k }
  111. const result = {
  112. top_k,
  113. score_threshold,
  114. reranking_mode,
  115. reranking_model,
  116. weights,
  117. reranking_enable: ((allInternal && allEconomic) || allExternal) ? reranking_enable : shouldSetWeightDefaultValue,
  118. }
  119. const setDefaultWeights = () => {
  120. result.weights = {
  121. vector_setting: {
  122. vector_weight: allHighQualityVectorSearch
  123. ? DEFAULT_WEIGHTED_SCORE.allHighQualityVectorSearch.semantic
  124. : allHighQualityFullTextSearch
  125. ? DEFAULT_WEIGHTED_SCORE.allHighQualityFullTextSearch.semantic
  126. : DEFAULT_WEIGHTED_SCORE.other.semantic,
  127. embedding_provider_name: selectedDatasets[0].embedding_model_provider,
  128. embedding_model_name: selectedDatasets[0].embedding_model,
  129. },
  130. keyword_setting: {
  131. keyword_weight: allHighQualityVectorSearch
  132. ? DEFAULT_WEIGHTED_SCORE.allHighQualityVectorSearch.keyword
  133. : allHighQualityFullTextSearch
  134. ? DEFAULT_WEIGHTED_SCORE.allHighQualityFullTextSearch.keyword
  135. : DEFAULT_WEIGHTED_SCORE.other.keyword,
  136. },
  137. }
  138. }
  139. if (allEconomic || mixtureHighQualityAndEconomic || inconsistentEmbeddingModel || allExternal || mixtureInternalAndExternal) {
  140. result.reranking_mode = RerankingModeEnum.RerankingModel
  141. if (!result.reranking_model?.provider || !result.reranking_model?.model) {
  142. if (rerankModelIsValid) {
  143. result.reranking_enable = true
  144. result.reranking_model = {
  145. provider: validRerankModel?.provider || '',
  146. model: validRerankModel?.model || '',
  147. }
  148. }
  149. else {
  150. result.reranking_model = {
  151. provider: '',
  152. model: '',
  153. }
  154. }
  155. }
  156. }
  157. if (allHighQuality && !inconsistentEmbeddingModel && allInternal) {
  158. if (!reranking_mode) {
  159. if (validRerankModel?.provider && validRerankModel?.model) {
  160. result.reranking_mode = RerankingModeEnum.RerankingModel
  161. result.reranking_enable = true
  162. result.reranking_model = {
  163. provider: validRerankModel.provider,
  164. model: validRerankModel.model,
  165. }
  166. }
  167. else {
  168. result.reranking_mode = RerankingModeEnum.WeightedScore
  169. setDefaultWeights()
  170. }
  171. }
  172. if (reranking_mode === RerankingModeEnum.WeightedScore && !weights)
  173. setDefaultWeights()
  174. if (reranking_mode === RerankingModeEnum.WeightedScore && weights && shouldSetWeightDefaultValue) {
  175. if (rerankModelIsValid) {
  176. result.reranking_mode = RerankingModeEnum.RerankingModel
  177. result.reranking_enable = true
  178. result.reranking_model = {
  179. provider: validRerankModel.provider || '',
  180. model: validRerankModel.model || '',
  181. }
  182. }
  183. else {
  184. setDefaultWeights()
  185. }
  186. }
  187. if (reranking_mode === RerankingModeEnum.RerankingModel && !rerankModelIsValid && shouldSetWeightDefaultValue) {
  188. result.reranking_mode = RerankingModeEnum.WeightedScore
  189. setDefaultWeights()
  190. }
  191. if (reranking_mode === RerankingModeEnum.RerankingModel && rerankModelIsValid) {
  192. result.reranking_enable = true
  193. result.reranking_model = {
  194. provider: validRerankModel.provider || '',
  195. model: validRerankModel.model || '',
  196. }
  197. }
  198. }
  199. return result
  200. }
  201. export const checkoutRerankModelConfigedInRetrievalSettings = (
  202. datasets: DataSet[],
  203. multipleRetrievalConfig?: MultipleRetrievalConfig,
  204. ) => {
  205. if (!multipleRetrievalConfig)
  206. return true
  207. const {
  208. allEconomic,
  209. allExternal,
  210. } = getSelectedDatasetsMode(datasets)
  211. const {
  212. reranking_enable,
  213. reranking_mode,
  214. reranking_model,
  215. } = multipleRetrievalConfig
  216. if (reranking_mode === RerankingModeEnum.RerankingModel && (!reranking_model?.provider || !reranking_model?.model)) {
  217. if ((allEconomic || allExternal) && !reranking_enable)
  218. return true
  219. return false
  220. }
  221. return true
  222. }