  // datasets.ts — type definitions for datasets, documents, segments, hit testing and related apps.
  import { AppMode } from './app'
  /**
   * Shape of a single dataset record (the element type of `DataSetListResponse.data`).
   *
   * NOTE(review): the unit of `updated_at` is not visible in this file —
   * presumably a Unix timestamp; confirm against the API.
   */
  export type DataSet = {
    id: string
    name: string
    icon: string
    icon_background: string
    description: string
    permission: 'only_me' | 'all_team_members'
    // 'upload_file' is the only data source type modelled here.
    data_source_type: 'upload_file'
    indexing_technique: 'high_quality' | 'economy'
    created_by: string
    updated_by: string
    updated_at: number
    app_count: number
    document_count: number
    word_count: number
  }
  /**
   * Metadata describing a file record.
   *
   * NOTE: this type shares its name with the DOM `File` global. Wherever it is
   * declared or imported, `File` refers to this type, not to the browser one.
   */
  export type File = {
    id: string
    name: string
    size: number
    extension: string
    mime_type: string
    created_by: string
    created_at: number
  }
  /** Response envelope carrying a list of `DataSet` records. */
  export type DataSetListResponse = {
    data: DataSet[]
  }
  /**
   * Result of an indexing estimate: token and segment counts, a price with its
   * currency, and preview strings.
   */
  export type IndexingEstimateResponse = {
    tokens: number
    total_price: number
    currency: string
    total_segments: number
    preview: string[]
  }
  /** `IndexingEstimateResponse` extended with a `total_nodes` count. */
  export interface FileIndexingEstimateResponse extends IndexingEstimateResponse {
    total_nodes: number
  }
  /**
   * Indexing progress for one item: current status, per-stage completion
   * markers and segment counters.
   */
  export type IndexingStatusResponse = {
    id: string
    indexing_status: DocumentIndexingStatus
    processing_started_at: number
    parsing_completed_at: number
    cleaning_completed_at: number
    splitting_completed_at: number
    // The next four fields are typed `any` in the original and are left
    // unchanged so existing callers keep compiling.
    // TODO(review): narrow them once the payload shape is confirmed.
    completed_at: any
    paused_at: any
    error: any
    stopped_at: any
    completed_segments: number
    total_segments: number
  }
  /** Processing mode selector used by `ProcessRuleResponse.mode`. */
  export type ProcessMode = 'automatic' | 'custom'
  /** Response pairing a `ProcessMode` with its `Rules`. */
  export type ProcessRuleResponse = {
    mode: ProcessMode
    rules: Rules
  }
  /** Rule set made of a list of pre-processing rules plus segmentation settings. */
  export type Rules = {
    pre_processing_rules: PreProcessingRule[]
    segmentation: Segmentation
  }
  /** One pre-processing rule: an identifier and an on/off flag. */
  export type PreProcessingRule = {
    id: string
    enabled: boolean
  }
  /** Segmentation settings: a separator string and a token limit. */
  export type Segmentation = {
    separator: string
    max_tokens: number
  }
  /**
   * All indexing status values. Declared `as const` so that
   * `DocumentIndexingStatus` below is the union of these literals.
   */
  export const DocumentIndexingStatusList = [
    'waiting',
    'parsing',
    'cleaning',
    'splitting',
    'indexing',
    'paused',
    'error',
    'completed',
  ] as const

  /** Union of the literals in `DocumentIndexingStatusList`. */
  export type DocumentIndexingStatus = typeof DocumentIndexingStatusList[number]
  /**
   * All display status values. Declared `as const` so that
   * `DocumentDisplayStatus` below is the union of these literals.
   */
  export const DisplayStatusList = [
    'queuing',
    'indexing',
    'paused',
    'error',
    'available',
    'enabled',
    'disabled',
    'archived',
  ] as const

  /** Union of the literals in `DisplayStatusList`. */
  export type DocumentDisplayStatus = typeof DisplayStatusList[number]
  /**
   * Data source details attached to a document.
   *
   * The `upload_file` member has the same field set as the `File` type
   * declared in this module.
   */
  export type DataSourceInfo = {
    upload_file: {
      id: string
      name: string
      size: number
      mime_type: string
      created_at: number
      created_by: string
      extension: string
    }
  }
  /**
   * Base set of document fields. `SimpleDocumentDetail` (and through it
   * `FullDocumentDetail`) is built on top of this type.
   */
  export type InitialDocumentDetail = {
    id: string
    position: number
    dataset_id: string
    data_source_type: 'upload_file'
    data_source_info: DataSourceInfo
    dataset_process_rule_id: string
    name: string
    created_from: 'api' | 'web'
    created_by: string
    created_at: number
    indexing_status: DocumentIndexingStatus
    display_status: DocumentDisplayStatus
  }
  /**
   * `InitialDocumentDetail` plus the extra fields used for list rows
   * (element type of `DocumentListResponse.data`).
   */
  export type SimpleDocumentDetail = InitialDocumentDetail & {
    enabled: boolean
    word_count: number
    error?: string | null
    archived: boolean
    updated_at: number
    hit_count: number
    // NOTE(review): `InitialDocumentDetail` declares this property as required,
    // so on the intersection it stays required despite the `?` here.
    dataset_process_rule_id?: string
  }
  /** Paged list of `SimpleDocumentDetail` records with paging metadata. */
  export type DocumentListResponse = {
    data: SimpleDocumentDetail[]
    has_more: boolean
    total: number
    page: number
    limit: number
  }
  /** Request body for creating a document. */
  export type CreateDocumentReq = {
    original_document_id?: string
    // Typed as a plain string here, whereas `DataSet.indexing_technique` is a literal union.
    indexing_technique?: string
    name: string
    data_source: DataSource
    process_rule: ProcessRule
  }
  /** Data source reference used by `CreateDocumentReq.data_source`. */
  export type DataSource = {
    type: string
    info: string // upload_file_id
    name: string
  }
  /**
   * Process rule used by `CreateDocumentReq.process_rule`.
   *
   * `mode` is a plain string here, whereas `ProcessRuleResponse.mode` uses the
   * `ProcessMode` union.
   */
  export type ProcessRule = {
    mode: string
    rules: Rules
  }
  /**
   * Response containing a document and, optionally, a dataset.
   *
   * NOTE: unlike the other exported types in this file the name is camelCase;
   * it is kept as-is so existing imports continue to resolve.
   */
  export type createDocumentResponse = {
    dataset?: DataSet
    document: InitialDocumentDetail
  }
  /**
   * `SimpleDocumentDetail` plus processing, pause/stop, disable/archive and
   * metadata fields (aliased as `DocumentDetailResponse`).
   */
  export type FullDocumentDetail = SimpleDocumentDetail & {
    batch: string
    created_api_request_id: string
    processing_started_at: number
    parsing_completed_at: number
    cleaning_completed_at: number
    splitting_completed_at: number
    tokens: number
    indexing_latency: number
    completed_at: number
    paused_by: string
    paused_at: number
    stopped_at: number
    // Originally declared as `string`. Intersected with the inherited
    // `DocumentIndexingStatus` it already resolved to that union, so the
    // precise type is stated directly.
    indexing_status: DocumentIndexingStatus
    disabled_at: number
    disabled_by: string
    archived_reason: 'rule_modified' | 're_upload'
    archived_by: string
    archived_at: number
    doc_type?: DocType | null
    doc_metadata?: DocMetadata | null
    segment_count: number
    // Open index signature kept for backward compatibility: it lets callers
    // read arbitrary extra keys (typed `any`).
    [key: string]: any
  }
  /**
   * Document metadata. The named keys are always present; the index signature
   * allows any further string-valued keys.
   */
  export type DocMetadata = {
    title: string
    language: string
    author: string
    publisher: string
    publicationDate: string
    ISBN: string
    category: string
    [key: string]: string
  }
  /** Doc type literals in the "customizable" group. */
  export const CUSTOMIZABLE_DOC_TYPES = [
    'book',
    'web_page',
    'paper',
    'social_media_post',
    'personal_document',
    'business_document',
    'im_chat_log',
  ] as const

  /** Doc type literals in the "fixed" group. */
  export const FIXED_DOC_TYPES = ['synced_from_github', 'synced_from_notion', 'wikipedia_entry'] as const

  /** Union of the literals in `CUSTOMIZABLE_DOC_TYPES`. */
  export type CustomizableDocType = typeof CUSTOMIZABLE_DOC_TYPES[number]

  /** Union of the literals in `FIXED_DOC_TYPES`. */
  export type FixedDocType = typeof FIXED_DOC_TYPES[number]

  /** Any doc type, customizable or fixed. */
  export type DocType = CustomizableDocType | FixedDocType
  /** Alias of `FullDocumentDetail`. */
  export type DocumentDetailResponse = FullDocumentDetail
  /**
   * All segment status values.
   *
   * Declared `as const`, like the other status lists in this file. Without it
   * the array is inferred as `string[]` and `SegmentStatus` collapses to
   * plain `string`.
   */
  export const SEGMENT_STATUS_LIST = ['waiting', 'completed', 'error', 'indexing'] as const

  /** Union of the literals in `SEGMENT_STATUS_LIST`. */
  export type SegmentStatus = typeof SEGMENT_STATUS_LIST[number]
  /** Query parameters for listing segments; only `limit` is required. */
  export type SegmentsQuery = {
    last_id?: string
    limit: number
    // status?: SegmentStatus
    hit_count_gte?: number
    keyword?: string
    enabled?: boolean
  }
  /** Full record for one segment (element type of `SegmentsResponse.data`). */
  export type SegmentDetailModel = {
    id: string
    position: number
    document_id: string
    content: string
    word_count: number
    tokens: number
    keywords: string[]
    index_node_id: string
    index_node_hash: string
    hit_count: number
    enabled: boolean
    disabled_at: number
    disabled_by: string
    status: SegmentStatus
    created_by: string
    created_at: number
    indexing_at: number
    completed_at: number
    error: string | null
    stopped_at: number
  }
  /** List of `SegmentDetailModel` records with paging metadata. */
  export type SegmentsResponse = {
    data: SegmentDetailModel[]
    has_more: boolean
    limit: number
    total: number
  }
  /** One hit-testing record (element type of `HitTestingRecordsResponse.data`). */
  export type HitTestingRecord = {
    id: string
    content: string
    source: 'app' | 'hit_testing' | 'plugin'
    source_app_id: string
    created_by_role: 'account' | 'end_user'
    created_by: string
    created_at: number
  }
  /**
   * One hit-testing result (element type of `HitTestingResponse.records`):
   * a segment, its score and a 2-D position.
   */
  export type HitTesting = {
    segment: Segment
    score: number
    tsne_position: TsnePosition
  }
  /** Segment as embedded in a `HitTesting` result, including its parent document. */
  export type Segment = {
    id: string
    // Refers to the `Document` type declared in this module, not the DOM global.
    document: Document
    content: string
    position: number
    word_count: number
    tokens: number
    keywords: string[]
    hit_count: number
    index_node_hash: string
  }
  /**
   * Compact document reference used by `Segment.document`.
   *
   * NOTE: this type shares its name with the DOM `Document` global. Wherever
   * it is declared or imported, `Document` refers to this type.
   */
  export type Document = {
    id: string
    data_source_type: string
    name: string
    doc_type: DocType
  }
  /** Paged list of `HitTestingRecord` entries with paging metadata. */
  export type HitTestingRecordsResponse = {
    data: HitTestingRecord[]
    has_more: boolean
    limit: number
    total: number
    page: number
  }
  /** A 2-D coordinate pair. */
  export type TsnePosition = {
    x: number
    y: number
  }
  /**
   * Hit-testing response: the query (its content and position) together with
   * the matching `HitTesting` records.
   */
  export type HitTestingResponse = {
    query: {
      content: string
      tsne_position: TsnePosition
    }
    records: HitTesting[]
  }
  /** Summary of an app (element type of `RelatedAppResponse.data`). */
  export type RelatedApp = {
    id: string
    name: string
    mode: AppMode
    icon: string
    icon_background: string
  }
  /** List of `RelatedApp` entries together with a total count. */
  export type RelatedAppResponse = {
    data: RelatedApp[]
    total: number
  }