datasets.js 8.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249
  /**
   * Registers mock dataset routes on an Express-style application.
   *
   * Three GET routes are attached through `app.get`:
   *   - `/datasets/:id/documents`               -> hard-coded document list
   *   - `/datasets/:id/documents/:did/segments` -> 100 identical hard-coded segments
   *   - `/datasets/:id/documents/:did`          -> hard-coded document detail
   *
   * Every response is static fixture data; only `req.params.id` and
   * `req.params.did` influence the payload.
   *
   * @param {{ get: Function }} app - Object exposing `get(path, handler)`.
   *   Handlers are called as `handler(req, res)`, read `req.params` and reply
   *   through `res.send`.
   * @returns {void}
   */
  const registerAPI = function (app) {
    // Dataset id for which the list routes answer with an empty collection.
    const EMPTY_DATASET_ID = "0";
    // Number of segments returned by the segments route.
    const SEGMENT_COUNT = 100;

    // Sample paragraph; each segment's content is this paragraph, a blank
    // line, and the paragraph again.
    const SEGMENT_PARAGRAPH =
      "他的坚持让我很为难。众所周知他非常注意保护自己的隐私,而我想他应该从来没有看过我写的书。也许将来的某个时候吧,我还是这么说。但是,到了2009年,他的妻子劳伦·鲍威尔(Laurene Powell)直言不讳地对我说:“如果你真的打算写一本关于史蒂夫的书,最好现在就开始。”他当时刚刚第二次因病休假。我向劳伦坦承,当乔布斯第一次提出这个想法时,我并不知道他病了。几乎没有人知道,她说。他是在接受癌症手术之前给我打的电话,直到今天他还将此事作为一个秘密,她这么解释道。";

    // Builds the `data_source_info` value for a document with the given extension.
    const uploadedFile = (extension) => ({ upload_file: { extension } });

    // Builds a fresh copy of the five fixture documents for the list route.
    const buildDocuments = () => [
      {
        // NOTE(review): unlike the other entries this one carries `words`,
        // a numeric `word_count`, and no `hit_count` — confirm that is intended.
        id: 1,
        name: "Steve Jobs' life",
        words: "70k",
        word_count: 100,
        updated_at: 1681801029,
        indexing_status: "completed",
        archived: true,
        enabled: false,
        // upload_file fields left unmocked (kept from the original notes):
        //   id, name, mime_type, created_by: string; size, created_at: number
        data_source_info: uploadedFile("pdf"),
      },
      {
        id: 2,
        name: "Steve Jobs' life",
        word_count: "10k",
        hit_count: 10,
        updated_at: 1681801029,
        indexing_status: "waiting",
        archived: true,
        enabled: false,
        data_source_info: uploadedFile("json"),
      },
      {
        id: 3,
        name: "Steve Jobs' life xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx",
        word_count: "100k",
        hit_count: 0,
        updated_at: 1681801029,
        indexing_status: "indexing",
        archived: false,
        enabled: true,
        data_source_info: uploadedFile("txt"),
      },
      {
        id: 4,
        name: "Steve Jobs' life xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx",
        word_count: "100k",
        hit_count: 0,
        updated_at: 1681801029,
        indexing_status: "splitting",
        archived: false,
        enabled: true,
        data_source_info: uploadedFile("md"),
      },
      {
        id: 5,
        name: "Steve Jobs' life",
        word_count: "100k",
        hit_count: 0,
        updated_at: 1681801029,
        indexing_status: "error",
        archived: false,
        enabled: false,
        data_source_info: uploadedFile("html"),
      },
    ];

    // Builds one fixture segment. A factory (rather than Array#fill with a
    // single literal) gives every array slot its own object and keyWords array.
    const buildSegment = () => ({
      id: 1234,
      content: `${SEGMENT_PARAGRAPH}\n\n${SEGMENT_PARAGRAPH}`,
      enabled: true,
      keyWords: [
        "劳伦·鲍威尔",
        "劳伦·鲍威尔",
        "手术",
        "秘密",
        "癌症",
        "乔布斯",
        "史蒂夫",
        "书",
        "休假",
        "坚持",
        "隐私",
      ],
      word_count: 120,
      hit_count: 100,
      status: "ok",
      index_node_hash: "index_node_hash value",
    });

    // Factories for the type-specific part of the document detail, keyed by
    // the literal `:did` values the route recognises. A Map is used so that
    // ids such as "constructor" or "__proto__" never hit inherited properties.
    const typedMetadataFactories = new Map([
      [
        "book",
        () => ({
          doc_type: "book",
          doc_metadata: {
            title: "机器学习实战",
            language: "zh",
            author: "Peter Harrington",
            publisher: "人民邮电出版社",
            publicationDate: "2013-01-01",
            ISBN: "9787115335500",
            category: "技术",
          },
        }),
      ],
      [
        "web",
        () => ({
          doc_type: "webPage",
          doc_metadata: {
            title: "深度学习入门教程",
            url: "https://www.example.com/deep-learning-tutorial",
            language: "zh",
            publishDate: "2020-05-01",
            authorPublisher: "张三",
            topicsKeywords: "深度学习, 人工智能, 教程",
            description:
              "这是一篇详细的深度学习入门教程,适用于对人工智能和深度学习感兴趣的初学者。",
          },
        }),
      ],
      [
        "post",
        () => ({
          doc_type: "socialMediaPost",
          doc_metadata: {
            platform: "Twitter",
            authorUsername: "example_user",
            publishDate: "2021-08-15",
            postURL: "https://twitter.com/example_user/status/1234567890",
            topicsTags:
              "AI, DeepLearning, Tutorial, Example, Example2, Example3, AI, DeepLearning, Tutorial, Example, Example2, Example3, AI, DeepLearning, Tutorial, Example, Example2, Example3,",
          },
        }),
      ],
    ]);

    // Returns the type-specific fields for `did`, or an empty object when the
    // id is not one of "book", "web" or "post".
    const buildTypedMetadata = (did) => {
      const factory = typedMetadataFactories.get(did);
      return factory ? factory() : {};
    };

    // Handlers are deliberately synchronous: nothing is awaited, and a plain
    // function lets the framework observe a thrown error directly instead of
    // it turning into a rejected promise nobody handles.

    // get document list
    app.get("/datasets/:id/documents", (req, res) => {
      if (req.params.id === EMPTY_DATASET_ID) {
        res.send({ data: [] });
        return;
      }
      res.send({
        data: buildDocuments(),
        total: 100,
        id: req.params.id,
      });
    });

    // get segments of a document
    app.get("/datasets/:id/documents/:did/segments", (req, res) => {
      if (req.params.id === EMPTY_DATASET_ID) {
        res.send({ data: [] });
        return;
      }
      res.send({
        data: Array.from({ length: SEGMENT_COUNT }, buildSegment),
        limit: 100,
        has_more: true,
      });
    });

    // get doc detail
    app.get("/datasets/:id/documents/:did", (req, res) => {
      res.send({
        id: "550e8400-e29b-41d4-a716-446655440000",
        position: 1,
        dataset_id: "550e8400-e29b-41d4-a716-446655440002",
        data_source_type: "upload_file",
        data_source_info: {
          upload_file: {
            extension: "html",
            id: "550e8400-e29b-41d4-a716-446655440003",
          },
        },
        dataset_process_rule_id: "550e8400-e29b-41d4-a716-446655440004",
        batch: "20230410123456123456",
        name: "example_document",
        created_from: "web",
        created_by: "550e8400-e29b-41d4-a716-446655440005",
        created_api_request_id: "550e8400-e29b-41d4-a716-446655440006",
        created_at: 1671269696,
        processing_started_at: 1671269700,
        word_count: 11,
        parsing_completed_at: 1671269710,
        cleaning_completed_at: 1671269720,
        splitting_completed_at: 1671269730,
        tokens: 10,
        indexing_latency: 5.0,
        completed_at: 1671269740,
        paused_by: null,
        paused_at: null,
        error: null,
        stopped_at: null,
        indexing_status: "completed",
        enabled: true,
        disabled_at: null,
        disabled_by: null,
        archived: false,
        archived_reason: null,
        archived_by: null,
        archived_at: null,
        updated_at: 1671269740,
        // Type-specific fields sit between updated_at and segment_count,
        // matching the original key order of the payload.
        ...buildTypedMetadata(req.params.did),
        segment_count: 10,
        hit_count: 9,
        status: "ok",
      });
    });

    // Disabled routes, kept for reference:
    // // logout
    // app.get("/logout", async (req, res) => {
    //   res.send({
    //     result: "success",
    //   });
    // });
    // // Langgenius version
    // app.get("/version", async (req, res) => {
    //   res.send({
    //     current_version: "v1.0.0",
    //     latest_version: "v1.0.0",
    //     upgradeable: true,
    //     compatible_upgrade: true,
    //   });
    // });
  };
  // CommonJS export: the module's sole export is the registerAPI function.
  module.exports = registerAPI;