commands.py 34 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793
  1. import datetime
  2. import json
  3. import math
  4. import random
  5. import string
  6. import threading
  7. import time
  8. import uuid
  9. import click
  10. import qdrant_client
  11. from qdrant_client.http.models import TextIndexParams, TextIndexType, TokenizerType
  12. from tqdm import tqdm
  13. from flask import current_app, Flask
  14. from langchain.embeddings import OpenAIEmbeddings
  15. from werkzeug.exceptions import NotFound
  16. from core.embedding.cached_embedding import CacheEmbedding
  17. from core.index.index import IndexBuilder
  18. from core.model_providers.model_factory import ModelFactory
  19. from core.model_providers.models.embedding.openai_embedding import OpenAIEmbedding
  20. from core.model_providers.models.entity.model_params import ModelType
  21. from core.model_providers.providers.hosted import hosted_model_providers
  22. from core.model_providers.providers.openai_provider import OpenAIProvider
  23. from libs.password import password_pattern, valid_password, hash_password
  24. from libs.helper import email as email_validate
  25. from extensions.ext_database import db
  26. from libs.rsa import generate_key_pair
  27. from models.account import InvitationCode, Tenant, TenantAccountJoin
  28. from models.dataset import Dataset, DatasetQuery, Document, DatasetCollectionBinding
  29. from models.model import Account, AppModelConfig, App, MessageAnnotation, Message
  30. import secrets
  31. import base64
  32. from models.provider import Provider, ProviderType, ProviderQuotaType, ProviderModel
  33. @click.command('reset-password', help='Reset the account password.')
  34. @click.option('--email', prompt=True, help='The email address of the account whose password you need to reset')
  35. @click.option('--new-password', prompt=True, help='the new password.')
  36. @click.option('--password-confirm', prompt=True, help='the new password confirm.')
  37. def reset_password(email, new_password, password_confirm):
  38. if str(new_password).strip() != str(password_confirm).strip():
  39. click.echo(click.style('sorry. The two passwords do not match.', fg='red'))
  40. return
  41. account = db.session.query(Account). \
  42. filter(Account.email == email). \
  43. one_or_none()
  44. if not account:
  45. click.echo(click.style('sorry. the account: [{}] not exist .'.format(email), fg='red'))
  46. return
  47. try:
  48. valid_password(new_password)
  49. except:
  50. click.echo(
  51. click.style('sorry. The passwords must match {} '.format(password_pattern), fg='red'))
  52. return
  53. # generate password salt
  54. salt = secrets.token_bytes(16)
  55. base64_salt = base64.b64encode(salt).decode()
  56. # encrypt password with salt
  57. password_hashed = hash_password(new_password, salt)
  58. base64_password_hashed = base64.b64encode(password_hashed).decode()
  59. account.password = base64_password_hashed
  60. account.password_salt = base64_salt
  61. db.session.commit()
  62. click.echo(click.style('Congratulations!, password has been reset.', fg='green'))
  63. @click.command('reset-email', help='Reset the account email.')
  64. @click.option('--email', prompt=True, help='The old email address of the account whose email you need to reset')
  65. @click.option('--new-email', prompt=True, help='the new email.')
  66. @click.option('--email-confirm', prompt=True, help='the new email confirm.')
  67. def reset_email(email, new_email, email_confirm):
  68. if str(new_email).strip() != str(email_confirm).strip():
  69. click.echo(click.style('Sorry, new email and confirm email do not match.', fg='red'))
  70. return
  71. account = db.session.query(Account). \
  72. filter(Account.email == email). \
  73. one_or_none()
  74. if not account:
  75. click.echo(click.style('sorry. the account: [{}] not exist .'.format(email), fg='red'))
  76. return
  77. try:
  78. email_validate(new_email)
  79. except:
  80. click.echo(
  81. click.style('sorry. {} is not a valid email. '.format(email), fg='red'))
  82. return
  83. account.email = new_email
  84. db.session.commit()
  85. click.echo(click.style('Congratulations!, email has been reset.', fg='green'))
  86. @click.command('reset-encrypt-key-pair', help='Reset the asymmetric key pair of workspace for encrypt LLM credentials. '
  87. 'After the reset, all LLM credentials will become invalid, '
  88. 'requiring re-entry.'
  89. 'Only support SELF_HOSTED mode.')
  90. @click.confirmation_option(prompt=click.style('Are you sure you want to reset encrypt key pair?'
  91. ' this operation cannot be rolled back!', fg='red'))
  92. def reset_encrypt_key_pair():
  93. if current_app.config['EDITION'] != 'SELF_HOSTED':
  94. click.echo(click.style('Sorry, only support SELF_HOSTED mode.', fg='red'))
  95. return
  96. tenant = db.session.query(Tenant).first()
  97. if not tenant:
  98. click.echo(click.style('Sorry, no workspace found. Please enter /install to initialize.', fg='red'))
  99. return
  100. tenant.encrypt_public_key = generate_key_pair(tenant.id)
  101. db.session.query(Provider).filter(Provider.provider_type == 'custom').delete()
  102. db.session.query(ProviderModel).delete()
  103. db.session.commit()
  104. click.echo(click.style('Congratulations! '
  105. 'the asymmetric key pair of workspace {} has been reset.'.format(tenant.id), fg='green'))
  106. @click.command('generate-invitation-codes', help='Generate invitation codes.')
  107. @click.option('--batch', help='The batch of invitation codes.')
  108. @click.option('--count', prompt=True, help='Invitation codes count.')
  109. def generate_invitation_codes(batch, count):
  110. if not batch:
  111. now = datetime.datetime.now()
  112. batch = now.strftime('%Y%m%d%H%M%S')
  113. if not count or int(count) <= 0:
  114. click.echo(click.style('sorry. the count must be greater than 0.', fg='red'))
  115. return
  116. count = int(count)
  117. click.echo('Start generate {} invitation codes for batch {}.'.format(count, batch))
  118. codes = ''
  119. for i in range(count):
  120. code = generate_invitation_code()
  121. invitation_code = InvitationCode(
  122. code=code,
  123. batch=batch
  124. )
  125. db.session.add(invitation_code)
  126. click.echo(code)
  127. codes += code + "\n"
  128. db.session.commit()
  129. filename = 'storage/invitation-codes-{}.txt'.format(batch)
  130. with open(filename, 'w') as f:
  131. f.write(codes)
  132. click.echo(click.style(
  133. 'Congratulations! Generated {} invitation codes for batch {} and saved to the file \'{}\''.format(count, batch,
  134. filename),
  135. fg='green'))
  136. def generate_invitation_code():
  137. code = generate_upper_string()
  138. while db.session.query(InvitationCode).filter(InvitationCode.code == code).count() > 0:
  139. code = generate_upper_string()
  140. return code
  141. def generate_upper_string():
  142. letters_digits = string.ascii_uppercase + string.digits
  143. result = ""
  144. for i in range(8):
  145. result += random.choice(letters_digits)
  146. return result
  147. @click.command('recreate-all-dataset-indexes', help='Recreate all dataset indexes.')
  148. def recreate_all_dataset_indexes():
  149. click.echo(click.style('Start recreate all dataset indexes.', fg='green'))
  150. recreate_count = 0
  151. page = 1
  152. while True:
  153. try:
  154. datasets = db.session.query(Dataset).filter(Dataset.indexing_technique == 'high_quality') \
  155. .order_by(Dataset.created_at.desc()).paginate(page=page, per_page=50)
  156. except NotFound:
  157. break
  158. page += 1
  159. for dataset in datasets:
  160. try:
  161. click.echo('Recreating dataset index: {}'.format(dataset.id))
  162. index = IndexBuilder.get_index(dataset, 'high_quality')
  163. if index and index._is_origin():
  164. index.recreate_dataset(dataset)
  165. recreate_count += 1
  166. else:
  167. click.echo('passed.')
  168. except Exception as e:
  169. click.echo(
  170. click.style('Recreate dataset index error: {} {}'.format(e.__class__.__name__, str(e)), fg='red'))
  171. continue
  172. click.echo(click.style('Congratulations! Recreate {} dataset indexes.'.format(recreate_count), fg='green'))
  173. @click.command('clean-unused-dataset-indexes', help='Clean unused dataset indexes.')
  174. def clean_unused_dataset_indexes():
  175. click.echo(click.style('Start clean unused dataset indexes.', fg='green'))
  176. clean_days = int(current_app.config.get('CLEAN_DAY_SETTING'))
  177. start_at = time.perf_counter()
  178. thirty_days_ago = datetime.datetime.now() - datetime.timedelta(days=clean_days)
  179. page = 1
  180. while True:
  181. try:
  182. datasets = db.session.query(Dataset).filter(Dataset.created_at < thirty_days_ago) \
  183. .order_by(Dataset.created_at.desc()).paginate(page=page, per_page=50)
  184. except NotFound:
  185. break
  186. page += 1
  187. for dataset in datasets:
  188. dataset_query = db.session.query(DatasetQuery).filter(
  189. DatasetQuery.created_at > thirty_days_ago,
  190. DatasetQuery.dataset_id == dataset.id
  191. ).all()
  192. if not dataset_query or len(dataset_query) == 0:
  193. documents = db.session.query(Document).filter(
  194. Document.dataset_id == dataset.id,
  195. Document.indexing_status == 'completed',
  196. Document.enabled == True,
  197. Document.archived == False,
  198. Document.updated_at > thirty_days_ago
  199. ).all()
  200. if not documents or len(documents) == 0:
  201. try:
  202. # remove index
  203. vector_index = IndexBuilder.get_index(dataset, 'high_quality')
  204. kw_index = IndexBuilder.get_index(dataset, 'economy')
  205. # delete from vector index
  206. if vector_index:
  207. if dataset.collection_binding_id:
  208. vector_index.delete_by_group_id(dataset.id)
  209. else:
  210. if dataset.collection_binding_id:
  211. vector_index.delete_by_group_id(dataset.id)
  212. else:
  213. vector_index.delete()
  214. kw_index.delete()
  215. # update document
  216. update_params = {
  217. Document.enabled: False
  218. }
  219. Document.query.filter_by(dataset_id=dataset.id).update(update_params)
  220. db.session.commit()
  221. click.echo(click.style('Cleaned unused dataset {} from db success!'.format(dataset.id),
  222. fg='green'))
  223. except Exception as e:
  224. click.echo(
  225. click.style('clean dataset index error: {} {}'.format(e.__class__.__name__, str(e)),
  226. fg='red'))
  227. end_at = time.perf_counter()
  228. click.echo(click.style('Cleaned unused dataset from db success latency: {}'.format(end_at - start_at), fg='green'))
  229. @click.command('sync-anthropic-hosted-providers', help='Sync anthropic hosted providers.')
  230. def sync_anthropic_hosted_providers():
  231. if not hosted_model_providers.anthropic:
  232. click.echo(click.style('Anthropic hosted provider is not configured.', fg='red'))
  233. return
  234. click.echo(click.style('Start sync anthropic hosted providers.', fg='green'))
  235. count = 0
  236. new_quota_limit = hosted_model_providers.anthropic.quota_limit
  237. page = 1
  238. while True:
  239. try:
  240. providers = db.session.query(Provider).filter(
  241. Provider.provider_name == 'anthropic',
  242. Provider.provider_type == ProviderType.SYSTEM.value,
  243. Provider.quota_type == ProviderQuotaType.TRIAL.value,
  244. Provider.quota_limit != new_quota_limit
  245. ).order_by(Provider.created_at.desc()).paginate(page=page, per_page=100)
  246. except NotFound:
  247. break
  248. page += 1
  249. for provider in providers:
  250. try:
  251. click.echo('Syncing tenant anthropic hosted provider: {}, origin: limit {}, used {}'
  252. .format(provider.tenant_id, provider.quota_limit, provider.quota_used))
  253. original_quota_limit = provider.quota_limit
  254. division = math.ceil(new_quota_limit / 1000)
  255. provider.quota_limit = new_quota_limit if original_quota_limit == 1000 \
  256. else original_quota_limit * division
  257. provider.quota_used = division * provider.quota_used
  258. db.session.commit()
  259. count += 1
  260. except Exception as e:
  261. click.echo(click.style(
  262. 'Sync tenant anthropic hosted provider error: {} {}'.format(e.__class__.__name__, str(e)),
  263. fg='red'))
  264. continue
  265. click.echo(click.style('Congratulations! Synced {} anthropic hosted providers.'.format(count), fg='green'))
@click.command('create-qdrant-indexes', help='Create qdrant indexes.')
def create_qdrant_indexes():
    """Create a qdrant index for every high-quality dataset not yet on qdrant.

    For each dataset: resolve an embedding model with a three-level fallback
    (the dataset's configured model -> the tenant's default model -> a
    placeholder OpenAI provider), build a QdrantVectorIndex, create the
    qdrant collection, and rewrite index_struct to type 'qdrant'.
    """
    click.echo(click.style('Start create qdrant indexes.', fg='green'))
    create_count = 0

    page = 1
    while True:
        try:
            datasets = db.session.query(Dataset).filter(Dataset.indexing_technique == 'high_quality') \
                .order_by(Dataset.created_at.desc()).paginate(page=page, per_page=50)
        except NotFound:
            # paginate raises NotFound past the last page
            break

        page += 1
        for dataset in datasets:
            if dataset.index_struct_dict:
                if dataset.index_struct_dict['type'] != 'qdrant':
                    try:
                        click.echo('Create dataset qdrant index: {}'.format(dataset.id))
                        try:
                            # first choice: the embedding model configured on the dataset
                            embedding_model = ModelFactory.get_embedding_model(
                                tenant_id=dataset.tenant_id,
                                model_provider_name=dataset.embedding_model_provider,
                                model_name=dataset.embedding_model
                            )
                        except Exception:
                            try:
                                # second choice: the tenant's default embedding model,
                                # persisted back onto the dataset
                                embedding_model = ModelFactory.get_embedding_model(
                                    tenant_id=dataset.tenant_id
                                )
                                dataset.embedding_model = embedding_model.name
                                dataset.embedding_model_provider = embedding_model.model_provider.provider_name
                            except Exception:
                                # last resort: a placeholder OpenAI provider
                                # (dummy credentials) just to construct the model
                                provider = Provider(
                                    id='provider_id',
                                    tenant_id=dataset.tenant_id,
                                    provider_name='openai',
                                    provider_type=ProviderType.SYSTEM.value,
                                    encrypted_config=json.dumps({'openai_api_key': 'TEST'}),
                                    is_valid=True,
                                )
                                model_provider = OpenAIProvider(provider=provider)
                                embedding_model = OpenAIEmbedding(name="text-embedding-ada-002",
                                                                  model_provider=model_provider)
                        embeddings = CacheEmbedding(embedding_model)

                        # imported lazily to avoid a module-level import cycle —
                        # TODO confirm; matches the pattern used elsewhere in this file
                        from core.index.vector_index.qdrant_vector_index import QdrantVectorIndex, QdrantConfig

                        index = QdrantVectorIndex(
                            dataset=dataset,
                            config=QdrantConfig(
                                endpoint=current_app.config.get('QDRANT_URL'),
                                api_key=current_app.config.get('QDRANT_API_KEY'),
                                root_path=current_app.root_path
                            ),
                            embeddings=embeddings
                        )
                        if index:
                            index.create_qdrant_dataset(dataset)
                            # keep the old class_prefix so existing vectors stay addressable
                            index_struct = {
                                "type": 'qdrant',
                                "vector_store": {
                                    "class_prefix": dataset.index_struct_dict['vector_store']['class_prefix']}
                            }
                            dataset.index_struct = json.dumps(index_struct)
                            db.session.commit()
                            create_count += 1
                        else:
                            click.echo('passed.')
                    except Exception as e:
                        click.echo(
                            click.style('Create dataset index error: {} {}'.format(e.__class__.__name__, str(e)),
                                        fg='red'))
                        continue

    click.echo(click.style('Congratulations! Create {} dataset indexes.'.format(create_count), fg='green'))
  337. @click.command('update-qdrant-indexes', help='Update qdrant indexes.')
  338. def update_qdrant_indexes():
  339. click.echo(click.style('Start Update qdrant indexes.', fg='green'))
  340. create_count = 0
  341. page = 1
  342. while True:
  343. try:
  344. datasets = db.session.query(Dataset).filter(Dataset.indexing_technique == 'high_quality') \
  345. .order_by(Dataset.created_at.desc()).paginate(page=page, per_page=50)
  346. except NotFound:
  347. break
  348. page += 1
  349. for dataset in datasets:
  350. if dataset.index_struct_dict:
  351. if dataset.index_struct_dict['type'] != 'qdrant':
  352. try:
  353. click.echo('Update dataset qdrant index: {}'.format(dataset.id))
  354. try:
  355. embedding_model = ModelFactory.get_embedding_model(
  356. tenant_id=dataset.tenant_id,
  357. model_provider_name=dataset.embedding_model_provider,
  358. model_name=dataset.embedding_model
  359. )
  360. except Exception:
  361. provider = Provider(
  362. id='provider_id',
  363. tenant_id=dataset.tenant_id,
  364. provider_name='openai',
  365. provider_type=ProviderType.CUSTOM.value,
  366. encrypted_config=json.dumps({'openai_api_key': 'TEST'}),
  367. is_valid=True,
  368. )
  369. model_provider = OpenAIProvider(provider=provider)
  370. embedding_model = OpenAIEmbedding(name="text-embedding-ada-002",
  371. model_provider=model_provider)
  372. embeddings = CacheEmbedding(embedding_model)
  373. from core.index.vector_index.qdrant_vector_index import QdrantVectorIndex, QdrantConfig
  374. index = QdrantVectorIndex(
  375. dataset=dataset,
  376. config=QdrantConfig(
  377. endpoint=current_app.config.get('QDRANT_URL'),
  378. api_key=current_app.config.get('QDRANT_API_KEY'),
  379. root_path=current_app.root_path
  380. ),
  381. embeddings=embeddings
  382. )
  383. if index:
  384. index.update_qdrant_dataset(dataset)
  385. create_count += 1
  386. else:
  387. click.echo('passed.')
  388. except Exception as e:
  389. click.echo(
  390. click.style('Create dataset index error: {} {}'.format(e.__class__.__name__, str(e)),
  391. fg='red'))
  392. continue
  393. click.echo(click.style('Congratulations! Update {} dataset indexes.'.format(create_count), fg='green'))
@click.command('normalization-collections', help='restore all collections in one')
def normalization_collections():
    """Restore all high-quality datasets into shared collections.

    Pages through datasets 100 at a time and hands them to deal_dataset_vector
    in groups of five, one thread per dataset. `normalization_count` is a list
    shared across threads; each successful restore appends one element, so its
    final length is the success count.
    """
    click.echo(click.style('Start normalization collections.', fg='green'))
    normalization_count = []

    page = 1
    while True:
        try:
            datasets = db.session.query(Dataset).filter(Dataset.indexing_technique == 'high_quality') \
                .order_by(Dataset.created_at.desc()).paginate(page=page, per_page=100)
        except NotFound:
            # paginate raises NotFound past the last page
            break

        datasets_result = datasets.items
        page += 1
        # process in slices of five, joining each batch before starting the next
        for i in range(0, len(datasets_result), 5):
            threads = []
            sub_datasets = datasets_result[i:i + 5]
            for dataset in sub_datasets:
                # the real app object is needed because threads build their own
                # app context from it (current_app itself is a proxy)
                document_format_thread = threading.Thread(target=deal_dataset_vector, kwargs={
                    'flask_app': current_app._get_current_object(),
                    'dataset': dataset,
                    'normalization_count': normalization_count
                })
                threads.append(document_format_thread)
                document_format_thread.start()
            for thread in threads:
                thread.join()

    click.echo(click.style('Congratulations! restore {} dataset indexes.'.format(len(normalization_count)), fg='green'))
@click.command('add-qdrant-full-text-index', help='add qdrant full text index')
def add_qdrant_full_text_index():
    """Add a full-text payload index on 'page_content' to every qdrant collection.

    Only runs when collection bindings exist and VECTOR_STORE is 'qdrant'.
    Each binding's collection gets a multilingual text index; failures are
    logged per collection and do not stop the loop.
    """
    click.echo(click.style('Start add full text index.', fg='green'))
    binds = db.session.query(DatasetCollectionBinding).all()
    if binds and current_app.config['VECTOR_STORE'] == 'qdrant':
        qdrant_url = current_app.config['QDRANT_URL']
        qdrant_api_key = current_app.config['QDRANT_API_KEY']
        client = qdrant_client.QdrantClient(
            qdrant_url,
            api_key=qdrant_api_key,  # For Qdrant Cloud, None for local instance
        )
        for bind in binds:
            try:
                text_index_params = TextIndexParams(
                    type=TextIndexType.TEXT,
                    tokenizer=TokenizerType.MULTILINGUAL,
                    min_token_len=2,
                    max_token_len=20,
                    lowercase=True
                )
                client.create_payload_index(bind.collection_name, 'page_content',
                                            field_schema=text_index_params)
            except Exception as e:
                click.echo(
                    click.style('Create full text index error: {} {}'.format(e.__class__.__name__, str(e)),
                                fg='red'))
            # NOTE(review): success is echoed per collection even after a logged
            # failure for that same collection — confirm this is intended.
            click.echo(
                click.style(
                    'Congratulations! add collection {} full text index successful.'.format(bind.collection_name),
                    fg='green'))
def deal_dataset_vector(flask_app: Flask, dataset: Dataset, normalization_count: list):
    """Restore one dataset's vectors into its shared collection (thread worker).

    Runs inside an app context built from `flask_app`. On success, appends 1 to
    the shared `normalization_count` list so the caller can tally restored
    datasets; on failure, logs the error and appends nothing.
    """
    with flask_app.app_context():
        try:
            click.echo('restore dataset index: {}'.format(dataset.id))
            try:
                # preferred: the embedding model configured on the dataset
                embedding_model = ModelFactory.get_embedding_model(
                    tenant_id=dataset.tenant_id,
                    model_provider_name=dataset.embedding_model_provider,
                    model_name=dataset.embedding_model
                )
            except Exception:
                # fallback: a placeholder OpenAI provider (dummy credentials)
                # just to construct an embedding model object
                provider = Provider(
                    id='provider_id',
                    tenant_id=dataset.tenant_id,
                    provider_name='openai',
                    provider_type=ProviderType.CUSTOM.value,
                    encrypted_config=json.dumps({'openai_api_key': 'TEST'}),
                    is_valid=True,
                )
                model_provider = OpenAIProvider(provider=provider)
                embedding_model = OpenAIEmbedding(name="text-embedding-ada-002",
                                                  model_provider=model_provider)
            embeddings = CacheEmbedding(embedding_model)

            # reuse the oldest collection binding for this provider/model pair,
            # or create one if none exists yet
            dataset_collection_binding = db.session.query(DatasetCollectionBinding). \
                filter(DatasetCollectionBinding.provider_name == embedding_model.model_provider.provider_name,
                       DatasetCollectionBinding.model_name == embedding_model.name). \
                order_by(DatasetCollectionBinding.created_at). \
                first()

            if not dataset_collection_binding:
                dataset_collection_binding = DatasetCollectionBinding(
                    provider_name=embedding_model.model_provider.provider_name,
                    model_name=embedding_model.name,
                    collection_name="Vector_index_" + str(uuid.uuid4()).replace("-", "_") + '_Node'
                )
                db.session.add(dataset_collection_binding)
                db.session.commit()

            from core.index.vector_index.qdrant_vector_index import QdrantVectorIndex, QdrantConfig

            index = QdrantVectorIndex(
                dataset=dataset,
                config=QdrantConfig(
                    endpoint=current_app.config.get('QDRANT_URL'),
                    api_key=current_app.config.get('QDRANT_API_KEY'),
                    root_path=current_app.root_path
                ),
                embeddings=embeddings
            )
            if index:
                # index.delete_by_group_id(dataset.id)
                index.restore_dataset_in_one(dataset, dataset_collection_binding)
            else:
                click.echo('passed.')

            normalization_count.append(1)
        except Exception as e:
            click.echo(
                click.style('Create dataset index error: {} {}'.format(e.__class__.__name__, str(e)),
                            fg='red'))
@click.command('update_app_model_configs', help='Migrate data to support paragraph variable.')
@click.option("--batch-size", default=500, help="Number of records to migrate in each batch.")
def update_app_model_configs(batch_size):
    """Migrate completion-mode app configs to support the paragraph variable.

    Appends '{{default_input}}' to each config's pre_prompt (unless already
    present) and adds a matching paragraph field to user_input_form, localized
    to the app owner's interface language. Processes records in batches of
    `batch_size`, committing once per batch.
    """
    # the variable placeholder appended to every migrated pre_prompt
    pre_prompt_template = '{{default_input}}'
    # per-locale paragraph form definitions, keyed by interface language
    user_input_form_template = {
        "en-US": [
            {
                "paragraph": {
                    "label": "Query",
                    "variable": "default_input",
                    "required": False,
                    "default": ""
                }
            }
        ],
        "zh-Hans": [
            {
                "paragraph": {
                    "label": "查询内容",
                    "variable": "default_input",
                    "required": False,
                    "default": ""
                }
            }
        ]
    }

    click.secho("Start migrate old data that the text generator can support paragraph variable.", fg='green')

    total_records = db.session.query(AppModelConfig) \
        .join(App, App.app_model_config_id == AppModelConfig.id) \
        .filter(App.mode == 'completion') \
        .count()

    if total_records == 0:
        click.secho("No data to migrate.", fg='green')
        return

    num_batches = (total_records + batch_size - 1) // batch_size

    with tqdm(total=total_records, desc="Migrating Data") as pbar:
        for i in range(num_batches):
            offset = i * batch_size
            limit = min(batch_size, total_records - offset)

            click.secho(f"Fetching batch {i + 1}/{num_batches} from source database...", fg='green')

            data_batch = db.session.query(AppModelConfig) \
                .join(App, App.app_model_config_id == AppModelConfig.id) \
                .filter(App.mode == 'completion') \
                .order_by(App.created_at) \
                .offset(offset).limit(limit).all()

            if not data_batch:
                click.secho("No more data to migrate.", fg='green')
                break

            try:
                click.secho(f"Migrating {len(data_batch)} records...", fg='green')
                for data in data_batch:
                    # click.secho(f"Migrating data {data.id}, pre_prompt: {data.pre_prompt}, user_input_form: {data.user_input_form}", fg='green')

                    if data.pre_prompt is None:
                        data.pre_prompt = pre_prompt_template
                    else:
                        if pre_prompt_template in data.pre_prompt:
                            # already migrated — skip this record entirely
                            continue
                        data.pre_prompt += pre_prompt_template

                    app_data = db.session.query(App) \
                        .filter(App.id == data.app_id) \
                        .one()

                    # owner account determines the locale of the injected form field
                    account_data = db.session.query(Account) \
                        .join(TenantAccountJoin, Account.id == TenantAccountJoin.account_id) \
                        .filter(TenantAccountJoin.role == 'owner') \
                        .filter(TenantAccountJoin.tenant_id == app_data.tenant_id) \
                        .one_or_none()

                    if not account_data:
                        continue

                    if data.user_input_form is None or data.user_input_form == 'null':
                        data.user_input_form = json.dumps(user_input_form_template[account_data.interface_language])
                    else:
                        raw_json_data = json.loads(data.user_input_form)
                        raw_json_data.append(user_input_form_template[account_data.interface_language][0])
                        data.user_input_form = json.dumps(raw_json_data)

                    # click.secho(f"Updated data {data.id}, pre_prompt: {data.pre_prompt}, user_input_form: {data.user_input_form}", fg='green')

                # one commit per batch; a failure rolls the whole batch into the except below
                db.session.commit()
            except Exception as e:
                click.secho(f"Error while migrating data: {e}, app_id: {data.app_id}, app_model_config_id: {data.id}",
                            fg='red')
                continue

            click.secho(f"Successfully migrated batch {i + 1}/{num_batches}.", fg='green')

            pbar.update(len(data_batch))
@click.command('migrate_default_input_to_dataset_query_variable')
@click.option("--batch-size", default=500, help="Number of records to migrate in each batch.")
def migrate_default_input_to_dataset_query_variable(batch_size):
    """Backfill dataset_query_variable on completion apps that use datasets.

    For each completion-mode AppModelConfig with a null dataset_query_variable
    whose agent tools mention a dataset, set dataset_query_variable to the
    first matching paragraph variable: 'query' wins over 'default_input'.
    Processes records in batches of `batch_size`, committing once per batch.
    """
    click.secho("Starting...", fg='green')

    total_records = db.session.query(AppModelConfig) \
        .join(App, App.app_model_config_id == AppModelConfig.id) \
        .filter(App.mode == 'completion') \
        .filter(AppModelConfig.dataset_query_variable == None) \
        .count()

    if total_records == 0:
        click.secho("No data to migrate.", fg='green')
        return

    num_batches = (total_records + batch_size - 1) // batch_size

    with tqdm(total=total_records, desc="Migrating Data") as pbar:
        for i in range(num_batches):
            offset = i * batch_size
            limit = min(batch_size, total_records - offset)

            click.secho(f"Fetching batch {i + 1}/{num_batches} from source database...", fg='green')

            data_batch = db.session.query(AppModelConfig) \
                .join(App, App.app_model_config_id == AppModelConfig.id) \
                .filter(App.mode == 'completion') \
                .filter(AppModelConfig.dataset_query_variable == None) \
                .order_by(App.created_at) \
                .offset(offset).limit(limit).all()

            if not data_batch:
                click.secho("No more data to migrate.", fg='green')
                break

            try:
                click.secho(f"Migrating {len(data_batch)} records...", fg='green')
                for data in data_batch:
                    config = AppModelConfig.to_dict(data)
                    tools = config["agent_mode"]["tools"]

                    # cheap substring check: does any configured tool reference a dataset?
                    dataset_exists = "dataset" in str(tools)

                    if not dataset_exists:
                        continue

                    user_input_form = config.get("user_input_form", [])
                    for form in user_input_form:
                        paragraph = form.get('paragraph')
                        if paragraph \
                                and paragraph.get('variable') == 'query':
                            data.dataset_query_variable = 'query'
                            break

                        if paragraph \
                                and paragraph.get('variable') == 'default_input':
                            data.dataset_query_variable = 'default_input'
                            break

                # one commit per batch
                db.session.commit()
            except Exception as e:
                click.secho(f"Error while migrating data: {e}, app_id: {data.app_id}, app_model_config_id: {data.id}",
                            fg='red')
                continue

            click.secho(f"Successfully migrated batch {i + 1}/{num_batches}.", fg='green')

            pbar.update(len(data_batch))
  642. @click.command('add-annotation-question-field-value', help='add annotation question value')
  643. def add_annotation_question_field_value():
  644. click.echo(click.style('Start add annotation question value.', fg='green'))
  645. message_annotations = db.session.query(MessageAnnotation).all()
  646. message_annotation_deal_count = 0
  647. if message_annotations:
  648. for message_annotation in message_annotations:
  649. try:
  650. if message_annotation.message_id and not message_annotation.question:
  651. message = db.session.query(Message).filter(
  652. Message.id == message_annotation.message_id
  653. ).first()
  654. message_annotation.question = message.query
  655. db.session.add(message_annotation)
  656. db.session.commit()
  657. message_annotation_deal_count += 1
  658. except Exception as e:
  659. click.echo(
  660. click.style('Add annotation question value error: {} {}'.format(e.__class__.__name__, str(e)),
  661. fg='red'))
  662. click.echo(
  663. click.style(f'Congratulations! add annotation question value successful. Deal count {message_annotation_deal_count}', fg='green'))
  664. def register_commands(app):
  665. app.cli.add_command(reset_password)
  666. app.cli.add_command(reset_email)
  667. app.cli.add_command(generate_invitation_codes)
  668. app.cli.add_command(reset_encrypt_key_pair)
  669. app.cli.add_command(recreate_all_dataset_indexes)
  670. app.cli.add_command(sync_anthropic_hosted_providers)
  671. app.cli.add_command(clean_unused_dataset_indexes)
  672. app.cli.add_command(create_qdrant_indexes)
  673. app.cli.add_command(update_qdrant_indexes)
  674. app.cli.add_command(update_app_model_configs)
  675. app.cli.add_command(normalization_collections)
  676. app.cli.add_command(migrate_default_input_to_dataset_query_variable)
  677. app.cli.add_command(add_qdrant_full_text_index)
  678. app.cli.add_command(add_annotation_question_field_value)