commands.py

import base64
import datetime
import json
import math
import random
import secrets
import string
import threading
import time
import uuid

import click
import qdrant_client
from core.embedding.cached_embedding import CacheEmbedding
from core.index.index import IndexBuilder
from core.model_manager import ModelManager
from core.model_runtime.entities.model_entities import ModelType
from extensions.ext_database import db
from flask import Flask, current_app
from libs.helper import email as email_validate
from libs.password import hash_password, password_pattern, valid_password
from libs.rsa import generate_key_pair
from models.account import InvitationCode, Tenant, TenantAccountJoin
from models.dataset import Dataset, DatasetCollectionBinding, DatasetQuery, Document
from models.model import Account, App, AppModelConfig, Message, MessageAnnotation, InstalledApp
from models.provider import Provider, ProviderModel, ProviderQuotaType, ProviderType
from qdrant_client.http.models import TextIndexParams, TextIndexType, TokenizerType
from tqdm import tqdm
from werkzeug.exceptions import NotFound


@click.command('reset-password', help='Reset the account password.')
@click.option('--email', prompt=True, help='The email address of the account whose password you need to reset.')
@click.option('--new-password', prompt=True, help='The new password.')
@click.option('--password-confirm', prompt=True, help='The new password, confirmed.')
def reset_password(email, new_password, password_confirm):
    if str(new_password).strip() != str(password_confirm).strip():
        click.echo(click.style('Sorry, the two passwords do not match.', fg='red'))
        return

    account = db.session.query(Account). \
        filter(Account.email == email). \
        one_or_none()

    if not account:
        click.echo(click.style('Sorry, the account [{}] does not exist.'.format(email), fg='red'))
        return

    try:
        valid_password(new_password)
    except Exception:
        click.echo(
            click.style('Sorry, the new password must match the pattern {}.'.format(password_pattern), fg='red'))
        return

    # generate password salt
    salt = secrets.token_bytes(16)
    base64_salt = base64.b64encode(salt).decode()

    # encrypt password with salt
    password_hashed = hash_password(new_password, salt)
    base64_password_hashed = base64.b64encode(password_hashed).decode()

    account.password = base64_password_hashed
    account.password_salt = base64_salt
    db.session.commit()
    click.echo(click.style('Congratulations! The password has been reset.', fg='green'))


@click.command('reset-email', help='Reset the account email.')
@click.option('--email', prompt=True, help='The old email address of the account whose email you need to reset.')
@click.option('--new-email', prompt=True, help='The new email.')
@click.option('--email-confirm', prompt=True, help='The new email, confirmed.')
def reset_email(email, new_email, email_confirm):
    if str(new_email).strip() != str(email_confirm).strip():
        click.echo(click.style('Sorry, the new email and confirmation email do not match.', fg='red'))
        return

    account = db.session.query(Account). \
        filter(Account.email == email). \
        one_or_none()

    if not account:
        click.echo(click.style('Sorry, the account [{}] does not exist.'.format(email), fg='red'))
        return

    try:
        email_validate(new_email)
    except Exception:
        click.echo(
            click.style('Sorry, {} is not a valid email.'.format(new_email), fg='red'))
        return

    account.email = new_email
    db.session.commit()
    click.echo(click.style('Congratulations! The email has been reset.', fg='green'))


@click.command('reset-encrypt-key-pair', help='Reset the asymmetric key pair of the workspace used to encrypt LLM credentials. '
                                              'After the reset, all LLM credentials will become invalid and must be re-entered. '
                                              'Only supported in SELF_HOSTED mode.')
@click.confirmation_option(prompt=click.style('Are you sure you want to reset the encrypt key pair?'
                                              ' This operation cannot be rolled back!', fg='red'))
def reset_encrypt_key_pair():
    if current_app.config['EDITION'] != 'SELF_HOSTED':
        click.echo(click.style('Sorry, this command only supports SELF_HOSTED mode.', fg='red'))
        return

    tenant = db.session.query(Tenant).first()
    if not tenant:
        click.echo(click.style('Sorry, no workspace found. Please enter /install to initialize.', fg='red'))
        return

    tenant.encrypt_public_key = generate_key_pair(tenant.id)

    db.session.query(Provider).filter(Provider.provider_type == 'custom').delete()
    db.session.query(ProviderModel).delete()
    db.session.commit()

    click.echo(click.style('Congratulations! '
                           'The asymmetric key pair of workspace {} has been reset.'.format(tenant.id), fg='green'))


@click.command('generate-invitation-codes', help='Generate invitation codes.')
@click.option('--batch', help='The batch of invitation codes.')
@click.option('--count', prompt=True, help='Invitation codes count.')
def generate_invitation_codes(batch, count):
    if not batch:
        now = datetime.datetime.now()
        batch = now.strftime('%Y%m%d%H%M%S')

    if not count or int(count) <= 0:
        click.echo(click.style('Sorry, the count must be greater than 0.', fg='red'))
        return

    count = int(count)

    click.echo('Start generating {} invitation codes for batch {}.'.format(count, batch))

    codes = ''
    for i in range(count):
        code = generate_invitation_code()
        invitation_code = InvitationCode(
            code=code,
            batch=batch
        )
        db.session.add(invitation_code)
        click.echo(code)

        codes += code + "\n"

    db.session.commit()

    filename = 'storage/invitation-codes-{}.txt'.format(batch)

    with open(filename, 'w') as f:
        f.write(codes)

    click.echo(click.style(
        'Congratulations! Generated {} invitation codes for batch {} and saved to the file \'{}\''.format(count, batch,
                                                                                                           filename),
        fg='green'))


def generate_invitation_code():
    code = generate_upper_string()
    while db.session.query(InvitationCode).filter(InvitationCode.code == code).count() > 0:
        code = generate_upper_string()

    return code


def generate_upper_string():
    letters_digits = string.ascii_uppercase + string.digits
    result = ""
    for i in range(8):
        result += random.choice(letters_digits)

    return result


@click.command('recreate-all-dataset-indexes', help='Recreate all dataset indexes.')
def recreate_all_dataset_indexes():
    click.echo(click.style('Start recreating all dataset indexes.', fg='green'))
    recreate_count = 0

    page = 1
    while True:
        try:
            datasets = db.session.query(Dataset).filter(Dataset.indexing_technique == 'high_quality') \
                .order_by(Dataset.created_at.desc()).paginate(page=page, per_page=50)
        except NotFound:
            break

        page += 1
        for dataset in datasets:
            try:
                click.echo('Recreating dataset index: {}'.format(dataset.id))
                index = IndexBuilder.get_index(dataset, 'high_quality')
                if index and index._is_origin():
                    index.recreate_dataset(dataset)
                    recreate_count += 1
                else:
                    click.echo('passed.')
            except Exception as e:
                click.echo(
                    click.style('Recreate dataset index error: {} {}'.format(e.__class__.__name__, str(e)), fg='red'))
                continue

    click.echo(click.style('Congratulations! Recreated {} dataset indexes.'.format(recreate_count), fg='green'))


@click.command('clean-unused-dataset-indexes', help='Clean unused dataset indexes.')
def clean_unused_dataset_indexes():
    click.echo(click.style('Start cleaning unused dataset indexes.', fg='green'))
    clean_days = int(current_app.config.get('CLEAN_DAY_SETTING'))
    start_at = time.perf_counter()
    thirty_days_ago = datetime.datetime.now() - datetime.timedelta(days=clean_days)
    page = 1
    while True:
        try:
            datasets = db.session.query(Dataset).filter(Dataset.created_at < thirty_days_ago) \
                .order_by(Dataset.created_at.desc()).paginate(page=page, per_page=50)
        except NotFound:
            break

        page += 1
        for dataset in datasets:
            dataset_query = db.session.query(DatasetQuery).filter(
                DatasetQuery.created_at > thirty_days_ago,
                DatasetQuery.dataset_id == dataset.id
            ).all()

            if not dataset_query or len(dataset_query) == 0:
                documents = db.session.query(Document).filter(
                    Document.dataset_id == dataset.id,
                    Document.indexing_status == 'completed',
                    Document.enabled == True,
                    Document.archived == False,
                    Document.updated_at > thirty_days_ago
                ).all()

                if not documents or len(documents) == 0:
                    try:
                        # remove index
                        vector_index = IndexBuilder.get_index(dataset, 'high_quality')
                        kw_index = IndexBuilder.get_index(dataset, 'economy')

                        # delete from vector index
                        if vector_index:
                            if dataset.collection_binding_id:
                                vector_index.delete_by_group_id(dataset.id)
                            else:
                                vector_index.delete()
                        kw_index.delete()

                        # update document
                        update_params = {
                            Document.enabled: False
                        }

                        Document.query.filter_by(dataset_id=dataset.id).update(update_params)
                        db.session.commit()
                        click.echo(click.style('Cleaned unused dataset {} from db success!'.format(dataset.id),
                                               fg='green'))
                    except Exception as e:
                        click.echo(
                            click.style('Clean dataset index error: {} {}'.format(e.__class__.__name__, str(e)),
                                        fg='red'))

    end_at = time.perf_counter()
    click.echo(click.style('Cleaned unused datasets from db. Latency: {}'.format(end_at - start_at), fg='green'))


@click.command('sync-anthropic-hosted-providers', help='Sync anthropic hosted providers.')
def sync_anthropic_hosted_providers():
    # hosted_model_providers is assumed to be provided by the hosted-provider configuration
    # module; it is not imported in this file.
    if not hosted_model_providers.anthropic:
        click.echo(click.style('Anthropic hosted provider is not configured.', fg='red'))
        return

    click.echo(click.style('Start syncing anthropic hosted providers.', fg='green'))
    count = 0

    new_quota_limit = hosted_model_providers.anthropic.quota_limit

    page = 1
    while True:
        try:
            providers = db.session.query(Provider).filter(
                Provider.provider_name == 'anthropic',
                Provider.provider_type == ProviderType.SYSTEM.value,
                Provider.quota_type == ProviderQuotaType.TRIAL.value,
                Provider.quota_limit != new_quota_limit
            ).order_by(Provider.created_at.desc()).paginate(page=page, per_page=100)
        except NotFound:
            break

        page += 1
        for provider in providers:
            try:
                click.echo('Syncing tenant anthropic hosted provider: {}, origin: limit {}, used {}'
                           .format(provider.tenant_id, provider.quota_limit, provider.quota_used))
                original_quota_limit = provider.quota_limit

                division = math.ceil(new_quota_limit / 1000)

                provider.quota_limit = new_quota_limit if original_quota_limit == 1000 \
                    else original_quota_limit * division
                provider.quota_used = division * provider.quota_used
                db.session.commit()

                count += 1
            except Exception as e:
                click.echo(click.style(
                    'Sync tenant anthropic hosted provider error: {} {}'.format(e.__class__.__name__, str(e)),
                    fg='red'))
                continue

    click.echo(click.style('Congratulations! Synced {} anthropic hosted providers.'.format(count), fg='green'))


@click.command('create-qdrant-indexes', help='Create qdrant indexes.')
def create_qdrant_indexes():
    click.echo(click.style('Start creating qdrant indexes.', fg='green'))
    create_count = 0

    page = 1
    while True:
        try:
            datasets = db.session.query(Dataset).filter(Dataset.indexing_technique == 'high_quality') \
                .order_by(Dataset.created_at.desc()).paginate(page=page, per_page=50)
        except NotFound:
            break

        model_manager = ModelManager()

        page += 1
        for dataset in datasets:
            if dataset.index_struct_dict:
                if dataset.index_struct_dict['type'] != 'qdrant':
                    try:
                        click.echo('Create dataset qdrant index: {}'.format(dataset.id))
                        try:
                            embedding_model = model_manager.get_model_instance(
                                tenant_id=dataset.tenant_id,
                                provider=dataset.embedding_model_provider,
                                model_type=ModelType.TEXT_EMBEDDING,
                                model=dataset.embedding_model
                            )
                        except Exception:
                            try:
                                embedding_model = model_manager.get_default_model_instance(
                                    tenant_id=dataset.tenant_id,
                                    model_type=ModelType.TEXT_EMBEDDING,
                                )
                                dataset.embedding_model = embedding_model.model
                                dataset.embedding_model_provider = embedding_model.provider
                            except Exception:
                                provider = Provider(
                                    id='provider_id',
                                    tenant_id=dataset.tenant_id,
                                    provider_name='openai',
                                    provider_type=ProviderType.SYSTEM.value,
                                    encrypted_config=json.dumps({'openai_api_key': 'TEST'}),
                                    is_valid=True,
                                )
                                # NOTE: OpenAIProvider and OpenAIEmbedding are not imported in this
                                # module; this fallback assumes the legacy model-provider classes
                                # are still available.
                                model_provider = OpenAIProvider(provider=provider)
                                embedding_model = OpenAIEmbedding(name="text-embedding-ada-002",
                                                                  model_provider=model_provider)
                        embeddings = CacheEmbedding(embedding_model)

                        from core.index.vector_index.qdrant_vector_index import QdrantConfig, QdrantVectorIndex

                        index = QdrantVectorIndex(
                            dataset=dataset,
                            config=QdrantConfig(
                                endpoint=current_app.config.get('QDRANT_URL'),
                                api_key=current_app.config.get('QDRANT_API_KEY'),
                                root_path=current_app.root_path
                            ),
                            embeddings=embeddings
                        )
                        if index:
                            index.create_qdrant_dataset(dataset)
                            index_struct = {
                                "type": 'qdrant',
                                "vector_store": {
                                    "class_prefix": dataset.index_struct_dict['vector_store']['class_prefix']}
                            }
                            dataset.index_struct = json.dumps(index_struct)
                            db.session.commit()
                            create_count += 1
                        else:
                            click.echo('passed.')
                    except Exception as e:
                        click.echo(
                            click.style('Create dataset index error: {} {}'.format(e.__class__.__name__, str(e)),
                                        fg='red'))
                        continue

    click.echo(click.style('Congratulations! Created {} dataset indexes.'.format(create_count), fg='green'))


@click.command('update-qdrant-indexes', help='Update qdrant indexes.')
def update_qdrant_indexes():
    click.echo(click.style('Start updating qdrant indexes.', fg='green'))
    create_count = 0

    page = 1
    while True:
        try:
            datasets = db.session.query(Dataset).filter(Dataset.indexing_technique == 'high_quality') \
                .order_by(Dataset.created_at.desc()).paginate(page=page, per_page=50)
        except NotFound:
            break

        page += 1
        for dataset in datasets:
            if dataset.index_struct_dict:
                if dataset.index_struct_dict['type'] != 'qdrant':
                    try:
                        click.echo('Update dataset qdrant index: {}'.format(dataset.id))
                        try:
                            embedding_model = ModelFactory.get_embedding_model(
                                tenant_id=dataset.tenant_id,
                                model_provider_name=dataset.embedding_model_provider,
                                model_name=dataset.embedding_model
                            )
                        except Exception:
                            provider = Provider(
                                id='provider_id',
                                tenant_id=dataset.tenant_id,
                                provider_name='openai',
                                provider_type=ProviderType.CUSTOM.value,
                                encrypted_config=json.dumps({'openai_api_key': 'TEST'}),
                                is_valid=True,
                            )
                            model_provider = OpenAIProvider(provider=provider)
                            embedding_model = OpenAIEmbedding(name="text-embedding-ada-002",
                                                              model_provider=model_provider)
                        embeddings = CacheEmbedding(embedding_model)

                        from core.index.vector_index.qdrant_vector_index import QdrantConfig, QdrantVectorIndex

                        index = QdrantVectorIndex(
                            dataset=dataset,
                            config=QdrantConfig(
                                endpoint=current_app.config.get('QDRANT_URL'),
                                api_key=current_app.config.get('QDRANT_API_KEY'),
                                root_path=current_app.root_path
                            ),
                            embeddings=embeddings
                        )
                        if index:
                            index.update_qdrant_dataset(dataset)
                            create_count += 1
                        else:
                            click.echo('passed.')
                    except Exception as e:
                        click.echo(
                            click.style('Update dataset index error: {} {}'.format(e.__class__.__name__, str(e)),
                                        fg='red'))
                        continue

    click.echo(click.style('Congratulations! Updated {} dataset indexes.'.format(create_count), fg='green'))


@click.command('normalization-collections', help='Restore all collections in one.')
def normalization_collections():
    click.echo(click.style('Start normalization collections.', fg='green'))
    normalization_count = []

    page = 1
    while True:
        try:
            datasets = db.session.query(Dataset).filter(Dataset.indexing_technique == 'high_quality') \
                .order_by(Dataset.created_at.desc()).paginate(page=page, per_page=100)
        except NotFound:
            break

        datasets_result = datasets.items
        page += 1
        for i in range(0, len(datasets_result), 5):
            threads = []
            sub_datasets = datasets_result[i:i + 5]
            for dataset in sub_datasets:
                document_format_thread = threading.Thread(target=deal_dataset_vector, kwargs={
                    'flask_app': current_app._get_current_object(),
                    'dataset': dataset,
                    'normalization_count': normalization_count
                })
                threads.append(document_format_thread)
                document_format_thread.start()
            for thread in threads:
                thread.join()

    click.echo(click.style('Congratulations! Restored {} dataset indexes.'.format(len(normalization_count)), fg='green'))


@click.command('add-qdrant-full-text-index', help='Add qdrant full text index.')
def add_qdrant_full_text_index():
    click.echo(click.style('Start adding full text index.', fg='green'))
    binds = db.session.query(DatasetCollectionBinding).all()
    if binds and current_app.config['VECTOR_STORE'] == 'qdrant':
        qdrant_url = current_app.config['QDRANT_URL']
        qdrant_api_key = current_app.config['QDRANT_API_KEY']
        client = qdrant_client.QdrantClient(
            qdrant_url,
            api_key=qdrant_api_key,  # For Qdrant Cloud, None for local instance
        )
        for bind in binds:
            try:
                text_index_params = TextIndexParams(
                    type=TextIndexType.TEXT,
                    tokenizer=TokenizerType.MULTILINGUAL,
                    min_token_len=2,
                    max_token_len=20,
                    lowercase=True
                )
                client.create_payload_index(bind.collection_name, 'page_content',
                                            field_schema=text_index_params)
                click.echo(
                    click.style(
                        'Congratulations! Added full text index to collection {} successfully.'.format(
                            bind.collection_name),
                        fg='green'))
            except Exception as e:
                click.echo(
                    click.style('Create full text index error: {} {}'.format(e.__class__.__name__, str(e)),
                                fg='red'))


def deal_dataset_vector(flask_app: Flask, dataset: Dataset, normalization_count: list):
    with flask_app.app_context():
        try:
            click.echo('Restore dataset index: {}'.format(dataset.id))
            try:
                embedding_model = ModelFactory.get_embedding_model(
                    tenant_id=dataset.tenant_id,
                    model_provider_name=dataset.embedding_model_provider,
                    model_name=dataset.embedding_model
                )
            except Exception:
                provider = Provider(
                    id='provider_id',
                    tenant_id=dataset.tenant_id,
                    provider_name='openai',
                    provider_type=ProviderType.CUSTOM.value,
                    encrypted_config=json.dumps({'openai_api_key': 'TEST'}),
                    is_valid=True,
                )
                model_provider = OpenAIProvider(provider=provider)
                embedding_model = OpenAIEmbedding(name="text-embedding-ada-002",
                                                  model_provider=model_provider)
            embeddings = CacheEmbedding(embedding_model)

            dataset_collection_binding = db.session.query(DatasetCollectionBinding). \
                filter(DatasetCollectionBinding.provider_name == embedding_model.model_provider.provider_name,
                       DatasetCollectionBinding.model_name == embedding_model.name). \
                order_by(DatasetCollectionBinding.created_at). \
                first()

            if not dataset_collection_binding:
                dataset_collection_binding = DatasetCollectionBinding(
                    provider_name=embedding_model.model_provider.provider_name,
                    model_name=embedding_model.name,
                    collection_name="Vector_index_" + str(uuid.uuid4()).replace("-", "_") + '_Node'
                )
                db.session.add(dataset_collection_binding)
                db.session.commit()

            from core.index.vector_index.qdrant_vector_index import QdrantConfig, QdrantVectorIndex

            index = QdrantVectorIndex(
                dataset=dataset,
                config=QdrantConfig(
                    endpoint=current_app.config.get('QDRANT_URL'),
                    api_key=current_app.config.get('QDRANT_API_KEY'),
                    root_path=current_app.root_path
                ),
                embeddings=embeddings
            )
            if index:
                # index.delete_by_group_id(dataset.id)
                index.restore_dataset_in_one(dataset, dataset_collection_binding)
            else:
                click.echo('passed.')
            normalization_count.append(1)
        except Exception as e:
            click.echo(
                click.style('Restore dataset index error: {} {}'.format(e.__class__.__name__, str(e)),
                            fg='red'))


@click.command('update_app_model_configs', help='Migrate data to support paragraph variable.')
@click.option("--batch-size", default=500, help="Number of records to migrate in each batch.")
def update_app_model_configs(batch_size):
    pre_prompt_template = '{{default_input}}'
    user_input_form_template = {
        "en-US": [
            {
                "paragraph": {
                    "label": "Query",
                    "variable": "default_input",
                    "required": False,
                    "default": ""
                }
            }
        ],
        "zh-Hans": [
            {
                "paragraph": {
                    "label": "查询内容",
                    "variable": "default_input",
                    "required": False,
                    "default": ""
                }
            }
        ]
    }

    click.secho("Start migrating old data so that the text generator can support the paragraph variable.", fg='green')

    total_records = db.session.query(AppModelConfig) \
        .join(App, App.app_model_config_id == AppModelConfig.id) \
        .filter(App.mode == 'completion') \
        .count()

    if total_records == 0:
        click.secho("No data to migrate.", fg='green')
        return

    num_batches = (total_records + batch_size - 1) // batch_size

    with tqdm(total=total_records, desc="Migrating Data") as pbar:
        for i in range(num_batches):
            offset = i * batch_size
            limit = min(batch_size, total_records - offset)

            click.secho(f"Fetching batch {i + 1}/{num_batches} from source database...", fg='green')

            data_batch = db.session.query(AppModelConfig) \
                .join(App, App.app_model_config_id == AppModelConfig.id) \
                .filter(App.mode == 'completion') \
                .order_by(App.created_at) \
                .offset(offset).limit(limit).all()

            if not data_batch:
                click.secho("No more data to migrate.", fg='green')
                break

            try:
                click.secho(f"Migrating {len(data_batch)} records...", fg='green')
                for data in data_batch:
                    # click.secho(f"Migrating data {data.id}, pre_prompt: {data.pre_prompt}, user_input_form: {data.user_input_form}", fg='green')

                    if data.pre_prompt is None:
                        data.pre_prompt = pre_prompt_template
                    else:
                        if pre_prompt_template in data.pre_prompt:
                            continue
                        data.pre_prompt += pre_prompt_template

                    app_data = db.session.query(App) \
                        .filter(App.id == data.app_id) \
                        .one()

                    account_data = db.session.query(Account) \
                        .join(TenantAccountJoin, Account.id == TenantAccountJoin.account_id) \
                        .filter(TenantAccountJoin.role == 'owner') \
                        .filter(TenantAccountJoin.tenant_id == app_data.tenant_id) \
                        .one_or_none()

                    if not account_data:
                        continue

                    if data.user_input_form is None or data.user_input_form == 'null':
                        data.user_input_form = json.dumps(user_input_form_template[account_data.interface_language])
                    else:
                        raw_json_data = json.loads(data.user_input_form)
                        raw_json_data.append(user_input_form_template[account_data.interface_language][0])
                        data.user_input_form = json.dumps(raw_json_data)

                    # click.secho(f"Updated data {data.id}, pre_prompt: {data.pre_prompt}, user_input_form: {data.user_input_form}", fg='green')

                db.session.commit()
            except Exception as e:
                click.secho(f"Error while migrating data: {e}, app_id: {data.app_id}, app_model_config_id: {data.id}",
                            fg='red')
                continue

            click.secho(f"Successfully migrated batch {i + 1}/{num_batches}.", fg='green')

            pbar.update(len(data_batch))


@click.command('migrate_default_input_to_dataset_query_variable')
@click.option("--batch-size", default=500, help="Number of records to migrate in each batch.")
def migrate_default_input_to_dataset_query_variable(batch_size):
    click.secho("Starting...", fg='green')

    total_records = db.session.query(AppModelConfig) \
        .join(App, App.app_model_config_id == AppModelConfig.id) \
        .filter(App.mode == 'completion') \
        .filter(AppModelConfig.dataset_query_variable == None) \
        .count()

    if total_records == 0:
        click.secho("No data to migrate.", fg='green')
        return

    num_batches = (total_records + batch_size - 1) // batch_size

    with tqdm(total=total_records, desc="Migrating Data") as pbar:
        for i in range(num_batches):
            offset = i * batch_size
            limit = min(batch_size, total_records - offset)

            click.secho(f"Fetching batch {i + 1}/{num_batches} from source database...", fg='green')

            data_batch = db.session.query(AppModelConfig) \
                .join(App, App.app_model_config_id == AppModelConfig.id) \
                .filter(App.mode == 'completion') \
                .filter(AppModelConfig.dataset_query_variable == None) \
                .order_by(App.created_at) \
                .offset(offset).limit(limit).all()

            if not data_batch:
                click.secho("No more data to migrate.", fg='green')
                break

            try:
                click.secho(f"Migrating {len(data_batch)} records...", fg='green')
                for data in data_batch:
                    config = AppModelConfig.to_dict(data)

                    tools = config["agent_mode"]["tools"]
                    dataset_exists = "dataset" in str(tools)
                    if not dataset_exists:
                        continue

                    user_input_form = config.get("user_input_form", [])
                    for form in user_input_form:
                        paragraph = form.get('paragraph')
                        if paragraph \
                                and paragraph.get('variable') == 'query':
                            data.dataset_query_variable = 'query'
                            break

                        if paragraph \
                                and paragraph.get('variable') == 'default_input':
                            data.dataset_query_variable = 'default_input'
                            break

                db.session.commit()
            except Exception as e:
                click.secho(f"Error while migrating data: {e}, app_id: {data.app_id}, app_model_config_id: {data.id}",
                            fg='red')
                continue

            click.secho(f"Successfully migrated batch {i + 1}/{num_batches}.", fg='green')

            pbar.update(len(data_batch))


@click.command('add-annotation-question-field-value', help='Add annotation question value.')
def add_annotation_question_field_value():
    click.echo(click.style('Start adding annotation question values.', fg='green'))
    message_annotations = db.session.query(MessageAnnotation).all()
    message_annotation_deal_count = 0
    if message_annotations:
        for message_annotation in message_annotations:
            try:
                if message_annotation.message_id and not message_annotation.question:
                    message = db.session.query(Message).filter(
                        Message.id == message_annotation.message_id
                    ).first()
                    message_annotation.question = message.query
                    db.session.add(message_annotation)
                    db.session.commit()
                    message_annotation_deal_count += 1
            except Exception as e:
                click.echo(
                    click.style('Add annotation question value error: {} {}'.format(e.__class__.__name__, str(e)),
                                fg='red'))

    click.echo(
        click.style(f'Congratulations! Added annotation question values successfully. Deal count: {message_annotation_deal_count}', fg='green'))


def register_commands(app):
    app.cli.add_command(reset_password)
    app.cli.add_command(reset_email)
    app.cli.add_command(generate_invitation_codes)
    app.cli.add_command(reset_encrypt_key_pair)
    app.cli.add_command(recreate_all_dataset_indexes)
    app.cli.add_command(sync_anthropic_hosted_providers)
    app.cli.add_command(clean_unused_dataset_indexes)
    app.cli.add_command(create_qdrant_indexes)
    app.cli.add_command(update_qdrant_indexes)
    app.cli.add_command(update_app_model_configs)
    app.cli.add_command(normalization_collections)
    app.cli.add_command(migrate_default_input_to_dataset_query_variable)
    app.cli.add_command(add_qdrant_full_text_index)
    app.cli.add_command(add_annotation_question_field_value)
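

# Example usage (a sketch): once register_commands(app) has been called on the Flask
# application, the commands above are exposed through the Flask CLI. The option values
# below are illustrative placeholders, not values taken from this file.
#
#   flask reset-password --email user@example.com
#   flask generate-invitation-codes --count 10
#   flask update_app_model_configs --batch-size 500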