|
@@ -245,7 +245,7 @@ class DatasetService:
|
|
|
"No Embedding Model available. Please configure a valid provider in the Settings -> Model Provider."
|
|
|
)
|
|
|
except ProviderTokenNotInitError as ex:
|
|
|
- raise ValueError(f"The dataset in unavailable, due to: {ex.description}")
|
|
|
+ raise ValueError(ex.description)
|
|
|
|
|
|
@staticmethod
|
|
|
def update_dataset(dataset_id, data, user):
|
|
@@ -327,31 +327,75 @@ class DatasetService:
|
|
|
raise ValueError(ex.description)
|
|
|
else:
|
|
|
# add default plugin id to both setting sets, to make sure the plugin model provider is consistent
|
|
|
- plugin_model_provider = dataset.embedding_model_provider
|
|
|
- plugin_model_provider = str(ModelProviderID(plugin_model_provider))
|
|
|
-
|
|
|
- new_plugin_model_provider = data["embedding_model_provider"]
|
|
|
- new_plugin_model_provider = str(ModelProviderID(new_plugin_model_provider))
|
|
|
-
|
|
|
+ # Skip embedding model checks if not provided in the update request
|
|
|
if (
|
|
|
- new_plugin_model_provider != plugin_model_provider
|
|
|
- or data["embedding_model"] != dataset.embedding_model
|
|
|
+ "embedding_model_provider" not in data
|
|
|
+ or "embedding_model" not in data
|
|
|
+ or not data.get("embedding_model_provider")
|
|
|
+ or not data.get("embedding_model")
|
|
|
):
|
|
|
- action = "update"
|
|
|
+ # If the dataset already has embedding model settings, use those
|
|
|
+ if dataset.embedding_model_provider and dataset.embedding_model:
|
|
|
+ # Keep existing values
|
|
|
+ filtered_data["embedding_model_provider"] = dataset.embedding_model_provider
|
|
|
+ filtered_data["embedding_model"] = dataset.embedding_model
|
|
|
+ # If collection_binding_id exists, keep it too
|
|
|
+ if dataset.collection_binding_id:
|
|
|
+ filtered_data["collection_binding_id"] = dataset.collection_binding_id
|
|
|
+ # Otherwise, don't try to update embedding model settings at all
|
|
|
+ # Remove these fields from filtered_data if they exist but are None/empty
|
|
|
+ if "embedding_model_provider" in filtered_data and not filtered_data["embedding_model_provider"]:
|
|
|
+ del filtered_data["embedding_model_provider"]
|
|
|
+ if "embedding_model" in filtered_data and not filtered_data["embedding_model"]:
|
|
|
+ del filtered_data["embedding_model"]
|
|
|
+ else:
|
|
|
+ skip_embedding_update = False
|
|
|
try:
|
|
|
- model_manager = ModelManager()
|
|
|
- embedding_model = model_manager.get_model_instance(
|
|
|
- tenant_id=current_user.current_tenant_id,
|
|
|
- provider=data["embedding_model_provider"],
|
|
|
- model_type=ModelType.TEXT_EMBEDDING,
|
|
|
- model=data["embedding_model"],
|
|
|
- )
|
|
|
- filtered_data["embedding_model"] = embedding_model.model
|
|
|
- filtered_data["embedding_model_provider"] = embedding_model.provider
|
|
|
- dataset_collection_binding = DatasetCollectionBindingService.get_dataset_collection_binding(
|
|
|
- embedding_model.provider, embedding_model.model
|
|
|
- )
|
|
|
- filtered_data["collection_binding_id"] = dataset_collection_binding.id
|
|
|
+ # Handle existing model provider
|
|
|
+ plugin_model_provider = dataset.embedding_model_provider
|
|
|
+ plugin_model_provider_str = None
|
|
|
+ if plugin_model_provider:
|
|
|
+ plugin_model_provider_str = str(ModelProviderID(plugin_model_provider))
|
|
|
+
|
|
|
+ # Handle new model provider from request
|
|
|
+ new_plugin_model_provider = data["embedding_model_provider"]
|
|
|
+ new_plugin_model_provider_str = None
|
|
|
+ if new_plugin_model_provider:
|
|
|
+ new_plugin_model_provider_str = str(ModelProviderID(new_plugin_model_provider))
|
|
|
+
|
|
|
+ # Only update embedding model if both values are provided and different from current
|
|
|
+ if (
|
|
|
+ plugin_model_provider_str != new_plugin_model_provider_str
|
|
|
+ or data["embedding_model"] != dataset.embedding_model
|
|
|
+ ):
|
|
|
+ action = "update"
|
|
|
+ model_manager = ModelManager()
|
|
|
+ try:
|
|
|
+ embedding_model = model_manager.get_model_instance(
|
|
|
+ tenant_id=current_user.current_tenant_id,
|
|
|
+ provider=data["embedding_model_provider"],
|
|
|
+ model_type=ModelType.TEXT_EMBEDDING,
|
|
|
+ model=data["embedding_model"],
|
|
|
+ )
|
|
|
+ except ProviderTokenNotInitError:
|
|
|
+ # If we can't get the embedding model, skip updating it
|
|
|
+ # and keep the existing settings if available
|
|
|
+ if dataset.embedding_model_provider and dataset.embedding_model:
|
|
|
+ filtered_data["embedding_model_provider"] = dataset.embedding_model_provider
|
|
|
+ filtered_data["embedding_model"] = dataset.embedding_model
|
|
|
+ if dataset.collection_binding_id:
|
|
|
+ filtered_data["collection_binding_id"] = dataset.collection_binding_id
|
|
|
+ # Skip the rest of the embedding model update
|
|
|
+ skip_embedding_update = True
|
|
|
+ if not skip_embedding_update:
|
|
|
+ filtered_data["embedding_model"] = embedding_model.model
|
|
|
+ filtered_data["embedding_model_provider"] = embedding_model.provider
|
|
|
+ dataset_collection_binding = (
|
|
|
+ DatasetCollectionBindingService.get_dataset_collection_binding(
|
|
|
+ embedding_model.provider, embedding_model.model
|
|
|
+ )
|
|
|
+ )
|
|
|
+ filtered_data["collection_binding_id"] = dataset_collection_binding.id
|
|
|
except LLMBadRequestError:
|
|
|
raise ValueError(
|
|
|
"No Embedding Model available. Please configure a valid provider "
|