|
@@ -52,7 +52,7 @@ class BaseIndexProcessor(ABC):
|
|
|
|
|
|
character_splitter = FixedRecursiveCharacterTextSplitter.from_encoder(
|
|
|
chunk_size=segmentation["max_tokens"],
|
|
|
- chunk_overlap=0,
|
|
|
+ chunk_overlap=segmentation.get('chunk_overlap', 0),
|
|
|
fixed_separator=separator,
|
|
|
separators=["\n\n", "。", ".", " ", ""],
|
|
|
embedding_model_instance=embedding_model_instance
|
|
@@ -61,7 +61,7 @@ class BaseIndexProcessor(ABC):
|
|
|
# Automatic segmentation
|
|
|
character_splitter = EnhanceRecursiveCharacterTextSplitter.from_encoder(
|
|
|
chunk_size=DatasetProcessRule.AUTOMATIC_RULES['segmentation']['max_tokens'],
|
|
|
- chunk_overlap=0,
|
|
|
+ chunk_overlap=DatasetProcessRule.AUTOMATIC_RULES['segmentation']['chunk_overlap'],
|
|
|
separators=["\n\n", "。", ".", " ", ""],
|
|
|
embedding_model_instance=embedding_model_instance
|
|
|
)
|