|
@@ -93,17 +93,21 @@ class FixedRecursiveCharacterTextSplitter(EnhanceRecursiveCharacterTextSplitter)
|
|
|
splits = list(text)
|
|
|
# Now go merging things, recursively splitting longer texts.
|
|
|
_good_splits = []
|
|
|
+ _good_splits_lengths = [] # cache the lengths of the splits
|
|
|
for s in splits:
|
|
|
- if self._length_function(s) < self._chunk_size:
|
|
|
+ s_len = self._length_function(s)
|
|
|
+ if s_len < self._chunk_size:
|
|
|
_good_splits.append(s)
|
|
|
+ _good_splits_lengths.append(s_len)
|
|
|
else:
|
|
|
if _good_splits:
|
|
|
- merged_text = self._merge_splits(_good_splits, separator)
|
|
|
+ merged_text = self._merge_splits(_good_splits, separator, _good_splits_lengths)
|
|
|
final_chunks.extend(merged_text)
|
|
|
_good_splits = []
|
|
|
+ _good_splits_lengths = []
|
|
|
other_info = self.recursive_split_text(s)
|
|
|
final_chunks.extend(other_info)
|
|
|
if _good_splits:
|
|
|
- merged_text = self._merge_splits(_good_splits, separator)
|
|
|
+ merged_text = self._merge_splits(_good_splits, separator, _good_splits_lengths)
|
|
|
final_chunks.extend(merged_text)
|
|
|
return final_chunks
|