|
@@ -122,7 +122,8 @@ const StepTwo = ({
|
|
|
/**
 * Stores the segment identifier (separator) in component state.
 *
 * A truthy `value` is passed through `escape` before being stored; a falsy
 * value (e.g. an empty string) falls back to `DEFAULT_SEGMENT_IDENTIFIER`.
 * The callback is memoized with an empty dependency list.
 */
const setSegmentIdentifier = useCallback((value: string) => {
  // Decide what to store first, then commit it in a single setter call.
  const nextIdentifier = value ? escape(value) : DEFAULT_SEGMENT_IDENTIFIER
  doSetSegmentIdentifier(nextIdentifier)
}, [])
|
|
|
- const [max, setMax] = useState(4000) // default chunk length
|
|
|
+ const [maxChunkLength, setMaxChunkLength] = useState(4000) // default chunk length
|
|
|
+ const [limitMaxChunkLength, setLimitMaxChunkLength] = useState(4000)
|
|
|
const [overlap, setOverlap] = useState(50)
|
|
|
const [rules, setRules] = useState<PreProcessingRule[]>([])
|
|
|
const [defaultConfig, setDefaultConfig] = useState<Rules>()
|
|
@@ -196,7 +197,7 @@ const StepTwo = ({
|
|
|
const resetRules = () => {
|
|
|
if (defaultConfig) {
|
|
|
setSegmentIdentifier(defaultConfig.segmentation.separator)
|
|
|
- setMax(defaultConfig.segmentation.max_tokens)
|
|
|
+ setMaxChunkLength(defaultConfig.segmentation.max_tokens)
|
|
|
setOverlap(defaultConfig.segmentation.chunk_overlap)
|
|
|
setRules(defaultConfig.pre_processing_rules)
|
|
|
}
|
|
@@ -212,8 +213,8 @@ const StepTwo = ({
|
|
|
}
|
|
|
|
|
|
const confirmChangeCustomConfig = () => {
|
|
|
- if (segmentationType === SegmentType.CUSTOM && max > 4000) {
|
|
|
- Toast.notify({ type: 'error', message: t('datasetCreation.stepTwo.maxLengthCheck') })
|
|
|
+ if (segmentationType === SegmentType.CUSTOM && maxChunkLength > limitMaxChunkLength) {
|
|
|
+ Toast.notify({ type: 'error', message: t('datasetCreation.stepTwo.maxLengthCheck', { limit: limitMaxChunkLength }) })
|
|
|
return
|
|
|
}
|
|
|
setCustomFileIndexingEstimate(null)
|
|
@@ -234,7 +235,7 @@ const StepTwo = ({
|
|
|
pre_processing_rules: rules,
|
|
|
segmentation: {
|
|
|
separator: unescape(segmentIdentifier),
|
|
|
- max_tokens: max,
|
|
|
+ max_tokens: maxChunkLength,
|
|
|
chunk_overlap: overlap,
|
|
|
},
|
|
|
}
|
|
@@ -339,12 +340,12 @@ const StepTwo = ({
|
|
|
)
|
|
|
const getCreationParams = () => {
|
|
|
let params
|
|
|
- if (segmentationType === SegmentType.CUSTOM && overlap > max) {
|
|
|
+ if (segmentationType === SegmentType.CUSTOM && overlap > maxChunkLength) {
|
|
|
Toast.notify({ type: 'error', message: t('datasetCreation.stepTwo.overlapCheck') })
|
|
|
return
|
|
|
}
|
|
|
- if (segmentationType === SegmentType.CUSTOM && max > 4000) {
|
|
|
- Toast.notify({ type: 'error', message: t('datasetCreation.stepTwo.maxLengthCheck') })
|
|
|
+ if (segmentationType === SegmentType.CUSTOM && maxChunkLength > limitMaxChunkLength) {
|
|
|
+ Toast.notify({ type: 'error', message: t('datasetCreation.stepTwo.maxLengthCheck', { limit: limitMaxChunkLength }) })
|
|
|
return
|
|
|
}
|
|
|
if (isSetting) {
|
|
@@ -415,7 +416,8 @@ const StepTwo = ({
|
|
|
const res = await fetchDefaultProcessRule({ url: '/datasets/process-rule' })
|
|
|
const separator = res.rules.segmentation.separator
|
|
|
setSegmentIdentifier(separator)
|
|
|
- setMax(res.rules.segmentation.max_tokens)
|
|
|
+ setMaxChunkLength(res.rules.segmentation.max_tokens)
|
|
|
+ setLimitMaxChunkLength(res.limits.indexing_max_segmentation_tokens_length)
|
|
|
setOverlap(res.rules.segmentation.chunk_overlap)
|
|
|
setRules(res.rules.pre_processing_rules)
|
|
|
setDefaultConfig(res.rules)
|
|
@@ -432,7 +434,7 @@ const StepTwo = ({
|
|
|
const max = rules.segmentation.max_tokens
|
|
|
const overlap = rules.segmentation.chunk_overlap
|
|
|
setSegmentIdentifier(separator)
|
|
|
- setMax(max)
|
|
|
+ setMaxChunkLength(max)
|
|
|
setOverlap(overlap)
|
|
|
setRules(rules.pre_processing_rules)
|
|
|
setDefaultConfig(rules)
|
|
@@ -670,10 +672,10 @@ const StepTwo = ({
|
|
|
type="number"
|
|
|
className='h-9'
|
|
|
placeholder={t('datasetCreation.stepTwo.maxLength') || ''}
|
|
|
- value={max}
|
|
|
- max={4000}
|
|
|
+ value={maxChunkLength}
|
|
|
+ max={limitMaxChunkLength}
|
|
|
min={1}
|
|
|
- onChange={e => setMax(parseInt(e.target.value.replace(/^0+/, ''), 10))}
|
|
|
+ onChange={e => setMaxChunkLength(parseInt(e.target.value.replace(/^0+/, ''), 10))}
|
|
|
/>
|
|
|
</div>
|
|
|
</div>
|