semantic-chunking
Version: (unspecified in this snapshot — TODO: record the package version)
Semantically create chunks from large texts. Useful for workflows involving large language models (LLMs).
18 lines • 570 B
JavaScript
/**
 * Default configuration for semantic chunking of large texts.
 *
 * Frozen so these shared defaults cannot be mutated by consumers;
 * to customize, spread into a new object: `{ ...DEFAULT_CONFIG, ...overrides }`.
 *
 * NOTE(review): per-field semantics below are inferred from the field names
 * and the package description — confirm against the library documentation.
 */
export const DEFAULT_CONFIG = Object.freeze({
  LOGGING: false,
  MAX_TOKEN_SIZE: 500, // maximum token size per chunk
  SIMILARITY_THRESHOLD: 0.5,
  // bounds used when the similarity threshold is computed dynamically
  DYNAMIC_THRESHOLD_LOWER_BOUND: 0.4,
  DYNAMIC_THRESHOLD_UPPER_BOUND: 0.8,
  NUM_SIMILARITY_SENTENCES_LOOKAHEAD: 3, // sentences to look ahead when comparing similarity
  COMBINE_CHUNKS: true, // whether to merge similar chunks after the initial pass
  COMBINE_CHUNKS_SIMILARITY_THRESHOLD: 0.5,
  ONNX_EMBEDDING_MODEL: "Xenova/all-MiniLM-L6-v2", // embedding model identifier
  DTYPE: "q8", // model quantization dtype (was single-quoted; normalized for consistency)
  LOCAL_MODEL_PATH: "./models",
  MODEL_CACHE_DIR: "./models",
  RETURN_EMBEDDING: false, // include embedding vectors in results
  RETURN_TOKEN_LENGTH: true, // include token lengths in results
  CHUNK_PREFIX: null, // optional prefix applied to chunks; null = none
  EXCLUDE_CHUNK_PREFIX_IN_RESULTS: false,
});