/*
 * code-chopper
 * Version:
 * A library for semantically dividing code written in various programming languages
 * 168 lines (160 loc) • 7.19 kB
 * text/typescript
 */
import Parser, { SyntaxNode } from 'tree-sitter';
/*!
* Copyright (c) ushirononeko 2025
* Copyright (c) sirasagi62 2025
* Published under MIT License
* see https://opensource.org/licenses/MIT
*
* This code was originally created by ushirononeko and modified by sirasagi62
* Original: https://github.com/ushironoko/gistdex
*/
/**
 * Factory that hands out tree-sitter `Parser` instances on request.
 *
 * Only the shape is declared here; the implementation lives elsewhere.
 */
interface ParserFactory {
/**
 * Asynchronously creates a parser for the given language identifier.
 * Resolves to `null` instead of a `Parser` in some cases
 * (presumably when the language is unsupported or its grammar cannot be
 * loaded — TODO confirm against the implementation).
 */
createParser: (language: string) => Promise<Parser | null>;
/**
 * Tears the factory down. Takes no arguments and returns nothing.
 * NOTE(review): presumably releases cached parsers/grammars — confirm.
 */
dispose: () => void;
}
/**
 * Creates a new {@link ParserFactory}.
 *
 * Takes no arguments. The returned factory exposes `createParser` and
 * `dispose`; this file contains the declaration only.
 * @returns A ParserFactory instance.
 */
declare const createParserFactory: () => ParserFactory;
/**
 * Metadata describing the boundary that a chunk was cut on.
 *
 * Only `type` is required; every other field is optional. The exact meaning
 * of each field is defined by the implementation, which is not part of this
 * declaration file — the per-field notes below are assumptions to confirm.
 */
type BoundaryInfo = {
/** Kind of boundary (presumably a syntax-node type name — TODO confirm). */
type: string;
/** Optional numeric level (presumably nesting or heading depth — TODO confirm). */
level?: number;
/** Optional name associated with the boundary (presumably the declared identifier — TODO confirm). */
name?: string;
/** Optional list of parent identifiers (presumably enclosing scopes, order unverified — TODO confirm). */
parent?: string[];
/** Optional documentation text (presumably the attached doc comment — TODO confirm). */
docs?: string;
/** Optional title string (usage not visible from this file). */
title?: string;
};
/**
 * A single chunk produced by the chunking functions declared in this file.
 *
 * All fields are required.
 */
type BoundaryChunk = {
/** Text of the chunk. */
content: string;
/**
 * Start position of the chunk.
 * NOTE(review): unit (bytes vs. UTF-16 code units) and inclusivity are not
 * visible from this declaration — confirm against the implementation.
 */
startOffset: number;
/** End position of the chunk; same caveats about unit and inclusivity as `startOffset`. */
endOffset: number;
/** Boundary metadata for this chunk. */
boundary: BoundaryInfo;
/** Path of the file the chunk belongs to (whether relative or absolute is not visible here). */
filePath: string;
};
/*!
* Copyright (c) ushirononeko 2025
* Copyright (c) sirasagi62 2025
* Published under MIT License
* see https://opensource.org/licenses/MIT
*
* This code was originally created by ushirononeko and modified by sirasagi62
* Original: https://github.com/ushironoko/gistdex
*/
/**
 * Per-language table of node-type name strings, grouped by category.
 *
 * The top-level keys are the supported language identifiers and are the
 * source of the `LanguageEnum` union (`keyof typeof LANGUAGE_NODE_TYPES`).
 * Each language maps category names (e.g. `functions`, `classes`, `imports`)
 * to a readonly tuple of string literals. The set of categories differs per
 * language.
 *
 * NOTE(review): the strings are presumably tree-sitter grammar node types
 * used to decide chunk boundaries — confirm against the implementation.
 * This constant is not listed in the export statement at the end of this
 * declaration file; only the derived `LanguageEnum` type is.
 */
declare const LANGUAGE_NODE_TYPES: {
readonly javascript: {
readonly functions: readonly ["function_declaration", "function_expression"];
readonly classes: readonly ["class_declaration"];
readonly methods: readonly ["method_definition"];
readonly imports: readonly ["import_statement"];
readonly variables: readonly ["variable_declaration", "lexical_declaration"];
};
// Same categories as javascript, plus `interfaces` and `types`, and an extra
// "public_field_definition" entry under `variables`.
readonly typescript: {
readonly functions: readonly ["function_declaration", "function_expression"];
readonly classes: readonly ["class_declaration"];
readonly methods: readonly ["method_definition"];
readonly interfaces: readonly ["interface_declaration"];
readonly types: readonly ["type_alias_declaration"];
readonly imports: readonly ["import_statement"];
readonly variables: readonly ["variable_declaration", "lexical_declaration", "public_field_definition"];
};
// `functions` and `methods` both list "function_definition" for python.
readonly python: {
readonly functions: readonly ["function_definition"];
readonly classes: readonly ["class_definition"];
readonly methods: readonly ["function_definition"];
readonly imports: readonly ["import_statement", "import_from_statement"];
readonly variables: readonly ["assignment"];
};
readonly go: {
readonly functions: readonly ["function_declaration"];
readonly methods: readonly ["method_declaration"];
readonly types: readonly ["type_declaration"];
readonly imports: readonly ["import_declaration"];
readonly variables: readonly ["var_spec", "const_spec", "short_var_declaration"];
};
readonly rust: {
readonly functions: readonly ["function_item"];
readonly structs: readonly ["struct_item"];
readonly impls: readonly ["impl_item"];
readonly traits: readonly ["trait_item"];
readonly imports: readonly ["use_declaration"];
readonly variables: readonly ["let_declaration"];
};
// The `functions` category holds "method_declaration" for java; there is no
// separate `methods` category.
readonly java: {
readonly functions: readonly ["method_declaration"];
readonly classes: readonly ["class_declaration"];
readonly interfaces: readonly ["interface_declaration"];
readonly imports: readonly ["import_declaration"];
readonly variables: readonly ["local_variable_declaration"];
};
readonly ruby: {
readonly functions: readonly ["method"];
readonly classes: readonly ["class"];
readonly modules: readonly ["module"];
readonly imports: readonly ["require", "load"];
readonly variables: readonly ["assignment"];
};
// c and cpp use `includes` rather than `imports` as the category name.
readonly c: {
readonly functions: readonly ["function_definition"];
readonly structs: readonly ["struct_specifier"];
readonly enums: readonly ["enum_specifier"];
readonly typedefs: readonly ["type_definition"];
readonly includes: readonly ["preproc_include"];
readonly variables: readonly ["declaration"];
};
readonly cpp: {
readonly functions: readonly ["function_definition"];
readonly classes: readonly ["class_specifier"];
readonly structs: readonly ["struct_specifier"];
readonly namespaces: readonly ["namespace_definition"];
readonly templates: readonly ["template_declaration"];
readonly includes: readonly ["preproc_include"];
readonly variables: readonly ["declaration"];
};
// Markup/stylesheet languages have their own category names.
readonly html: {
readonly elements: readonly ["element"];
readonly scripts: readonly ["script_element"];
readonly styles: readonly ["style_element"];
};
readonly css: {
readonly rules: readonly ["rule_set"];
readonly media: readonly ["media_statement"];
readonly keyframes: readonly ["keyframes_statement"];
readonly imports: readonly ["import_statement"];
};
readonly bash: {
readonly functions: readonly ["function_definition"];
readonly commands: readonly ["command"];
readonly variables: readonly ["variable_assignment"];
};
};
/**
 * Union of the supported language identifiers, derived from the top-level
 * keys of `LANGUAGE_NODE_TYPES` (e.g. `"javascript"`, `"typescript"`,
 * `"python"`, ...). Despite the name, this is a string-literal union type,
 * not a TypeScript `enum`.
 */
type LanguageEnum = keyof typeof LANGUAGE_NODE_TYPES;
/**
 * Options accepted by the chunking functions declared in this file.
 * Both fields are optional.
 */
type Options = {
/**
 * Predicate called with the language and a tree-sitter `SyntaxNode`.
 * NOTE(review): presumably returning `true` keeps the node as a chunk
 * boundary and `false` drops it — confirm against the implementation.
 */
filter?: (language: LanguageEnum, node: SyntaxNode) => boolean;
/**
 * Regular expressions for directories to exclude.
 * NOTE(review): presumably consulted during directory traversal; what string
 * each pattern is tested against (directory name vs. path) is not visible
 * here — confirm.
 */
excludeDirs?: RegExp[];
};
/**
 * Reads a single file, determines its language from the file extension, and
 * parses it into chunks.
 *
 * Declaration only; the implementation is not part of this file.
 * @param factory The ParserFactory used to create parsers.
 * @param options Options for chunking, including an optional filter function.
 * @param baseDirPath The root directory path of the repository.
 * @param relativeFilePath Path of the file to read and parse, relative to `baseDirPath`.
 * @returns A promise that resolves to an array of BoundaryChunk, each carrying its `filePath`.
 */
declare const readFileAndChunk: (factory: ParserFactory, options: Options, baseDirPath: string, relativeFilePath: string) => Promise<BoundaryChunk[]>;
/**
 * Recursively reads all files in a directory and its subdirectories and
 * parses each supported file into chunks.
 *
 * Declaration only; the implementation is not part of this file.
 * @param factory The ParserFactory used to create parsers.
 * @param options Options for chunking (see `Options`; `excludeDirs` is
 *   presumably applied during this traversal — TODO confirm).
 * @param baseDirPath The root directory path of the repository.
 * @returns A promise that resolves to an array of BoundaryChunk collected from all parsed files.
 */
declare const readDirectoryAndChunk: (factory: ParserFactory, options: Options, baseDirPath: string) => Promise<BoundaryChunk[]>;
/**
 * Parses an in-memory code string into chunks for the specified language
 * using CST operations.
 *
 * Unlike the file- and directory-based functions declared in this file, this
 * one takes no path arguments; how the `filePath` field of the resulting
 * chunks is populated is not visible from this declaration.
 * @param code The code string to parse.
 * @param language The programming language of the code; must be one of the
 *   `LanguageEnum` members (e.g. "typescript", "python").
 * @param factory The ParserFactory used to create parsers.
 * @param options Options for chunking, including an optional filter function.
 * @returns A promise that resolves to an array of BoundaryChunk.
 */
declare const parseCodeAndChunk: (code: string, language: LanguageEnum, // Use LanguageEnum for type safety
factory: ParserFactory, options: Options) => Promise<BoundaryChunk[]>;
// Public surface of the module: four type-only exports and four value exports.
// `BoundaryInfo` and `LANGUAGE_NODE_TYPES` are not exported by name here; they
// are reachable only through `BoundaryChunk["boundary"]` and `LanguageEnum`.
export { type BoundaryChunk, type LanguageEnum, type Options, type ParserFactory, createParserFactory, parseCodeAndChunk, readDirectoryAndChunk, readFileAndChunk };