UNPKG

code-chopper

Version:

A library for semantically dividing code written in various programming languages

168 lines (160 loc) 7.19 kB
import Parser, { SyntaxNode } from 'tree-sitter';

/*!
 * Copyright (c) ushirononeko 2025
 * Copyright (c) sirasagi62 2025
 * Published under MIT License
 * see https://opensource.org/licenses/MIT
 *
 * This code was originally created by ushirononeko and modified by sirasagi62
 * Original: https://github.com/ushironoko/gistdex
 */

/**
 * Factory that hands out tree-sitter parsers on request.
 */
interface ParserFactory {
  /**
   * Asynchronously creates a parser for the given language name.
   * Resolves to `null` when no parser is produced
   * (presumably for an unsupported language — confirm against the implementation).
   */
  createParser: (language: string) => Promise<Parser | null>;
  /**
   * Releases whatever the factory holds.
   * NOTE(review): exact cleanup semantics are not visible in this declaration file.
   */
  dispose: () => void;
}

/**
 * Creates a new {@link ParserFactory}.
 */
declare const createParserFactory: () => ParserFactory;

/**
 * Metadata describing the boundary a chunk was cut on.
 * Only `type` is required; every other field is optional.
 * NOTE(review): the meaning of the individual fields (e.g. whether `parent`
 * lists enclosing scope names, or what `level` counts) is not established by
 * this declaration file — confirm against the implementation.
 */
type BoundaryInfo = {
  type: string;
  level?: number;
  name?: string;
  parent?: string[];
  docs?: string;
  title?: string;
};

/**
 * A single chunk produced by the chunking functions declared below.
 */
type BoundaryChunk = {
  /** Text of the chunk. */
  content: string;
  /** Start position of the chunk (unit — bytes vs. characters — TODO confirm). */
  startOffset: number;
  /** End position of the chunk (unit and inclusivity — TODO confirm). */
  endOffset: number;
  /** Boundary metadata attached to this chunk. */
  boundary: BoundaryInfo;
  /** Path of the file the chunk belongs to. */
  filePath: string;
};

/*!
 * Copyright (c) ushirononeko 2025
 * Copyright (c) sirasagi62 2025
 * Published under MIT License
 * see https://opensource.org/licenses/MIT
 *
 * This code was originally created by ushirononeko and modified by sirasagi62
 * Original: https://github.com/ushironoko/gistdex
 */

/**
 * Per-language table that groups node type names by category
 * (functions, classes, imports, ...). The keys of this object define
 * {@link LanguageEnum}.
 * NOTE(review): the strings look like tree-sitter grammar node types — confirm
 * against the grammars actually loaded by the implementation.
 */
declare const LANGUAGE_NODE_TYPES: {
  readonly javascript: {
    readonly functions: readonly ["function_declaration", "function_expression"];
    readonly classes: readonly ["class_declaration"];
    readonly methods: readonly ["method_definition"];
    readonly imports: readonly ["import_statement"];
    readonly variables: readonly ["variable_declaration", "lexical_declaration"];
  };
  readonly typescript: {
    readonly functions: readonly ["function_declaration", "function_expression"];
    readonly classes: readonly ["class_declaration"];
    readonly methods: readonly ["method_definition"];
    readonly interfaces: readonly ["interface_declaration"];
    readonly types: readonly ["type_alias_declaration"];
    readonly imports: readonly ["import_statement"];
    readonly variables: readonly ["variable_declaration", "lexical_declaration", "public_field_definition"];
  };
  readonly python: {
    readonly functions: readonly ["function_definition"];
    readonly classes: readonly ["class_definition"];
    readonly methods: readonly ["function_definition"];
    readonly imports: readonly ["import_statement", "import_from_statement"];
    readonly variables: readonly ["assignment"];
  };
  readonly go: {
    readonly functions: readonly ["function_declaration"];
    readonly methods: readonly ["method_declaration"];
    readonly types: readonly ["type_declaration"];
    readonly imports: readonly ["import_declaration"];
    readonly variables: readonly ["var_spec", "const_spec", "short_var_declaration"];
  };
  readonly rust: {
    readonly functions: readonly ["function_item"];
    readonly structs: readonly ["struct_item"];
    readonly impls: readonly ["impl_item"];
    readonly traits: readonly ["trait_item"];
    readonly imports: readonly ["use_declaration"];
    readonly variables: readonly ["let_declaration"];
  };
  readonly java: {
    readonly functions: readonly ["method_declaration"];
    readonly classes: readonly ["class_declaration"];
    readonly interfaces: readonly ["interface_declaration"];
    readonly imports: readonly ["import_declaration"];
    readonly variables: readonly ["local_variable_declaration"];
  };
  readonly ruby: {
    readonly functions: readonly ["method"];
    readonly classes: readonly ["class"];
    readonly modules: readonly ["module"];
    readonly imports: readonly ["require", "load"];
    readonly variables: readonly ["assignment"];
  };
  readonly c: {
    readonly functions: readonly ["function_definition"];
    readonly structs: readonly ["struct_specifier"];
    readonly enums: readonly ["enum_specifier"];
    readonly typedefs: readonly ["type_definition"];
    readonly includes: readonly ["preproc_include"];
    readonly variables: readonly ["declaration"];
  };
  readonly cpp: {
    readonly functions: readonly ["function_definition"];
    readonly classes: readonly ["class_specifier"];
    readonly structs: readonly ["struct_specifier"];
    readonly namespaces: readonly ["namespace_definition"];
    readonly templates: readonly ["template_declaration"];
    readonly includes: readonly ["preproc_include"];
    readonly variables: readonly ["declaration"];
  };
  readonly html: {
    readonly elements: readonly ["element"];
    readonly scripts: readonly ["script_element"];
    readonly styles: readonly ["style_element"];
  };
  readonly css: {
    readonly rules: readonly ["rule_set"];
    readonly media: readonly ["media_statement"];
    readonly keyframes: readonly ["keyframes_statement"];
    readonly imports: readonly ["import_statement"];
  };
  readonly bash: {
    readonly functions: readonly ["function_definition"];
    readonly commands: readonly ["command"];
    readonly variables: readonly ["variable_assignment"];
  };
};

/**
 * Union of the language keys present in {@link LANGUAGE_NODE_TYPES}.
 */
type LanguageEnum = keyof typeof LANGUAGE_NODE_TYPES;

/**
 * Options accepted by the chunking functions.
 */
type Options = {
  /**
   * Predicate invoked with the language and a syntax node.
   * NOTE(review): presumably `true` keeps the node as a chunk boundary —
   * confirm the polarity against the implementation.
   */
  filter?: (language: LanguageEnum, node: SyntaxNode) => boolean;
  /**
   * Patterns for directories to exclude
   * (what string each pattern is tested against — name vs. path — TODO confirm).
   */
  excludeDirs?: RegExp[];
};

/**
 * Reads a file, determines its language from the extension, and parses it into chunks.
 * @param factory The ParserFactory to create parsers.
 * @param options Options for chunking, including a filter function.
 * @param baseDirPath The root directory path of the repository.
 * @param relativeFilePath The path to the file to read and parse, relative to baseDirPath.
 * @returns A promise that resolves to an array of BoundaryChunk, each with its filePath.
 */
declare const readFileAndChunk: (
  factory: ParserFactory,
  options: Options,
  baseDirPath: string,
  relativeFilePath: string,
) => Promise<BoundaryChunk[]>;

/**
 * Recursively reads all files in a directory and its subdirectories,
 * parsing each supported file into chunks.
 * @param factory The ParserFactory to create parsers.
 * @param options Options for chunking.
 * @param baseDirPath The root directory path of the repository.
 * @returns A promise that resolves to an array of BoundaryChunk from all parsed files.
 */
declare const readDirectoryAndChunk: (
  factory: ParserFactory,
  options: Options,
  baseDirPath: string,
) => Promise<BoundaryChunk[]>;

/**
 * Parses a given code string into chunks based on the specified language using CST operations.
 * @param code The code string to parse.
 * @param language The programming language of the code (e.g., "typescript", "python").
 * @param factory The ParserFactory to create parsers.
 * @param options Options for chunking, including a filter function.
 * @returns A promise that resolves to an array of BoundaryChunk.
 */
declare const parseCodeAndChunk: (
  code: string,
  language: LanguageEnum, // Use LanguageEnum for type safety
  factory: ParserFactory,
  options: Options,
) => Promise<BoundaryChunk[]>;

export {
  type BoundaryChunk,
  type LanguageEnum,
  type Options,
  type ParserFactory,
  createParserFactory,
  parseCodeAndChunk,
  readDirectoryAndChunk,
  readFileAndChunk,
};