char-encoding-detector
Version:
Character encoding detector
84 lines (83 loc) • 2.93 kB
TypeScript
import { Recognizer, Context, Match } from '../type';
/**
* Asian double or multi-byte - charsets.
* Match is determined mostly by the input data adhering to the
* encoding scheme for the charset, and, optionally,
* frequency-of-occurrence of characters.
*/
declare class mbcs implements Recognizer {
/**
* Typed `any` on this base class; every subclass declared in this file
* redeclares it as `number[]`.
* NOTE(review): presumably the character codes used for the optional
* frequency-of-occurrence check - confirm against the implementation.
*/
commonChars: any;
/**
* Returns a string identifying this recognizer; each subclass in this file
* redeclares it.
* NOTE(review): presumably the charset name - confirm against the implementation.
*/
name(): string;
/**
* Test the match of this charset with the input text data
* which is obtained via the detection context.
*
* @param det The detection context (declared type `Context`), which contains
* the input text to be checked for being in this charset.
* @return A `Match` value (type imported from '../type').
* NOTE(review): this comment previously described two values packed into
* one int (bits 0-7: the match confidence, ranging from 0-100; bits 8-15:
* the match reason, an enum-like value). That wording does not fit the
* declared `Match` return type and looks carried over from a Java
* original - confirm what `Match` actually holds.
*/
match(det: Context): Match;
/**
* Get the next character (however many bytes it is) from the input data.
* Subclasses for specific charset encodings must implement this function
* to get characters according to the rules of their encoding scheme.
*
* This function is not a method of class iteratedChar only because
* that would require a lot of extra derived classes, which is awkward.
* @param iter The iteratedChar 'struct' into which the returned char is placed.
* Declared as `any` here.
* @param det The charset detector, which is needed to get at the input byte data
* being iterated over. Declared as `any` here, unlike `match`, which takes `Context`.
* @return True if a character was returned, false at end of input.
*/
nextChar(iter: any, det: any): boolean;
}
/**
* Shift-JIS charset recognizer.
*/
export declare class sjis extends mbcs {
/**
* Redeclares `name()` from mbcs.
* NOTE(review): presumably returns the Shift-JIS charset name - confirm against the implementation.
*/
name(): string;
/**
* Returns a string; not declared on mbcs in this file.
* NOTE(review): presumably the language associated with this charset - confirm against the implementation.
*/
language(): string;
/** Narrows the base class's `commonChars: any` to an array of numbers. */
commonChars: number[];
/**
* Redeclares `nextChar` from mbcs for this charset; same signature as the base
* (both parameters are typed `any`, returns a boolean).
*/
nextChar(iter: any, det: any): boolean;
}
/**
* Big5 charset recognizer.
*/
export declare class big5 extends mbcs {
/**
* Redeclares `name()` from mbcs.
* NOTE(review): presumably returns the Big5 charset name - confirm against the implementation.
*/
name(): string;
/**
* Returns a string; not declared on mbcs in this file.
* NOTE(review): presumably the language associated with this charset - confirm against the implementation.
*/
language(): string;
/** Narrows the base class's `commonChars: any` to an array of numbers. */
commonChars: number[];
/**
* Redeclares `nextChar` from mbcs for this charset; same signature as the base
* (both parameters are typed `any`, returns a boolean).
*/
nextChar(iter: any, det: any): boolean;
}
/**
* The charset recognizer for EUC-JP. A singleton instance of this class
* is created and kept by the public CharsetDetector class
*/
export declare class euc_jp extends mbcs {
/**
* Redeclares `name()` from mbcs.
* NOTE(review): presumably returns the EUC-JP charset name - confirm against the implementation.
*/
name(): string;
/**
* Returns a string; not declared on mbcs in this file.
* NOTE(review): presumably the language associated with this charset - confirm against the implementation.
*/
language(): string;
/** Narrows the base class's `commonChars: any` to an array of numbers. */
commonChars: number[];
/**
* Redeclares `nextChar` from mbcs for this charset; same signature as the base
* (both parameters are typed `any`, returns a boolean).
*/
nextChar(iter: any, det: any): boolean;
}
/**
* The charset recognizer for EUC-KR. A singleton instance of this class
* is created and kept by the public CharsetDetector class
*/
export declare class euc_kr extends mbcs {
/**
* Redeclares `name()` from mbcs.
* NOTE(review): presumably returns the EUC-KR charset name - confirm against the implementation.
*/
name(): string;
/**
* Returns a string; not declared on mbcs in this file.
* NOTE(review): presumably the language associated with this charset - confirm against the implementation.
*/
language(): string;
/** Narrows the base class's `commonChars: any` to an array of numbers. */
commonChars: number[];
/**
* Redeclares `nextChar` from mbcs for this charset; same signature as the base
* (both parameters are typed `any`, returns a boolean).
*/
nextChar(iter: any, det: any): boolean;
}
/**
* GB-18030 recognizer. Uses simplified Chinese statistics.
*/
export declare class gb_18030 extends mbcs {
/**
* Redeclares `name()` from mbcs.
* NOTE(review): presumably returns the GB-18030 charset name - confirm against the implementation.
*/
name(): string;
/**
* Returns a string; not declared on mbcs in this file.
* NOTE(review): presumably the language associated with this charset - confirm against the implementation.
*/
language(): string;
/**
* Redeclares `nextChar` from mbcs for this charset; same signature as the base
* (both parameters are typed `any`, returns a boolean).
*/
nextChar(iter: any, det: any): boolean;
/** Narrows the base class's `commonChars: any` to an array of numbers. */
commonChars: number[];
}
export {};