norminette-mcp
Version:
MCP server for 42 School norminette coding standard checker
250 lines (249 loc) • 8.13 kB
JavaScript
import { Token, TokenType } from './token.js';
import { keywords, operators, brackets, sortedOperators } from './dictionary.js';
/**
 * Lexer that splits C source text into a flat list of tokens.
 *
 * Runs of spaces, tabs, newlines and comments are emitted as tokens instead of
 * being discarded. Every token is created with the 1-based line/column of its
 * first character.
 */
export class CLexer {
    /** Source text being tokenized. */
    input;
    /** Index into `input` of the next unread character. */
    position = 0;
    /** 1-based line number of the next unread character. */
    line = 1;
    /** 1-based column number of the next unread character. */
    column = 1;

    /**
     * @param {string} input - Source text to tokenize.
     */
    constructor(input) {
        this.input = input;
    }

    /**
     * Tokenizes the remaining input.
     *
     * @returns {Token[]} All tokens in source order, terminated by one EOF token.
     */
    tokenize() {
        const tokens = [];
        while (this.position < this.input.length) {
            const token = this.nextToken();
            if (token) {
                tokens.push(token);
            }
        }
        tokens.push(new Token(TokenType.EOF, { line: this.line, column: this.column }));
        return tokens;
    }

    /**
     * Reads one token starting at the current position.
     *
     * The checks run in a fixed order: comments, preprocessor line, string,
     * character literal, number, operators (in `sortedOperators` order),
     * brackets, identifier/keyword, whitespace, and finally a one-character
     * 'UNKNOWN' token.
     *
     * @returns {Token|null} The token, or null when the input is exhausted.
     */
    nextToken() {
        this.skipWhitespace();
        const first = this.peek();
        if (first === null) {
            return null;
        }
        const start = { line: this.line, column: this.column };
        const second = this.peek(1);

        if (first === '/' && second === '/') {
            return this.readLineComment(start);
        }
        if (first === '/' && second === '*') {
            return this.readBlockComment(start);
        }
        if (first === '#') {
            return this.readPreprocessor(start);
        }
        if (first === '"') {
            return this.readString(start);
        }
        if (first === "'") {
            return this.readChar(start);
        }
        if (this.isDigit(first)) {
            return this.readNumber(start);
        }
        // The first operator in sortedOperators that matches wins.
        for (const op of sortedOperators) {
            if (this.match(op)) {
                this.advance(op.length);
                return new Token(operators[op], start, op);
            }
        }
        // Own-property check so inherited Object.prototype members never count as brackets.
        if (Object.hasOwn(brackets, first)) {
            this.advance();
            return new Token(brackets[first], start, first);
        }
        if (this.isAlpha(first) || first === '_') {
            return this.readIdentifier(start);
        }
        if (first === ' ') {
            return this.readSpaces(start);
        }
        if (first === '\t') {
            this.advance();
            return new Token(TokenType.TAB, start, '\t');
        }
        if (first === '\n') {
            const newlineToken = new Token(TokenType.NEWLINE, start, '\n');
            this.advanceWithNewline();
            return newlineToken;
        }
        // Anything else becomes a single-character 'UNKNOWN' token.
        this.advance();
        return new Token('UNKNOWN', start, first);
    }

    /**
     * Reads a run of consecutive space characters as one SPACE token.
     *
     * @param {{line: number, column: number}} pos - Position of the first space.
     * @returns {Token}
     */
    readSpaces(pos) {
        return new Token(TokenType.SPACE, pos, this.consumeWhile((ch) => ch === ' '));
    }

    /**
     * Reads a `//` comment up to, but not including, the next newline.
     *
     * @param {{line: number, column: number}} pos - Position of the first '/'.
     * @returns {Token}
     */
    readLineComment(pos) {
        return new Token(TokenType.COMMENT, pos, this.consumeWhile((ch) => ch !== '\n'));
    }

    /**
     * Reads a block comment. Must be called with the cursor on the opening
     * "/" of "/*".
     *
     * An unterminated comment runs to the end of the input.
     *
     * @param {{line: number, column: number}} pos - Position of the opening '/'.
     * @returns {Token}
     */
    readBlockComment(pos) {
        // Take the opening "/*" first so that its '*' cannot pair with a
        // directly following '/' (the text "/*/" does not close the comment).
        let comment = this.advance(2);
        while (this.position < this.input.length) {
            if (this.peek() === '*' && this.peek(1) === '/') {
                comment += this.advance(2);
                break;
            }
            comment += this.advanceWithNewline();
        }
        return new Token(TokenType.COMMENT, pos, comment);
    }

    /**
     * Reads from '#' up to, but not including, the next newline and returns
     * the text as a single HASH token.
     *
     * @param {{line: number, column: number}} pos - Position of the '#'.
     * @returns {Token}
     */
    readPreprocessor(pos) {
        return new Token(TokenType.HASH, pos, this.consumeWhile((ch) => ch !== '\n'));
    }

    /**
     * Reads a double-quoted string literal, quotes included.
     *
     * @param {{line: number, column: number}} pos - Position of the opening quote.
     * @returns {Token}
     */
    readString(pos) {
        return new Token(TokenType.STRING, pos, this.readQuoted('"'));
    }

    /**
     * Reads a single-quoted character literal, quotes included.
     *
     * @param {{line: number, column: number}} pos - Position of the opening quote.
     * @returns {Token}
     */
    readChar(pos) {
        return new Token(TokenType.CONSTANT, pos, this.readQuoted("'"));
    }

    /**
     * Reads a numeric constant: hexadecimal (`0x…`), or decimal with an
     * optional fraction and an optional exponent (`e`/`E`, optional sign,
     * digits), followed by any run of letters (suffixes such as `L` or `f`).
     *
     * @param {{line: number, column: number}} pos - Position of the first digit.
     * @returns {Token}
     */
    readNumber(pos) {
        const isDecimalDigit = (ch) => this.isDigit(ch);
        let num = '';
        const marker = this.peek(1);
        if (this.peek() === '0' && (marker === 'x' || marker === 'X')) {
            num += this.advance(2); // "0x" / "0X"
            num += this.consumeWhile((ch) => this.isHexDigit(ch));
        }
        else {
            num += this.consumeWhile(isDecimalDigit);
            // A '.' only belongs to the number when a digit follows it.
            if (this.peek() === '.' && this.isDigit(this.peek(1))) {
                num += this.advance();
                num += this.consumeWhile(isDecimalDigit);
            }
            // Exponent: only taken when at least one digit follows the
            // optional sign, so a bare trailing 'e' is still read as a suffix letter.
            const expMark = this.peek();
            if (expMark === 'e' || expMark === 'E') {
                const sign = this.peek(1);
                const headLength = sign === '+' || sign === '-' ? 2 : 1;
                if (this.isDigit(this.peek(headLength))) {
                    num += this.advance(headLength);
                    num += this.consumeWhile(isDecimalDigit);
                }
            }
        }
        num += this.consumeWhile((ch) => this.isAlpha(ch));
        return new Token(TokenType.CONSTANT, pos, num);
    }

    /**
     * Reads a run of letters, digits and underscores. The token type comes
     * from `keywords` when the text is a keyword, otherwise it is IDENTIFIER.
     *
     * @param {{line: number, column: number}} pos - Position of the first character.
     * @returns {Token}
     */
    readIdentifier(pos) {
        const id = this.consumeWhile((ch) => this.isAlnum(ch) || ch === '_');
        // Own-property lookup: names such as "constructor" or "toString"
        // would otherwise resolve to members inherited from Object.prototype.
        const keywordType = Object.hasOwn(keywords, id) ? keywords[id] : undefined;
        return new Token(keywordType || TokenType.IDENTIFIER, pos, id);
    }

    /**
     * Intentionally a no-op: whitespace is emitted as tokens by nextToken().
     */
    skipWhitespace() {
    }

    /**
     * Returns the character `offset` positions ahead without consuming it.
     *
     * @param {number} [offset=0]
     * @returns {string|null} The character, or null past the end of the input.
     */
    peek(offset = 0) {
        const pos = this.position + offset;
        return pos < this.input.length ? this.input[pos] : null;
    }

    /**
     * Consumes up to `count` characters, moving the column by one for each.
     * Line tracking is NOT updated; use advanceWithNewline() for text that may
     * contain '\n'.
     *
     * @param {number} [count=1]
     * @returns {string} The consumed characters (shorter than `count` at end of input).
     */
    advance(count = 1) {
        const taken = this.input.slice(this.position, this.position + Math.max(count, 0));
        this.position += taken.length;
        this.column += taken.length;
        return taken;
    }

    /**
     * Consumes one character; a '\n' moves to column 1 of the next line, any
     * other character moves the column by one.
     *
     * @returns {string} The consumed character, or '' at end of input.
     */
    advanceWithNewline() {
        if (this.position >= this.input.length) {
            return '';
        }
        const char = this.input[this.position];
        this.position++;
        if (char === '\n') {
            this.line++;
            this.column = 1;
        }
        else {
            this.column++;
        }
        return char;
    }

    /**
     * Tests whether the unread input starts with `str`.
     *
     * @param {string} str
     * @returns {boolean}
     */
    match(str) {
        return this.input.startsWith(str, this.position);
    }

    /**
     * Consumes characters while `predicate` accepts them, moving the column by
     * one per character. Line tracking is not updated, so the predicate must
     * reject '\n'.
     *
     * @param {(ch: string) => boolean} predicate
     * @returns {string} The consumed text (possibly empty).
     */
    consumeWhile(predicate) {
        const begin = this.position;
        while (this.position < this.input.length && predicate(this.input[this.position])) {
            this.position++;
            this.column++;
        }
        return this.input.slice(begin, this.position);
    }

    /**
     * Reads a literal delimited by `quote`, starting on the opening quote.
     * A backslash always takes the following character with it. The closing
     * quote is included when present; an unterminated literal runs to the end
     * of the input.
     *
     * @param {string} quote - The delimiter character.
     * @returns {string} The literal text including its quotes.
     */
    readQuoted(quote) {
        let text = this.advance(); // opening quote
        for (let ch = this.peek(); ch !== null && ch !== quote; ch = this.peek()) {
            if (ch === '\\') {
                text += this.advance(); // the backslash itself
            }
            // The escaped character may be a newline, so it goes through the
            // line-aware advance to keep line/column correct.
            text += this.advanceWithNewline();
        }
        if (this.peek() === quote) {
            text += this.advance(); // closing quote
        }
        return text;
    }

    /**
     * @param {string|null} char
     * @returns {boolean} True for '0'..'9'.
     */
    isDigit(char) {
        return char !== null && char >= '0' && char <= '9';
    }

    /**
     * @param {string|null} char
     * @returns {boolean} True for '0'..'9', 'a'..'f' and 'A'..'F'.
     */
    isHexDigit(char) {
        if (char === null) {
            return false;
        }
        return (char >= '0' && char <= '9') ||
            (char >= 'a' && char <= 'f') ||
            (char >= 'A' && char <= 'F');
    }

    /**
     * @param {string|null} char
     * @returns {boolean} True for 'a'..'z' and 'A'..'Z' (underscore is not included).
     */
    isAlpha(char) {
        if (char === null) {
            return false;
        }
        return (char >= 'a' && char <= 'z') || (char >= 'A' && char <= 'Z');
    }

    /**
     * @param {string|null} char
     * @returns {boolean} True when `char` is a letter or a decimal digit.
     */
    isAlnum(char) {
        return this.isAlpha(char) || this.isDigit(char);
    }
}