odiascript
Version:
Odia (Oriya) keyword-based programming language runtime. Created by Atikin Verse.
116 lines (98 loc) • 3.46 kB
JavaScript
const fs = require('fs');
const path = require('path');
// Keyword table parsed from keywords.json, located in the same directory as
// this module. It is read synchronously, once, when the module is loaded; its
// keys are the source words to look for and its values are their replacements.
const keywordsFile = path.join(__dirname, 'keywords.json');
const keywords = JSON.parse(fs.readFileSync(keywordsFile, 'utf8'));
// Match Odia words as whole tokens, but ignore inside strings & comments
/**
 * Split source text into a lossless list of typed tokens.
 *
 * Concatenating every token's `value`, in order, reproduces `src` exactly.
 * Token types:
 *   - 'string'  : a "...", '...' or `...` literal including its quotes. A
 *                 backslash escapes the next character. An unterminated
 *                 literal runs to the end of input. The contents of template
 *                 literals (including `${...}` parts) are kept verbatim.
 *   - 'comment' : a `//` comment up to (not including) the newline, or a
 *                 block comment including its delimiters. An unterminated
 *                 block comment runs to the end of input.
 *   - 'ws'      : a maximal run of whitespace.
 *   - 'code'    : a maximal run of anything else. It ends at whitespace, at a
 *                 quote character, or right before a comment opener.
 *
 * Regular-expression literals are not recognized as such; they are scanned as
 * ordinary code. The only concession is that a `/` preceded by a backslash is
 * never treated as the start of a comment.
 *
 * @param {string} src - Source text to tokenize.
 * @returns {Array<{type: string, value: string}>} Tokens in source order.
 */
function tokenizePreservingLiterals(src) {
  const tokens = [];
  const N = src.length;
  const isSpace = (c) => /\s/.test(c);
  const isQuote = (c) => c === '"' || c === "'" || c === '`';
  // True when a line- or block-comment opener begins at index `at`.
  // Reading one past the end yields undefined, which matches neither.
  const opensComment = (at) =>
    src[at] === '/' && (src[at + 1] === '/' || src[at + 1] === '*');

  let i = 0;
  while (i < N) {
    const start = i;
    const ch = src[i];

    // String literals: "..." or '...' or `...`
    if (isQuote(ch)) {
      i++;
      while (i < N) {
        const c = src[i];
        i++;
        if (c === '\\') {
          // Skip the escaped character so an escaped quote does not close the literal.
          if (i < N) i++;
          continue;
        }
        if (c === ch) break;
      }
      tokens.push({ type: 'string', value: src.slice(start, i) });
      continue;
    }

    // Line comment: runs up to, but not including, the newline.
    if (ch === '/' && src[i + 1] === '/') {
      const newline = src.indexOf('\n', i);
      i = newline === -1 ? N : newline;
      tokens.push({ type: 'comment', value: src.slice(start, i) });
      continue;
    }

    // Block comment: runs through the closing delimiter, or to end of input.
    if (ch === '/' && src[i + 1] === '*') {
      const close = src.indexOf('*/', i + 2);
      i = close === -1 ? N : close + 2;
      tokens.push({ type: 'comment', value: src.slice(start, i) });
      continue;
    }

    // Whitespace chunk
    if (isSpace(ch)) {
      i++;
      while (i < N && isSpace(src[i])) i++;
      tokens.push({ type: 'ws', value: src.slice(start, i) });
      continue;
    }

    // Code chunk: maximal run of non-space, non-quote characters.
    i++;
    let escaped = ch === '\\';
    while (i < N && !isSpace(src[i]) && !isQuote(src[i])) {
      // Stop BEFORE consuming a comment opener so the next iteration sees the
      // whole `//` or block opener and classifies it as a comment. A slash
      // that follows a backslash (as in a regex like \/\/) is not an opener.
      if (!escaped && opensComment(i)) break;
      escaped = !escaped && src[i] === '\\';
      i++;
    }
    tokens.push({ type: 'code', value: src.slice(start, i) });
  }
  return tokens;
}
/**
 * Translate keywords inside `code` tokens, leaving all other tokens untouched.
 *
 * Each key of the module-level `keywords` table is replaced by its mapped
 * value, but only where the key is not directly preceded or followed by an
 * ASCII identifier character (letters, digits, `_`, `$`) or a character from
 * the Odia Unicode block (U+0B00 to U+0B7F). This prevents replacing part of a
 * longer word. Keys are applied longest first, one after another.
 *
 * The mapped value is inserted verbatim: sequences such as `$&` or `$$` in a
 * value are NOT interpreted as replacement patterns.
 *
 * @param {Array<{type: string, value: string}>} tokens - Tokens to translate.
 * @returns {Array<{type: string, value: string}>} A new array; `code` tokens
 *   are shallow copies with a translated `value`, other tokens are the same
 *   objects that were passed in.
 */
function replaceTokens(tokens) {
  // Characters that make a neighbouring position "part of a word".
  // The Odia block is included to avoid mid-word replacements.
  const wordChar = /[A-Za-z0-9_$\u0B00-\u0B7F]/;

  // Compile one pattern per keyword up front (longest key first) instead of
  // rebuilding the same RegExp for every token.
  const rules = Object.keys(keywords)
    .sort((a, b) => b.length - a.length)
    .map((key) => ({
      pattern: new RegExp(
        `(?<!${wordChar.source})${escapeReg(key)}(?!${wordChar.source})`,
        'g',
      ),
      replacement: keywords[key],
    }));

  // A function replacer keeps `$` sequences in the replacement literal.
  const translateChunk = (chunk) =>
    rules.reduce(
      (text, { pattern, replacement }) => text.replace(pattern, () => replacement),
      chunk,
    );

  return tokens.map((token) =>
    token.type === 'code' ? { ...token, value: translateChunk(token.value) } : token,
  );
}
/**
 * Escape regular-expression metacharacters so a string can be embedded in a
 * RegExp pattern and matched literally.
 *
 * @param {string} s - Text to escape.
 * @returns {string} `s` with each of . * + ? ^ $ { } ( ) | [ ] \ preceded by a backslash.
 */
function escapeReg(s) {
  const metaChars = /[\\^$.*+?()[\]{}|]/g;
  return s.replace(metaChars, (ch) => `\\${ch}`);
}
/**
 * Translate a source string: tokenize it, run keyword replacement over the
 * tokens, and stitch the token values back together in order.
 *
 * @param {string} src - Source text to translate.
 * @returns {string} The translated source text.
 */
function translateSource(src) {
  const translated = replaceTokens(tokenizePreservingLiterals(src));
  const pieces = [];
  for (const token of translated) {
    pieces.push(token.value);
  }
  return pieces.join('');
}
/**
 * Read a file as UTF-8 and translate its contents with `translateSource`.
 * The read is synchronous; any error thrown by `fs.readFileSync` propagates
 * to the caller.
 *
 * @param {string} filepath - Path of the file to read.
 * @returns {string} The translated source text.
 */
function translateFile(filepath) {
  return translateSource(fs.readFileSync(filepath, 'utf8'));
}
// Public API: translate a source string, or a file read from disk.
module.exports = { translateSource, translateFile };