UNPKG

odiascript

Version:

Odia (Oriya) keyword-based programming language runtime. Created by Atikin Verse.

116 lines (98 loc) 3.46 kB
const fs = require('fs');
const path = require('path');

// Keyword table, read once when the module loads. The code below uses each
// key as the text to search for and each value as its replacement.
// NOTE(review): presumably keys are Odia keywords and values are JavaScript
// source text — confirm against keywords.json.
const keywords = JSON.parse(
  fs.readFileSync(path.join(__dirname, 'keywords.json'), 'utf8')
);

// Characters that open (and close) a string literal.
const QUOTE_CHARS = ['"', "'", '`'];

// No `g` flag, so `.test()` carries no `lastIndex` state between calls.
const WHITESPACE = /\s/;

// Characters that count as "part of a word" on either side of a keyword:
// ASCII identifier characters plus the Odia Unicode block, so a keyword is
// never replaced in the middle of a longer Latin or Odia word.
const WORD_CHAR_CLASS = '[A-Za-z0-9_$\\u0B00-\\u0B7F]';

/**
 * Tells whether a line comment or block comment opens at `index`.
 *
 * @param {string} src - Source text being scanned.
 * @param {number} index - Position to inspect.
 * @returns {boolean} True when `src` has `//` or `/*` starting at `index`.
 */
function isCommentStart(src, index) {
  // Reading past the end yields undefined, which compares unequal to both.
  return src[index] === '/' && (src[index + 1] === '/' || src[index + 1] === '*');
}

/**
 * Splits source text into tokens so that string literals and comments can be
 * left untouched while everything else is eligible for keyword replacement.
 *
 * Token types:
 *   - 'string'  : "...", '...' or `...` including the quotes; a backslash
 *                 escapes the following character. An unterminated literal
 *                 runs to the end of the input.
 *   - 'comment' : `// ...` up to (not including) the newline, or a block
 *                 comment including its delimiters (to end of input when
 *                 unterminated).
 *   - 'ws'      : a maximal run of whitespace.
 *   - 'code'    : a maximal run of anything else.
 *
 * Concatenating every token's `value` reproduces `src` exactly.
 *
 * Limitation: a template literal is treated as one opaque string, so
 * `${...}` interpolations are not tokenized separately.
 *
 * @param {string} src - Source text to tokenize.
 * @returns {{type: string, value: string}[]} Tokens in source order.
 */
function tokenizePreservingLiterals(src) {
  const tokens = [];
  const N = src.length;
  let i = 0;

  while (i < N) {
    const ch = src[i];

    // String literal: "..." or '...' or `...`
    if (QUOTE_CHARS.includes(ch)) {
      const quote = ch;
      let buf = ch;
      i++;
      while (i < N) {
        const c = src[i];
        buf += c;
        i++;
        if (c === '\\') {
          // Keep the escaped character so an escaped quote does not close the literal.
          if (i < N) {
            buf += src[i];
            i++;
          }
          continue;
        }
        if (c === quote) break;
      }
      tokens.push({ type: 'string', value: buf });
      continue;
    }

    // Line comment: runs to the end of the line, newline excluded.
    if (ch === '/' && src[i + 1] === '/') {
      const begin = i;
      while (i < N && src[i] !== '\n') i++;
      tokens.push({ type: 'comment', value: src.slice(begin, i) });
      continue;
    }

    // Block comment: runs through the closing delimiter, or to end of input.
    if (ch === '/' && src[i + 1] === '*') {
      const close = src.indexOf('*/', i + 2);
      const end = close === -1 ? N : close + 2;
      tokens.push({ type: 'comment', value: src.slice(i, end) });
      i = end;
      continue;
    }

    // Whitespace run.
    if (WHITESPACE.test(ch)) {
      const begin = i;
      i++;
      while (i < N && WHITESPACE.test(src[i])) i++;
      tokens.push({ type: 'ws', value: src.slice(begin, i) });
      continue;
    }

    // Code run: everything up to whitespace, a quote, or a comment opener.
    // The comment check happens BEFORE the slash is consumed; otherwise a
    // comment that directly follows code (`x=1;//note`) would lose its first
    // slash to the code token and never be recognized as a comment.
    const begin = i;
    i++;
    while (
      i < N &&
      !WHITESPACE.test(src[i]) &&
      !QUOTE_CHARS.includes(src[i]) &&
      !isCommentStart(src, i)
    ) {
      i++;
    }
    tokens.push({ type: 'code', value: src.slice(begin, i) });
  }

  return tokens;
}

/**
 * Escapes every regular-expression metacharacter in `s` so the result
 * matches `s` literally when embedded in a RegExp source.
 *
 * @param {string} s - Literal text.
 * @returns {string} Regex-safe text.
 */
function escapeReg(s) {
  return s.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
}

/**
 * Compiles one replacement rule per keyword, longest keyword first so a
 * longer keyword is replaced before any shorter keyword it contains.
 *
 * @returns {{pattern: RegExp, replacement: *}[]} Rules in application order.
 */
function buildKeywordRules() {
  return Object.keys(keywords)
    // An empty key would match at empty positions; skip it.
    .filter((key) => key.length > 0)
    .sort((a, b) => b.length - a.length)
    .map((key) => ({
      // Match the keyword only when it is not glued to a word character.
      pattern: new RegExp(
        `(?<!${WORD_CHAR_CLASS})${escapeReg(key)}(?!${WORD_CHAR_CLASS})`,
        'g'
      ),
      replacement: keywords[key],
    }));
}

/**
 * Replaces keywords inside 'code' tokens; every other token is returned as-is.
 *
 * @param {{type: string, value: string}[]} tokens - Output of
 *   tokenizePreservingLiterals.
 * @returns {{type: string, value: string}[]} New array; 'code' tokens are
 *   fresh objects carrying the translated value.
 */
function replaceTokens(tokens) {
  // Compile the patterns once per call instead of once per token.
  const rules = buildKeywordRules();

  const translateChunk = (chunk) =>
    rules.reduce(
      // A function replacer inserts the value literally; a plain string would
      // have `$$`, `$&`, `$1`, ... interpreted as replacement patterns.
      (text, { pattern, replacement }) => text.replace(pattern, () => replacement),
      chunk
    );

  return tokens.map((token) =>
    token.type === 'code' ? { ...token, value: translateChunk(token.value) } : token
  );
}

/**
 * Translates source text by replacing keywords from the keyword table,
 * leaving string literals, comments and whitespace untouched.
 *
 * @param {string} src - Source text.
 * @returns {string} Translated source text.
 */
function translateSource(src) {
  const tokens = tokenizePreservingLiterals(src);
  const replaced = replaceTokens(tokens);
  return replaced.map((t) => t.value).join('');
}

/**
 * Reads a UTF-8 file and translates its contents with translateSource.
 *
 * @param {string} filepath - Path of the file to read.
 * @returns {string} Translated source text.
 * @throws {Error} Whatever fs.readFileSync throws when the file cannot be read.
 */
function translateFile(filepath) {
  const src = fs.readFileSync(filepath, 'utf8');
  return translateSource(src);
}

module.exports = { translateSource, translateFile };