UNPKG

@ansvar/singapore-law-mcp

Version:

Complete Singapore law database — 523 Acts, 28K+ provisions from Singapore Statutes Online (sso.agc.gov.sg) with full-text search, definitions, and citation support

193 lines 7.08 kB
#!/usr/bin/env tsx
/**
 * Singapore Law MCP — Census Script
 *
 * Enumerates ALL current Acts from Singapore Statutes Online (sso.agc.gov.sg)
 * by scraping the browse page with PageSize=500 pagination.
 *
 * Outputs data/census.json in golden standard format.
 *
 * Usage:
 *   npx tsx scripts/census.ts
 *   npx tsx scripts/census.ts --page 2   # Fetch only page 2 (resume)
 */
import * as fs from 'fs';
import * as path from 'path';
import { fileURLToPath } from 'url';
import { fetchWithRateLimit } from './lib/fetcher.js';

const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);
const SSO_BASE = 'https://sso.agc.gov.sg';
const BROWSE_URL = `${SSO_BASE}/Browse/Act/Current/All`;
const CENSUS_PATH = path.resolve(__dirname, '../data/census.json');
const PAGE_SIZE = 500;

/** Named HTML entities decoded in scraped titles (numeric references are handled separately). */
const NAMED_ENTITIES = new Map([
  ['amp', '&'],
  ['lt', '<'],
  ['gt', '>'],
  ['quot', '"'],
  ['apos', "'"],
  ['nbsp', '\u00a0'],
]);

/**
 * Parse command-line flags.
 *
 * Recognised flags:
 *   --page <n>   Fetch only browse page <n>; <n> must be a positive integer.
 *
 * @returns {{ page: number | null }} `page` is null when --page was not given.
 * @throws {Error} When --page is missing its value or the value is not a positive integer.
 */
function parseArgs() {
  const args = process.argv.slice(2);
  let page = null;
  for (let i = 0; i < args.length; i++) {
    if (args[i] !== '--page') {
      continue;
    }
    const raw = args[i + 1];
    // Reject junk up front: a NaN or 0 page would otherwise silently turn a
    // "resume page N" request into a full run.
    if (raw === undefined || !/^\d+$/.test(raw) || Number.parseInt(raw, 10) < 1) {
      throw new Error(`--page expects a positive integer, got: ${raw ?? '(nothing)'}`);
    }
    page = Number.parseInt(raw, 10);
    i++; // Skip the value we just consumed.
  }
  return { page };
}

/**
 * Normalise an Act code into a stable, lowercase, kebab-case ID.
 * E.g. "PDPA2012" -> "pdpa-2012", "CoA1967" -> "coa-1967"
 *
 * @param {string} code Act code as it appears in the /Act/<code> URL.
 * @returns {string} Lowercased code with a hyphen before a trailing 4-digit year.
 */
function actCodeToId(code) {
  // Insert a hyphen before a trailing year (exactly 4 digits preceded by a non-digit)
  const withHyphen = code.replace(/(\D)(\d{4})$/, '$1-$2');
  return withHyphen.toLowerCase();
}

/**
 * Decode the HTML character references that can appear in link text.
 *
 * Decoding is done in a single pass so that already-escaped text such as
 * "&amp;lt;" becomes "&lt;" rather than "<". Unknown named entities and
 * out-of-range numeric references are left untouched.
 *
 * @param {string} text Raw text taken from between HTML tags.
 * @returns {string} Text with numeric and common named entities decoded.
 */
function decodeHtmlEntities(text) {
  return text.replace(/&(#x[0-9a-f]+|#\d+|[a-z][a-z0-9]*);/gi, (whole, body) => {
    if (body.startsWith('#')) {
      const isHex = body[1] === 'x' || body[1] === 'X';
      const codePoint = Number.parseInt(body.slice(isHex ? 2 : 1), isHex ? 16 : 10);
      // String.fromCodePoint throws a RangeError above U+10FFFF.
      return codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : whole;
    }
    return NAMED_ENTITIES.get(body) ?? whole;
  });
}

/**
 * Parse act entries from an SSO browse page HTML.
 * Pattern: <a class="non-ajax" href="/Act/CODE" rel="">Title</a>
 *
 * Each Act code is reported once per page (first occurrence wins). Titles are
 * entity-decoded and trimmed so the census stores plain text, not HTML.
 *
 * @param {string} html Browse page markup.
 * @returns {{ actCode: string, title: string }[]} Entries in document order.
 */
function parseActEntries(html) {
  const entries = [];
  const seen = new Set();
  const pattern = /<a\s+class="non-ajax"\s+href="\/Act\/([^"?]+)"\s+rel="">([^<]+)<\/a>/g;
  for (const match of html.matchAll(pattern)) {
    const actCode = match[1].trim();
    if (seen.has(actCode)) {
      continue;
    }
    seen.add(actCode);
    entries.push({ actCode, title: decodeHtmlEntities(match[2]).trim() });
  }
  return entries;
}

/**
 * Extract total result count from browse page.
 * Pattern: "523 results" or similar; thousands separators ("1,234 results")
 * are accepted so a large count is not truncated to its last group.
 *
 * @param {string} html Browse page markup.
 * @returns {number | null} The first count found, or null when none is present.
 */
function extractTotalCount(html) {
  const match = html.match(/(\d{1,3}(?:,\d{3})+|\d+)\s+result/i);
  return match ? Number.parseInt(match[1].replaceAll(',', ''), 10) : null;
}

/**
 * Load existing census for merge/resume.
 *
 * A missing, unreadable, or malformed census file is not fatal: a warning is
 * printed and the run starts from an empty map (never a half-filled one).
 *
 * @returns {Map<string, object>} Previous census entries keyed by their `id`.
 */
function loadExistingCensus() {
  if (!fs.existsSync(CENSUS_PATH)) {
    return new Map();
  }
  try {
    const data = JSON.parse(fs.readFileSync(CENSUS_PATH, 'utf-8'));
    if (!Array.isArray(data?.laws)) {
      console.warn(` WARNING: ${CENSUS_PATH} has no "laws" array; starting fresh`);
      return new Map();
    }
    const existing = new Map();
    for (const law of data.laws) {
      existing.set(law.id, law);
    }
    return existing;
  } catch (error) {
    console.warn(` WARNING: could not read ${CENSUS_PATH} (${error.message}); starting fresh`);
    return new Map();
  }
}

/**
 * Build the browse URL for one results page.
 *
 * NOTE(review): this assumes the trailing path segment is the 1-based page
 * number (page 1 has no segment) — confirm against the live portal.
 *
 * @param {number} pageNum Page number, starting at 1.
 * @returns {string} Absolute browse URL for that page.
 */
function buildPageUrl(pageNum) {
  const query = `PageSize=${PAGE_SIZE}&SortBy=Title&SortOrder=ASC`;
  return pageNum === 1 ? `${BROWSE_URL}?${query}` : `${BROWSE_URL}/${pageNum}?${query}`;
}

/**
 * Run the census: fetch the browse page(s), merge the Acts found with any
 * previous census, and write the result to CENSUS_PATH.
 *
 * Pages that do not return HTTP 200 are skipped; the census is still written
 * (previous entries are preserved by the merge) but the run is reported as
 * incomplete and the process exit code is set to 1.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const { page: singlePage } = parseArgs();
  const isSinglePage = singlePage !== null;

  console.log('Singapore Law MCP — Census');
  console.log('==========================\n');
  console.log(` Source: ${SSO_BASE}`);
  console.log(` Browse URL: ${BROWSE_URL}`);
  console.log(` Page size: ${PAGE_SIZE}`);
  if (isSinglePage) {
    console.log(` Single page mode: page ${singlePage}`);
  }
  console.log();

  const existingEntries = loadExistingCensus();
  if (existingEntries.size > 0) {
    console.log(` Loaded ${existingEntries.size} existing entries from previous census\n`);
  }

  // Keyed by census id so an Act repeated across pages is counted once
  // (the last occurrence wins, matching the merge below).
  const actsById = new Map();
  const failedPages = [];
  let totalFromServer = null;

  // Work list of pages; in full mode it grows once page 1 reports the total.
  const pages = isSinglePage ? [singlePage] : [1];
  for (let index = 0; index < pages.length; index++) {
    const pageNum = pages[index];
    const url = buildPageUrl(pageNum);
    console.log(` Fetching page ${pageNum}: ${url}`);

    // Project helper; this code relies only on the resolved value exposing
    // `status` and `body`.
    const result = await fetchWithRateLimit(url);
    if (result.status !== 200) {
      console.error(` ERROR: HTTP ${result.status} for page ${pageNum}`);
      failedPages.push(pageNum);
      continue;
    }

    const entries = parseActEntries(result.body);
    console.log(` Found ${entries.length} acts on page ${pageNum}`);
    for (const entry of entries) {
      actsById.set(actCodeToId(entry.actCode), entry);
    }

    // Get total count from first page
    if (pageNum === 1 && !isSinglePage) {
      totalFromServer = extractTotalCount(result.body);
      if (totalFromServer === null) {
        console.warn(' WARNING: could not read the total result count; only page 1 will be processed');
      } else {
        console.log(` Total results reported: ${totalFromServer}`);
        const totalPages = Math.ceil(totalFromServer / PAGE_SIZE);
        if (totalPages > 1) {
          console.log(` Need ${totalPages} pages total`);
          for (let p = 2; p <= totalPages; p++) {
            pages.push(p);
          }
        }
      }
    }
  }

  console.log(`\n Total unique acts found: ${actsById.size}`);
  if (totalFromServer !== null && failedPages.length === 0 && actsById.size !== totalFromServer) {
    console.warn(` WARNING: portal reported ${totalFromServer} results but ${actsById.size} acts were parsed`);
  }

  // Convert to census entries, merging with existing data
  const today = new Date().toISOString().split('T')[0]; // UTC calendar date
  for (const [id, { actCode, title }] of actsById) {
    const url = `${SSO_BASE}/Act/${actCode}`;
    // Preserve ingestion data from existing census if available
    const existing = existingEntries.get(id);
    // NOTE(review): status/category/classification are always reset to these
    // defaults, so a previous 'inaccessible' or 'excluded' classification is
    // overwritten for any Act still listed — confirm this is intended.
    const entry = {
      id,
      title,
      identifier: actCode,
      url,
      status: 'in_force',
      category: 'act',
      classification: 'ingestable',
      ingested: existing?.ingested ?? false,
      provision_count: existing?.provision_count ?? 0,
      ingestion_date: existing?.ingestion_date ?? null,
    };
    existingEntries.set(id, entry);
  }

  // Build final census (entries from a previous census that were not seen in
  // this run are kept as they were).
  const allLaws = Array.from(existingEntries.values()).sort((a, b) => a.title.localeCompare(b.title));
  const ingestable = allLaws.filter((l) => l.classification === 'ingestable').length;
  const inaccessible = allLaws.filter((l) => l.classification === 'inaccessible').length;
  const excluded = allLaws.filter((l) => l.classification === 'excluded').length;
  const census = {
    schema_version: '1.0',
    jurisdiction: 'SG',
    jurisdiction_name: 'Singapore',
    portal: SSO_BASE,
    census_date: today,
    agent: 'claude-opus-4-6',
    summary: {
      total_laws: allLaws.length,
      ingestable,
      ocr_needed: 0,
      inaccessible,
      excluded,
    },
    laws: allLaws,
  };

  fs.mkdirSync(path.dirname(CENSUS_PATH), { recursive: true });
  fs.writeFileSync(CENSUS_PATH, JSON.stringify(census, null, 2));

  console.log('\n==========================');
  console.log(failedPages.length === 0 ? 'Census Complete' : 'Census Incomplete');
  console.log('==========================\n');
  console.log(` Total acts: ${allLaws.length}`);
  console.log(` Ingestable: ${ingestable}`);
  console.log(` Inaccessible: ${inaccessible}`);
  console.log(` Excluded: ${excluded}`);
  console.log(`\n Output: ${CENSUS_PATH}`);

  if (failedPages.length > 0) {
    console.error(`\n WARNING: ${failedPages.length} page(s) could not be fetched (${failedPages.join(', ')}); re-run with --page <n> to fill the gaps`);
    // Signal the partial result to callers without discarding what was written.
    process.exitCode = 1;
  }
}

main().catch((error) => {
  console.error('Fatal error:', error);
  process.exit(1);
});
//# sourceMappingURL=census.js.map