@amjur/courts-db-ts
Version:
TypeScript port of the Python courts-db library for legal court identification
239 lines (238 loc) • 8.79 kB
JavaScript
import { readFileSync, readdirSync } from 'fs';
import { fileURLToPath } from 'url';
import { dirname, join } from 'path';
import unidecode from 'unidecode';
import { PCREUtils } from '@syntropiq/xtrax';
import { processVariables as xtraxProcessVariables, recursiveSubstitute } from '@syntropiq/xtrax/template-engine';
const { escapeRegex, substituteEdition, substituteEditions, getPCREPatternFromData, convertNamedGroups } = PCREUtils;
import { compileRegex } from '@syntropiq/xtrax/pcre-utils';
// Get the directory of this module.
// ES modules have no built-in __filename/__dirname, so both are derived
// from import.meta.url.
const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);
// Root of the bundled courts data: the `courts_db` directory that sits one
// level above this module's directory. Every data path below is joined onto it.
export const dbRoot = join(__dirname, '../courts_db');
// Re-export the imported functions for use by index.ts
// (these names are destructured from PCREUtils at the top of the file).
export { escapeRegex, substituteEdition, substituteEditions, getPCREPatternFromData, convertNamedGroups };
// Ordinals for court processing.
// Index i holds the spelled-out ordinal for i + 1 ("first" .. "twentieth").
// loadCourtsDb slices this list to expand `${start-end}` placeholders into a
// regex alternation, so every entry must be spelled the way it appears in
// court names (e.g. "ninth", not "nineth").
export const ordinals = [
    'first',
    'second',
    'third',
    'fourth',
    'fifth',
    'sixth',
    'seventh',
    'eighth',
    'ninth',
    'tenth',
    'eleventh',
    'twelfth',
    'thirteenth',
    'fourteenth',
    'fifteenth',
    'sixteenth',
    'seventeenth',
    'eighteenth',
    'nineteenth',
    'twentieth',
];
/**
 * JSON.parse reviver hook for date fields.
 *
 * Currently a pass-through: every value, including string `start` / `end`
 * fields, is returned unchanged. The date-field check is kept as the single
 * place to add Date conversion later.
 *
 * @param {string} key - property name being revived
 * @param {*} value - parsed value for that property
 * @returns {*} the value, unchanged
 */
function datetimeParser(key, value) {
    const isDateField = ['start', 'end'].includes(key);
    if (!isDateField || typeof value !== 'string') {
        return value;
    }
    // Date strings are deliberately left as strings for now.
    return value;
}
/**
 * Read a UTF-8 JSON file from disk and parse it, routing every key/value
 * pair through the datetimeParser reviver.
 *
 * @param {string} filePath - path of the JSON file to read
 * @returns {*} the parsed JSON value
 */
function loadJsonData(filePath) {
    return JSON.parse(readFileSync(filePath, 'utf-8'), datetimeParser);
}
/**
 * Load the courts database with proper variable processing.
 *
 * Steps:
 *  1. Read `data/variables.json` and add one variable per `data/places/*.txt`
 *     file: an alternation of the file's non-empty, trimmed lines, named after
 *     the file without its extension.
 *  2. Process the variables with xtrax.
 *  3. In the raw text of `data/courts.json`, expand `${start-end}` ordinal
 *     ranges and `${name}` variables, then double any lone backslash.
 *  4. Parse the resulting JSON and let each court inherit missing `dates`,
 *     `type` and `location` from its parent court.
 *
 * A failure while reading the places directory is logged and otherwise
 * ignored; failures reading or parsing the two JSON files propagate.
 *
 * @returns {Array<Object>} the parsed court records, each with `parent`
 *   normalized to `null` when absent
 */
export function loadCourtsDb() {
    // Load base variables
    const variablesPath = join(dbRoot, 'data', 'variables.json');
    const rawVariables = loadJsonData(variablesPath);
    // Load places files (best effort: a missing directory only produces a warning)
    const placesDir = join(dbRoot, 'data', 'places');
    const placeExtension = '.txt';
    try {
        const placeFiles = readdirSync(placesDir).filter(f => f.endsWith(placeExtension));
        for (const file of placeFiles) {
            const content = readFileSync(join(placesDir, file), 'utf-8');
            const lines = content.split('\n').map(line => line.trim()).filter(line => line);
            // Strip only the trailing extension; replace('.txt', '') would remove
            // the first occurrence anywhere in the file name.
            const placeName = file.slice(0, -placeExtension.length);
            rawVariables[placeName] = `(${lines.join('|')})`;
        }
    }
    catch (error) {
        console.warn('Could not load places files:', error);
    }
    // Process variables using xtrax
    const processedVariables = xtraxProcessVariables(rawVariables);
    // Load and process courts.json
    const courtsPath = join(dbRoot, 'data', 'courts.json');
    let courtsContent = readFileSync(courtsPath, 'utf-8');
    // Handle ordinals - replace patterns like ${1-41} with an alternation of the
    // ordinals in that 1-based, inclusive range (slice truncates to the list length)
    courtsContent = courtsContent.replace(/\$\{(\d+)-(\d+)\}/g, (_placeholder, start, end) => {
        const rangeOrdinals = ordinals.slice(parseInt(start, 10) - 1, parseInt(end, 10));
        return `(${rangeOrdinals.join('|')})`;
    });
    // Substitute variables in the courts content
    for (const [key, value] of Object.entries(processedVariables)) {
        const variablePattern = new RegExp(`\\$\\{${escapeRegex(key)}\\}`, 'g');
        // A function replacer inserts the value literally. Passing the value as a
        // replacement string would interpret sequences such as $&, $1 or $$ that
        // can legitimately occur inside a regex fragment.
        courtsContent = courtsContent.replace(variablePattern, () => value);
    }
    // Handle backslashes carefully - only escape single backslashes that aren't already escaped
    // This preserves regex patterns like \\d+ while escaping any unescaped backslashes
    courtsContent = courtsContent.replace(/(?<!\\)\\(?!\\)/g, '\\\\');
    // Parse the processed JSON
    const data = JSON.parse(courtsContent);
    // Index courts by id once instead of scanning the list for every child.
    // The first record wins on duplicate ids, matching Array.prototype.find.
    const courtsById = new Map();
    for (const court of data) {
        if (!courtsById.has(court.id)) {
            courtsById.set(court.id, court);
        }
    }
    // Handle parent court inheritance and normalize parent field
    for (const court of data) {
        // Normalize missing parent field to null (to match Python behavior)
        if (!court.parent) {
            court.parent = null;
            continue;
        }
        const parent = courtsById.get(court.parent);
        if (!parent) {
            continue;
        }
        // Inherit missing properties from parent
        if (!court.dates || court.dates.length === 0) {
            court.dates = parent.dates;
        }
        if (!court.type) {
            court.type = parent.type;
        }
        if (!court.location) {
            court.location = parent.location;
        }
    }
    return data;
}
/**
 * Load regex variables (now deprecated - use loadCourtsDb which includes processed variables).
 *
 * Reads `data/variables.json` under dbRoot and runs it through the xtrax
 * variable processor. Place files are not loaded here.
 *
 * @returns {Object} the processed variables as returned by xtrax
 */
export function loadRegexVariables() {
    return xtraxProcessVariables(loadJsonData(join(dbRoot, 'data', 'variables.json')));
}
/**
 * Index court objects by their `id`.
 *
 * When several courts share an id, the one appearing last wins.
 *
 * @param {Iterable<Object>} courts - court records, each with an `id`
 * @returns {Object} plain object mapping court id to court record
 */
export function makeCourtDictionary(courts) {
    const lookup = {};
    [...courts].forEach((entry) => {
        lookup[entry.id] = entry;
    });
    return lookup;
}
/**
 * Compile all regex patterns for courts.
 *
 * For every court, each entry of `court.regex` plus the court's own name is
 * variable-substituted, converted to PCRE named-group syntax, transliterated
 * with unidecode, prefixed with `(?i)` unless already present, and compiled.
 * A pattern that fails at any of those steps is logged and skipped, so a
 * court's list may be shorter than its source patterns.
 *
 * @param {Iterable<Object>} courts - court records with `id`, `name`, `regex`
 * @returns {Promise<Object>} map of court id to its compiled regexes, in
 *   source order
 */
export async function gatherRegexes(courts) {
    const variables = loadRegexVariables();
    const regexesByCourt = {};
    for (const court of courts) {
        const compiled = [];
        // The court name doubles as a pattern (like the Python version does)
        for (const source of [...court.regex, court.name]) {
            try {
                // Variable substitution, then group-syntax conversion, then
                // transliteration so patterns match normalized input
                const substituted = recursiveSubstitute(source, variables);
                const normalized = unidecode(convertNamedGroups(substituted));
                // Force case-insensitive matching via the inline flag
                const pattern = normalized.startsWith('(?i)') ? normalized : '(?i)' + normalized;
                compiled.push(await compileRegex(pattern));
            }
            catch (error) {
                console.warn(`Failed to compile PCRE regex for court ${court.id}: ${source}`, error);
            }
        }
        regexesByCourt[court.id] = compiled;
    }
    return regexesByCourt;
}
/**
 * Heuristically decide whether a court is a bankruptcy court.
 *
 * True when the id contains "bap" or "bank", or when the lower-cased name
 * contains "bankruptcy". The name is only consulted if the id checks fail.
 *
 * @param {Object} court - court record with `id` and `name`
 * @returns {boolean}
 */
export function isBankruptcyCourt(court) {
    const idMarkers = ['bap', 'bank'];
    if (idMarkers.some((marker) => court.id.includes(marker))) {
        return true;
    }
    return court.name.toLowerCase().includes('bankruptcy');
}
/**
 * Check if a court matches the location filter.
 *
 * The match is a case-insensitive substring test against, in order, the
 * court's `location`, `name` and `jurisdiction`. An empty or missing filter
 * matches every court.
 *
 * @param {Object} court - court record; `name` is required, `location` and
 *   `jurisdiction` are optional
 * @param {string} [location] - location text to look for
 * @returns {boolean}
 */
export function matchesLocation(court, location) {
    if (!location) {
        return true;
    }
    const needle = location.toLowerCase();
    const mentions = (text) => text.toLowerCase().includes(needle);
    if (court.location && mentions(court.location)) {
        return true;
    }
    if (mentions(court.name)) {
        return true;
    }
    return Boolean(court.jurisdiction) && mentions(court.jurisdiction);
}
/**
 * Check if a court was active on a given date.
 *
 * A court is active when the date falls inside at least one of its date
 * ranges, bounds included. A range with no `start` is open at the beginning,
 * a range with no `end` is open at the end, and a range with neither matches
 * any date. When no date is supplied the court is treated as active.
 *
 * @param {Object} court - court record with a `dates` list of
 *   `{ start, end }` entries
 * @param {Date} [date] - the date to test
 * @returns {boolean}
 */
export function isCourtActiveOnDate(court, date) {
    if (!date) {
        return true;
    }
    for (const { start, end } of court.dates) {
        const opensOn = start ? new Date(start) : null;
        const closesOn = end ? new Date(end) : null;
        // Lower bound: skip ranges that begin after the date
        const hasStarted = opensOn === null || date >= opensOn;
        if (!hasStarted) {
            continue;
        }
        // Upper bound: the range matches if it is open-ended or not yet over
        const notYetOver = closesOn === null || date <= closesOn;
        if (notYetOver) {
            return true;
        }
    }
    return false;
}