@amjur/courts-db-ts
Version:
TypeScript port of the Python courts-db library for legal court identification
197 lines (196 loc) • 6.62 kB
JavaScript
import unidecode from 'unidecode';
import { stripPunc } from './text-utils.js';
import { loadCourtsDb, makeCourtDictionary, gatherRegexes, isBankruptcyCourt, matchesLocation, isCourtActiveOnDate } from './utils.js';
// Lazy-loaded data structures.
// Each cache starts as null and is populated on first use by its accessor
// below (getCourts, getCourtDict, getRegexes), so this module does no
// loading work of its own at import time.
let courts = null; // filled with the result of loadCourtsDb()
let courtDict = null; // filled with the result of makeCourtDictionary(getCourts())
let regexes = null; // filled with the awaited result of gatherRegexes(getCourts())
/**
 * Return the courts database, loading it on the first call.
 *
 * The value produced by loadCourtsDb() is stored in the module-level
 * `courts` cache and handed back unchanged on every later call.
 *
 * @returns {*} Whatever loadCourtsDb() returned (iterated elsewhere in this
 *   module as a list of court records).
 */
function getCourts() {
  // Fast path: the database has already been loaded.
  if (courts !== null) {
    return courts;
  }
  courts = loadCourtsDb();
  return courts;
}
/**
 * Return the court lookup dictionary, building it on the first call.
 *
 * The dictionary is produced by makeCourtDictionary(getCourts()) and stored
 * in the module-level `courtDict` cache; later calls return the cached value.
 *
 * @returns {*} Whatever makeCourtDictionary() returned (indexed by court ID
 *   elsewhere in this module).
 */
function getCourtDict() {
  // Fast path: the dictionary has already been built.
  if (courtDict !== null) {
    return courtDict;
  }
  courtDict = makeCourtDictionary(getCourts());
  return courtDict;
}
// In-flight gatherRegexes() call shared by concurrent callers; null when idle.
let regexesLoading = null;
/**
 * Get compiled regexes (lazy loaded).
 *
 * The first call runs gatherRegexes(getCourts()) and stores the awaited
 * result in the module-level `regexes` cache. Calls that arrive while that
 * work is still pending share the same promise instead of starting another
 * gatherRegexes() run. If the load fails, every waiting caller sees the
 * rejection and nothing is cached, so the next call retries.
 *
 * @returns {Promise<*>} Resolves to whatever gatherRegexes() produced
 *   (indexed by court ID elsewhere in this module).
 */
async function getRegexes() {
  if (regexes !== null) {
    return regexes;
  }
  if (regexesLoading === null) {
    const attempt = (async () => {
      regexes = await gatherRegexes(getCourts());
      return regexes;
    })();
    // Clear the in-flight marker once this attempt settles, on success or
    // failure, so a failed load is not remembered forever.
    const release = () => {
      if (regexesLoading === attempt) {
        regexesLoading = null;
      }
    };
    attempt.then(release, release);
    regexesLoading = attempt;
  }
  return regexesLoading;
}
/**
 * Find the IDs of courts whose regexes match a court name string.
 *
 * The input is passed through stripPunc(), unidecode() and toLowerCase()
 * before matching. Courts are visited in getCourts() order; a court is a hit
 * as soon as one of its regexes tests true. Hits are then narrowed by
 * reduceCourtMatches() and filterSubstringMatches().
 *
 * NOTE(review): every hit records the whole normalized input as
 * `matchedText`, so the substring filter compares identical strings —
 * confirm whether the regex's actual matched span was intended.
 * NOTE(review): `allowPartialMatches` is accepted but never read here.
 *
 * @param {string} courtStr - Court name text to identify.
 * @param {boolean|null|undefined} bankruptcy - When not null/undefined, only
 *   courts whose isBankruptcyCourt() result equals this value are considered.
 * @param {*} location - When truthy, only courts accepted by
 *   matchesLocation(court, location) are considered.
 * @param {boolean} [allowPartialMatches=false] - Currently unused.
 * @returns {Promise<Array>} The `id` values of the matching courts.
 */
export async function findCourtIdsByName(courtStr, bankruptcy, location, allowPartialMatches = false) {
  const regexesByCourt = await getRegexes();
  const courtList = getCourts();
  const needle = unidecode(stripPunc(courtStr)).toLowerCase();
  const hasBankruptcyFilter = bankruptcy !== null && bankruptcy !== undefined;

  const hits = [];
  for (const candidate of courtList) {
    if (hasBankruptcyFilter && isBankruptcyCourt(candidate) !== bankruptcy) {
      continue;
    }
    if (location && !matchesLocation(candidate, location)) {
      continue;
    }
    const patterns = regexesByCourt[candidate.id] || [];
    for (const pattern of patterns) {
      let matched = false;
      try {
        matched = pattern.test(needle);
      }
      catch (error) {
        // Best effort: a broken regex is reported and the next one is tried.
        console.warn(`Error testing regex for court ${candidate.id}:`, error);
      }
      if (matched) {
        hits.push({
          courtId: candidate.id,
          matchedText: needle
        });
        break; // One hit per court is enough.
      }
    }
  }

  // Drop parent courts when one of their children also matched.
  const withoutParents = await reduceCourtMatches(hits);
  // Drop hits whose matched text is contained in another hit's text.
  const survivors = filterSubstringMatches(withoutParents);
  return survivors.map(({ courtId }) => courtId);
}
/**
 * Filter courts by date.
 *
 * Keeps only the IDs that name a court in the court dictionary for which
 * isCourtActiveOnDate(court, date) is truthy. IDs that are not the
 * dictionary's own keys are dropped, including names such as "constructor"
 * or "__proto__" that a plain object would otherwise resolve through its
 * prototype.
 *
 * @param {Array} courtIds - Court IDs to filter.
 * @param {*} date - Date forwarded to isCourtActiveOnDate(); when falsy,
 *   `courtIds` is returned unchanged (same array, no copy).
 * @returns {Array} The IDs that passed, in their original order.
 */
export function filterCourtsByDate(courtIds, date) {
  if (!date) {
    return courtIds;
  }
  const dict = getCourtDict();
  return courtIds.filter((courtId) => {
    // Own-property check: inherited members are not courts.
    if (!Object.prototype.hasOwnProperty.call(dict, courtId)) {
      return false;
    }
    const court = dict[courtId];
    return Boolean(court) && Boolean(isCourtActiveOnDate(court, date));
  });
}
/**
 * Filter courts by bankruptcy.
 *
 * Keeps only the IDs that name a court in the court dictionary whose
 * isBankruptcyCourt() result strictly equals `bankruptcy`. IDs that are not
 * the dictionary's own keys are dropped, including names such as
 * "constructor" or "__proto__" that a plain object would otherwise resolve
 * through its prototype.
 *
 * @param {Array} courtIds - Court IDs to filter.
 * @param {boolean|null|undefined} bankruptcy - Wanted bankruptcy flag; when
 *   null or undefined, `courtIds` is returned unchanged (same array, no copy).
 * @returns {Array} The IDs that passed, in their original order.
 */
export function filterCourtsByBankruptcy(courtIds, bankruptcy) {
  if (bankruptcy === null || bankruptcy === undefined) {
    return courtIds;
  }
  const dict = getCourtDict();
  return courtIds.filter((courtId) => {
    // Own-property check: inherited members are not courts.
    if (!Object.prototype.hasOwnProperty.call(dict, courtId)) {
      return false;
    }
    const court = dict[courtId];
    return Boolean(court) && isBankruptcyCourt(court) === bankruptcy;
  });
}
/**
 * Find court by ID.
 *
 * Looks the ID up among the court dictionary's own keys only, so names a
 * plain object inherits from its prototype ("constructor", "toString",
 * "__proto__", ...) are reported as not found instead of being returned as
 * if they were courts.
 *
 * @param {string} courtId - Court ID to look up.
 * @returns {Array} A one-element array holding the court, or an empty array
 *   when the ID is unknown.
 */
export function findCourtById(courtId) {
  const dict = getCourtDict();
  if (!Object.prototype.hasOwnProperty.call(dict, courtId)) {
    return [];
  }
  const court = dict[courtId];
  return court ? [court] : [];
}
/**
 * Find courts by name, then apply the optional bankruptcy and date filters.
 *
 * @param {string} courtStr - Court name text to identify.
 * @param {object} [options] - Optional filters.
 * @param {boolean|null} [options.bankruptcy=null] - Passed to the name search
 *   and, when not null/undefined, applied again through
 *   filterCourtsByBankruptcy().
 * @param {*} [options.location=null] - Passed to the name search.
 * @param {boolean} [options.allowPartialMatches=false] - Passed to the name
 *   search.
 * @param {*} [options.date=null] - When truthy, results are also passed
 *   through filterCourtsByDate().
 * @returns {Promise<Array>} The court IDs that survive every filter.
 */
export async function findCourt(courtStr, options = {}) {
  const {
    bankruptcy = null,
    location = null,
    allowPartialMatches = false,
    date = null
  } = options;
  const hasBankruptcyFilter = bankruptcy !== null && bankruptcy !== undefined;

  // Step 1: name search with the basic filters.
  const byName = await findCourtIdsByName(courtStr, bankruptcy, location, allowPartialMatches);

  // Step 2: repeat the bankruptcy filter on the result (deliberate double
  // filtering, mirroring the Python implementation).
  const byBankruptcy = hasBankruptcyFilter
    ? filterCourtsByBankruptcy(byName, bankruptcy)
    : byName;

  // Step 3: date filter, only when a date was supplied.
  return date ? filterCourtsByDate(byBankruptcy, date) : byBankruptcy;
}
/**
 * Drop matches for courts that are the parent of another matched court.
 *
 * A list with zero or one match is returned as-is. If removing parents would
 * leave nothing, the original list is returned instead.
 *
 * @param {Array<{courtId: *, matchedText: *}>} matches - Matches to narrow.
 * @returns {Promise<Array>} The narrowed matches, in their original order.
 */
async function reduceCourtMatches(matches) {
  if (matches.length <= 1) {
    return matches;
  }
  const lookup = getCourtDict();
  // Every truthy `parent` named by a matched court.
  const parentsOfMatches = new Set(
    matches
      .map(({ courtId }) => lookup[courtId]?.parent)
      .filter(Boolean)
  );
  const childrenOnly = matches.filter(({ courtId }) => !parentsOfMatches.has(courtId));
  if (childrenOnly.length === 0) {
    return matches;
  }
  return childrenOnly;
}
/**
 * Drop matches whose matched text is contained in another match's text.
 *
 * A match is removed when some other match has a different `matchedText`
 * that includes this one as a substring. Matches with identical text never
 * eliminate each other. A list with zero or one match is returned as-is.
 *
 * @param {Array<{courtId: *, matchedText: string}>} matches - Matches to narrow.
 * @returns {Array} The surviving matches, in their original order.
 */
function filterSubstringMatches(matches) {
  if (matches.length <= 1) {
    return matches;
  }
  const allTexts = matches.map(({ matchedText }) => matchedText);
  // True when `text` sits strictly inside some other matched text.
  const isContainedElsewhere = (text) =>
    allTexts.some((candidate) => candidate !== text && candidate.includes(text));
  return matches.filter(({ matchedText }) => !isContainedElsewhere(matchedText));
}
/**
 * Drop court IDs that are the parent of another court ID in the list.
 *
 * A list with zero or one ID is returned as-is. If removing parents would
 * leave nothing, the original list is returned instead.
 *
 * @deprecated Use reduceCourtMatches instead
 * @param {Array} courtIds - Court IDs to narrow.
 * @returns {Array} The narrowed IDs, in their original order.
 */
function reduceCourtList(courtIds) {
  if (courtIds.length <= 1) {
    return courtIds;
  }
  const lookup = getCourtDict();
  // Gather every truthy `parent` named by a court in the list.
  const parents = courtIds.reduce((seen, id) => {
    const parentId = lookup[id]?.parent;
    if (parentId) {
      seen.add(parentId);
    }
    return seen;
  }, new Set());
  const leaves = courtIds.filter((id) => !parents.has(id));
  return leaves.length === 0 ? courtIds : leaves;
}
// Export data accessors for compatibility.
// Each alias is the accessor function itself, not the cached data: call
// courts() or courtDict() to obtain the value, and await regexes() because
// getRegexes is async.
export { getCourts as courts };
export { getCourtDict as courtDict };
export { getRegexes as regexes };
// Re-export everything from the types, text-utils and utils modules so
// consumers can import it all from this entry point.
export * from './types.js';
export * from './text-utils.js';
export * from './utils.js';