detect-string-format
Version:
detect formats from array of String
98 lines (89 loc) • 5.44 kB
text/typescript
/* eslint-disable prettier/prettier */
/* eslint-disable @typescript-eslint/no-explicit-any */
import returnFormatDetector from '../lib/detect-format';
import { JSONSchema7 } from 'json-schema';
// const defaultFormats = require("ajv/lib/compile/formats")('fast'); // https://github.com/epoberezkin/ajv/blob/master/lib/compile/formats.js
// const sample_data = require("../test/format.json");
// https://github.com/epoberezkin/ajv/blob/master/spec/typescript/index.ts
const formats = ['date', 'time', 'date-time', 'uri', 'email', 'ipv4', 'ipv6', 'uuid'];
const stringFormats: JSONSchema7[] = [
{
pattern: '^(\\([0-9]{3}\\))?[0-9]{3}-[0-9]{4}$',
$comment: 'American Phone Number'
},
{ pattern: '^4[0-9]{12}(?:[0-9]{3})?$', $comment: 'Visa Credit Card' },
{ pattern: '^3[47][0-9]{13}$', $comment: 'American Express Credit Card' },
{
pattern: '^3(?:0[0-5]|[68][0-9])[0-9]{11}$',
$comment: 'Diners Credit Club'
},
{
pattern: '^[a-zA-Z]{2}[0-9]{2}[a-zA-Z0-9]{4}[0-9]{7}([a-zA-Z0-9]?){0,20}',
$comment: 'IBANN'
},
...formats.map((format: string) => ({ format }))
];
//const fastDetect:Function = returnFormatDetector({ schemas: stringFormats, ajvOptions: { format: 'full' }, options: { minHits: 0 }}); // full|fast
const fastDetect: Function = returnFormatDetector({}); // full|fast
// There are two modes of format validation: fast and full. This mode affects formats date, time, date-time, uri, uri-reference, email, and hostname.
// "full" - more restrictive and slow validation. E.g., 25:00:00 and 2015/14/33 will be invalid time and date in 'full' mode but it will be valid in 'fast' mode.
// false - ignore all format keywords.
const sampleSize = 10 * 1000;
//prettier-ignore
const testData: any = {
Telss: ['555-1212', '555-1211112'],
TelFl: ['(888)555-1212 ext. 532', '(800)FLOWERS'],
URI: ['scheme:[//authority]path[?query][#fragment]', 'https://john.doe@www.example.com:123/forum/questions/?tag=networking&order=newest#top', '#fragment'],
urlst: ['https://www.example.com/foo/?bar=baz&inga=42&quux', "http://-.~_!$&'()*+,;=:%40:80%2f::::::@example.com", 'http://foo.com/unicode_(✪)_in_parens', 'https://github.com/epoberezkin/ajv/blob/master/lib/compile/formats.js'],
slow: ['https://www.example.com843244fdst4b46xn6e', 'https://www.exampl234e.com85fdst4b46xn6e', 'https://www.exam24ple.com86fdst4b46xn6e', 'https://ww24w.example.com87fdst4b46xn6e', 'https://www.example.com88fdst4b46xn6e', 'https://www.example.com89fdst4b46xn6e', 'https://www.example.com90fdst4b46xn6e', 'https://www.example.com91f5dst4b46xn6e', 'https://www.example.com92f4dst4b46xn6e', 'https://www.example.com93fdst43b46xn6e', 'https://www.example.com94fdst4b146xn6e'],
ipv4: ['0.0.0.0', '127.0.0.53', '127.0.0.1', '192.168.1.13', '0.0.0.0', '1.2.3.4'],
ipv6: ['fe00::0', 'ff02::3', '2001:0db8:85a3:0000:0000:8a2e:0370:7334', 'fe80::f2de:f1ff:fe55:53'],
dates: ['1963-06-19', '2020-10-02'],
Time: ['12:34:56.789', '12:34:56+01:00', '12:34:56'],
urlst2: 'x'.repeat(sampleSize).split('').map((x, i) => `https://www.example.com${i}fdst4ljeflajtl4q5h5kqjhkltjhklsjdhflkdjhflb46${x}n6e`),
ipv62: 'x'.repeat(sampleSize).split('').map((x, i) => `2001:0db8:85a3:0000:${i}:0370:7334`),
Time2: 'x'.repeat(sampleSize).split('').map((x, i) => `12:34:56.${i}`),
URIs: 'x'.repeat(sampleSize).split('').map((x, i) => `1https://vvbwet2${x}f3vr${i}`),
ipv44: 'x'.repeat(sampleSize).split('').map((x, i) => `ASR§B%B%NDVV192.${i}.1.13`),
DATE: 'x'.repeat(sampleSize).split('').map((x, i) => new Date(Date.now() - i).toISOString()),
IBANs: 'x'.repeat(sampleSize).split('').map((x, i) => `DE64500105178934265523${i}`),
AMEs: 'x'.repeat(sampleSize).split('').map((x, i) => `37873449367100${i % 3}`),
TELs: 'x'.repeat(sampleSize).split('').map((x, i) => `(888)555-332${i % 3}`),
ccc: 'x'.repeat(sampleSize).split('').map((x, i) => `ASR§B%B%NDVV192.${i}.1.13`),
ddd: 'x'.repeat(sampleSize).split('').map((x, i) => new Date(Date.now() - i).toISOString()),
fff: 'x'.repeat(sampleSize).split('').map((x, i) => `DE64500105178934265523${i}`),
hhh: 'x'.repeat(sampleSize).split('').map((x, i) => `37873449367100${i % 3}`),
kkk: 'x'.repeat(sampleSize).split('').map((x, i) => `(888)555-332${i % 3}`)
};
// testData.DATE.push("1963-06-19") // no format
// testData.DATE.push("2020-02-17T19:06:11.736Z") // no format
// const {DATE} = testData;
const start = Date.now();
let hits = 0,
checked = 0;
const results: any = [];
const runs = 2;
const addHits = (i: number): number => (hits += Number(i));
for (let i = 0; i < runs; i++) {
console.log('##### run ', i);
for (const key in testData) {
checked += testData[key].length * stringFormats.length;
const res = fastDetect(testData[key]);
//if(res && res.length) console.log(key,res);
if (res && res.length) {
if (i < 1) results.push({ key, res });
res.map((h: any) => addHits((h.$comment || '').split('matched:')[1]));
}
}
//results.map( (r:any)=> r.res.map( (h:any)=> addHits( (h.$comment||'').split('matched:')[1] )));
}
console.log('');
results.forEach((r: any) => console.log([r.key, r.res.map((f: any) => f.format + ' ' + f.$comment)].join('\t\t')));
console.log({ matched: results.length });
console.log('');
console.log({
checked: (checked / 1000 / 1000).toFixed(0) + 'M',
hits: (hits / 1000 / 1000).toFixed(0) + 'M',
elapsed: Date.now() - start,
hpS: (hits * 1000) / (Date.now() - start)
});