// third-party-web
// Version:
// Categorized data on third party entities on the web.
// 247 lines (219 loc) • 8.21 kB
// JavaScript
const fs = require('fs')
const path = require('path')
const {entities, getRootDomain, getEntity, getProduct} = require('./index.js')
/**
 * Specs for `getRootDomain`.
 *
 * Each case passes a string (bare host, IP, wildcard pattern, or full URL) and
 * asserts the exact root domain returned, or `null` when the input is not
 * expected to resolve to a domain.
 */
describe('getRootDomain', () => {
  it('works for IP addresses', () => {
    // IP addresses are expected back unchanged.
    expect(getRootDomain('8.8.8.8')).toEqual('8.8.8.8')
    expect(getRootDomain('192.168.0.1')).toEqual('192.168.0.1')
  })

  it('works for basic domains', () => {
    expect(getRootDomain('cdn.cnn.com')).toEqual('cnn.com')
    expect(getRootDomain('www.hulce.photography')).toEqual('hulce.photography')
    expect(getRootDomain('api.supercool.io')).toEqual('supercool.io')
  })

  it('works for country-tlds', () => {
    // Two-part public suffixes (co.jp, gov.in) keep one extra label.
    expect(getRootDomain('content.yahoo.co.jp')).toEqual('yahoo.co.jp')
    expect(getRootDomain('go.visit.gov.in')).toEqual('visit.gov.in')
  })

  it('works for URLs', () => {
    // Scheme, port, path and query string must all be ignored.
    expect(getRootDomain('https://content.yahoo.co.jp/path/?query=param')).toEqual('yahoo.co.jp')
    expect(getRootDomain('https://a.b.c.it/path/?query=param&two=2')).toEqual('c.it')
    expect(getRootDomain('https://foo.bar:433/path/?query=param&two=2')).toEqual('foo.bar')
  })

  it('works for localhost', () => {
    // Covered both with and without a scheme, port, and path.
    expect(getRootDomain('https://localhost:8080/path/?query=param')).toEqual('localhost')
    expect(getRootDomain('https://localhost/path/?query=param&two=2')).toEqual('localhost')
    expect(getRootDomain('localhost:9000/path/?query=param&two=2')).toEqual('localhost')
    expect(getRootDomain('localhost:1200')).toEqual('localhost')
  })

  it('works for wildcard domains', () => {
    // A leading `*.` is dropped from the result.
    expect(getRootDomain('*.google.com')).toEqual('google.com')
    expect(getRootDomain('*.yahoo.co.jp')).toEqual('yahoo.co.jp')
    expect(getRootDomain('*.hulce.photography')).toEqual('hulce.photography')
  })

  it('runs on *massive* inputs', () => {
    // 900,000 digits with no domain-like structure: must finish and return null.
    const massiveInput = '123456789'.repeat(100e3)
    expect(getRootDomain(massiveInput)).toEqual(null)
  })

  it('runs on data URIs', () => {
    // Use an actual `data:` URI (a 1x1 GIF) so the test exercises what its name
    // says; an empty string alone never reaches any data-URI handling.
    const dataUri =
      'data:image/gif;base64,R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAAALAAAAAABAAEAAAIBRAA7'
    expect(getRootDomain(dataUri)).toEqual(null)
    // The degenerate empty input must be rejected as well.
    expect(getRootDomain('')).toEqual(null)
  })

  it('returns null on invalid inputs', () => {
    expect(getRootDomain('this is not a domain')).toEqual(null)
    expect(getRootDomain('neither-is-this')).toEqual(null)
    expect(getRootDomain('http://nor this')).toEqual(null)
  })
})
/**
 * Specs for `getEntity`.
 *
 * Each case passes a URL string and asserts either the full entity record
 * (via inline snapshot), the entity `name`, or `undefined` when no entity is
 * expected to match.
 */
describe('getEntity', () => {
  it('works for direct domain usage', () => {
    // Inline snapshots are whitespace-sensitive: Jest strips only the uniform
    // leading indent, so the nested 2-space indentation below must be kept.
    expect(getEntity('https://js.connect.facebook.net/lib.js')).toMatchInlineSnapshot(`
      Object {
        "averageExecutionTime": 347.4278160199557,
        "categories": Array [
          "social",
        ],
        "category": "social",
        "company": "Facebook",
        "domains": Array [
          "*.facebook.com",
          "*.atlassbx.com",
          "*.fbsbx.com",
          "fbcdn-photos-e-a.akamaihd.net",
          "*.facebook.net",
          "*.fbcdn.net",
        ],
        "examples": Array [
          "www.facebook.com",
          "connect.facebook.net",
          "staticxx.facebook.com",
          "static.xx.fbcdn.net",
          "m.facebook.com",
          "an.facebook.com",
          "platform-lookaside.fbsbx.com",
        ],
        "homepage": "https://www.facebook.com",
        "name": "Facebook",
        "products": Array [
          Object {
            "categories": Array [
              "social",
            ],
            "category": "social",
            "company": "Facebook",
            "facades": Array [
              Object {
                "name": "React Live Chat Loader",
                "repo": "https://github.com/calibreapp/react-live-chat-loader",
              },
            ],
            "name": "Facebook Messenger Customer Chat",
            "urlPatterns": Array [
              /connect\\\\\\.facebook\\\\\\.net\\\\/\\.\\*\\\\/sdk\\\\/xfbml\\\\\\.customerchat\\\\\\.js/,
            ],
          },
        ],
        "totalExecutionTime": 1097107210,
        "totalOccurrences": 3157799,
      }
    `)
  })

  it('works for inferred domain usage', () => {
    // `unknown.typekit.net` is not among the listed examples; it is expected to
    // resolve through the `*.typekit.net` wildcard shown in the snapshot.
    expect(getEntity('https://unknown.typekit.net/fonts.css')).toMatchInlineSnapshot(`
      Object {
        "averageExecutionTime": 660.2645605704683,
        "categories": Array [
          "cdn",
        ],
        "category": "cdn",
        "company": "Adobe",
        "domains": Array [
          "*.typekit.com",
          "*.typekit.net",
        ],
        "examples": Array [
          "use.typekit.net",
          "p.typekit.net",
        ],
        "homepage": "https://fonts.adobe.com/",
        "name": "Adobe TypeKit",
        "products": Array [],
        "totalExecutionTime": 78981507,
        "totalOccurrences": 119621,
      }
    `)
  })

  it('does not over-infer', () => {
    // An unrecognised gstatic.com subdomain must not be attributed to anyone.
    expect(getEntity('https://unknown.gstatic.com/what')).toEqual(undefined)
  })

  it('only infers as a fallback', () => {
    // The exact host is listed under Facebook's domains (see the snapshot
    // above), so it wins over the root-domain owner; any other akamaihd.net
    // host falls back to Akamai.
    expect(getEntity('http://fbcdn-photos-e-a.akamaihd.net/1234.jpg').name).toEqual('Facebook')
    expect(getEntity('http://unknown.akamaihd.net/1234.jpg').name).toEqual('Akamai')
  })

  it('runs on *massive* inputs', () => {
    // 900,000 digits with no domain-like structure: must finish with no match.
    const massiveInput = '123456789'.repeat(100e3)
    expect(getEntity(massiveInput)).toEqual(undefined)
  })

  it('runs on data URIs', () => {
    // Use an actual `data:` URI (a 1x1 GIF) so the test exercises what its name
    // says; an empty string alone never reaches any data-URI handling.
    const dataUri =
      'data:image/gif;base64,R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAAALAAAAAABAAEAAAIBRAA7'
    expect(getEntity(dataUri)).toEqual(undefined)
    // The degenerate empty input must not match an entity either.
    expect(getEntity('')).toEqual(undefined)
  })

  it('supports multi-tennant domains', () => {
    // Different subdomains of the shared consensu.org host map to different
    // entities.
    expect(getEntity('https://gemius.mgr.consensu.org/cmp/v2/stub.js').name).toEqual('Gemius CMP')
    expect(
      getEntity('https://quantcast.mgr.consensu.org/choice/KygWsHah2_7Qa/rssing.com/choice.js').name
    ).toEqual('Quantcast Choice')
    expect(getEntity('https://static.quantcast.mgr.consensu.org/v50/cmpui-popup.js').name).toEqual(
      'Quantcast Choice'
    )
  })
})
/**
 * Specs for `getProduct`: a URL either resolves to a product record (checked
 * as a partial match on its fields) or to `undefined`.
 */
describe('getProduct', () => {
  it('works on basic url', () => {
    const expectedFacades = [
      {
        name: 'Lite YouTube',
        repo: 'https://github.com/paulirish/lite-youtube-embed',
      },
      {
        name: 'Ngx Lite Video',
        repo: 'https://github.com/karim-mamdouh/ngx-lite-video',
      },
    ]
    const expectedProduct = {
      name: 'YouTube Embedded Player',
      company: 'YouTube',
      category: 'video',
      categories: ['video'],
      facades: expectedFacades,
    }

    const product = getProduct('https://www.youtube.com/embed/alGcULGtiv8')

    expect(product).toMatchObject(expectedProduct)
  })

  it('works on regex based', () => {
    // This product is identified by a URL pattern rather than a plain host.
    const product = getProduct('https://connect.facebook.net/en_US/sdk/xfbml.customerchat.js')
    const expectedProduct = {
      name: 'Facebook Messenger Customer Chat',
      facades: [
        {
          name: 'React Live Chat Loader',
          repo: 'https://github.com/calibreapp/react-live-chat-loader',
        },
      ],
    }

    expect(product).toMatchObject(expectedProduct)
  })

  it('returns undefined when product does not match', () => {
    const product = getProduct('https://js.connect.facebook.net/lib.js')
    expect(product).toEqual(undefined)
  })

  it('returns undefined with no products', () => {
    const product = getProduct('https://unknown.typekit.net/fonts.css')
    expect(product).toEqual(undefined)
  })
})
/**
 * Sanity checks on generated build artifacts: the exported entity list and the
 * `*-subset*` files in the parent directory.
 */
describe('build state', () => {
  it('should use the complete entities set', () => {
    const {length: expectedCount} = require('../data/entities.js')
    expect(entities).toHaveLength(expectedCount)
  })

  it('should have all the same subsets in root as lib', () => {
    const rootDir = path.join(__dirname, '../')
    const srcSizes = fs.readdirSync(path.join(__dirname, 'subsets'))
    const dstSizes = fs.readdirSync(rootDir).filter(name => name.includes('-subset'))

    expect(dstSizes).toHaveLength(srcSizes.length) // run `yarn build` if this fails

    // Loading each generated .js subset verifies that it can at least be required.
    dstSizes
      .filter(name => name.endsWith('.js'))
      .forEach(name => {
        require(path.join(rootDir, name))
      })
  })
})
/**
 * Runs `getEntity` over every URL in the random-urls fixture, then snapshots
 * the entity name resolved for the first 1000 of them.
 */
it('should work on real web data', () => {
  const urlListPath = path.join(__dirname, '../data/random-urls.txt')
  const rawLines = fs.readFileSync(urlListPath, 'utf8').split('\n')
  const urls = rawLines.filter(line => Boolean(line))

  // Smoke pass over the full list: ensure it doesn't throw.
  urls.forEach(url => {
    getEntity(url)
  })

  const top1000 = []
  for (const url of urls.slice(0, 1000)) {
    // The query string is dropped from the label only; lookup uses the full URL.
    const [cleanedUrl] = url.split('?')
    const entity = getEntity(url)
    top1000.push(`${entity && entity.name} - ${cleanedUrl}`)
  }

  // It's expected that this snapshot will change as coverage changes.
  expect(top1000).toMatchSnapshot()
})