/*
 * ts-web-scraper
 * Version: (not shown in this listing)
 * A powerful web scraper for both static and client-side rendered sites, using only Bun native APIs.
 * TypeScript declaration file, 80 lines, 2.34 kB.
 */
/**
* Generate a hash for content.
*
* Declaration only: the hashing algorithm and how `content` is serialized
* before hashing are not visible from this file.
*
* @param content - Value to hash. Typed `any`, so the type checker accepts any value.
* @returns A promise that resolves to the hash as a string.
*/
export declare function hashContent(content: any): Promise<string>;
/**
* Deep diff between two objects.
*
* @param oldObj - The earlier value to compare from.
* @param newObj - The later value to compare against.
* @param path - Optional path string. NOTE(review): presumably the prefix used to
*   build each `Change.path` while walking nested values; confirm against the implementation.
* @returns The list of `Change` records describing how `newObj` differs from `oldObj`.
*/
export declare function diff(oldObj: any, newObj: any, path?: string): Change[];
/**
* Compare two content snapshots.
*
* @param oldSnapshot - The earlier snapshot.
* @param newSnapshot - The later snapshot.
* @returns A `DiffResult` carrying a change flag, the individual changes and a summary of counts.
*   NOTE(review): which snapshot fields take part in the comparison (`hash`, `content`,
*   `metadata`) is not visible here; confirm against the implementation.
*/
export declare function compare(oldSnapshot: ContentSnapshot, newSnapshot: ContentSnapshot): DiffResult;
/**
* Calculate similarity between two strings (0-1).
*
* @param str1 - First string.
* @param str2 - Second string.
* @returns A number in the range 0 to 1. NOTE(review): presumably 1 means identical and
*   0 means nothing in common; the metric used is not visible here, so confirm both.
*/
export declare function stringSimilarity(str1: string, str2: string): number;
/**
* Find differences in arrays using LCS algorithm.
*
* @typeParam T - Element type shared by both arrays.
* @param oldArray - The earlier array.
* @param newArray - The later array.
* @param equals - Optional comparator that decides whether two elements count as the same.
*   NOTE(review): the comparison used when it is omitted is not visible here; confirm.
* @returns One entry per reported element, each holding a `ChangeType`, the element `value`
*   and an optional `index`. NOTE(review): whether `index` refers to `oldArray` or
*   `newArray` is not visible here; confirm before relying on it.
*/
export declare function arrayDiff<T>(oldArray: T[], newArray: T[], equals?: (a: T, b: T) => boolean): Array<{ type: ChangeType, value: T, index?: number }>;
/**
* A single difference record, as returned by `diff` and carried in `DiffResult.changes`.
*/
export declare interface Change {
/** Kind of change this record describes. */
type: ChangeType
/** Location of the changed value. NOTE(review): the path syntax is not visible here; confirm. */
path: string
/** Optional previous value. NOTE(review): presumably absent for additions; confirm. */
oldValue?: any
/** Optional new value. NOTE(review): presumably absent for removals; confirm. */
newValue?: any
}
/**
* Outcome of a comparison, as returned by `compare` and by the `ContentTracker`
* comparison methods.
*/
export declare interface DiffResult {
/** Flag indicating whether the comparison found changes. */
hasChanges: boolean
/** The individual change records. */
changes: Change[]
/**
* Counters named after the four `ChangeType` values.
* NOTE(review): presumably each counts the entries of that type; confirm how
* `unchanged` is derived, since unchanged values may not appear in `changes`.
*/
summary: {
added: number
removed: number
modified: number
unchanged: number
}
}
/**
* A stored capture of content for one URL, as produced by `ContentTracker.snapshot`
* and consumed by `compare`.
*/
export declare interface ContentSnapshot {
/** URL the content belongs to. */
url: string
/** Date associated with the snapshot. NOTE(review): presumably the capture time; confirm. */
timestamp: Date
/** Hash string for the snapshot. NOTE(review): presumably produced by `hashContent` from `content`; confirm. */
hash: string
/** The captured content; untyped (`any`). */
content: any
/** Optional free-form key/value data attached to the snapshot. */
metadata?: Record<string, any>
}
/**
* Content tracker for monitoring changes.
*
* Holds a list of `ContentSnapshot` values per key in a `Map` and exposes methods to
* record, query, compare and clear them. This is a declaration only, so the notes
* below describe the typed contract; behavioural details are marked for confirmation.
*/
export declare class ContentTracker {
/** Snapshot lists keyed by string. NOTE(review): presumably keyed by URL; confirm. */
private snapshots: Map<string, ContentSnapshot[]>;
/** Options passed to the constructor, if any. */
private options?: {
/** NOTE(review): presumably caps how many snapshots are kept per URL; confirm. */
maxSnapshots?: number
/** NOTE(review): presumably the directory used to persist snapshots; confirm. */
storageDir?: string
};
/**
* @param options - Optional settings; both fields are optional and their defaults are
*   not visible here.
*/
constructor(options?: {
maxSnapshots?: number
storageDir?: string
});
/**
* Records content for a URL.
*
* @param url - URL the content belongs to.
* @param content - Content to capture; untyped (`any`).
* @param metadata - Optional key/value data to attach.
* @returns A promise that resolves to a `ContentSnapshot`.
*/
snapshot(url: string, content: any, metadata?: Record<string, any>): Promise<ContentSnapshot>;
/** Returns the snapshots held for `url`. NOTE(review): ordering and the result for an untracked URL are not visible here; confirm. */
getSnapshots(url: string): ContentSnapshot[];
/** Returns a snapshot for `url`, or `null`. NOTE(review): presumably the most recent one, with `null` when none exists; confirm. */
getLatest(url: string): ContentSnapshot | null;
/** Resolves to whether `newContent` differs from what is tracked for `url`. NOTE(review): the result for an untracked URL is not visible here; confirm. */
hasChanged(url: string, newContent: any): Promise<boolean>;
/** Resolves to a `DiffResult` for `newContent`, or `null`. NOTE(review): presumably `null` when there is no earlier snapshot to compare with; confirm. */
getChanges(url: string, newContent: any): Promise<DiffResult | null>;
/**
* Compares two stored snapshots of one URL, selected by index.
*
* @param url - URL whose snapshots are compared.
* @param fromIndex - Index of the snapshot to compare from.
* @param toIndex - Index of the snapshot to compare to.
* @returns A `DiffResult`, or `null`. NOTE(review): presumably `null` when an index is
*   out of range; confirm, along with whether negative indices are accepted.
*/
compareSnapshots(url: string, fromIndex: number, toIndex: number): DiffResult | null;
/** Clears tracked data for one URL. NOTE(review): whether persisted data is also removed is not visible here; confirm. */
clear(url: string): void;
/** Clears tracked data for every URL. NOTE(review): same persistence question as `clear`; confirm. */
clearAll(): void;
/** Returns the tracked URLs as strings. */
getTrackedUrls(): string[];
/** Asynchronously saves one snapshot. NOTE(review): presumably writes under `storageDir`; confirm. */
private saveSnapshot(snapshot: ContentSnapshot): Promise<void>;
/** Asynchronously loads snapshots for `url`. NOTE(review): presumably reads what `saveSnapshot` wrote; confirm. */
loadSnapshots(url: string): Promise<void>;
}
/**
* Content Change Detection (module overview)
*
* Track and detect changes in scraped content over time
* Uses ONLY Bun native APIs - no external dependencies!
*
* This overview describes the whole module rather than the enum declared below it.
*/
/**
* Kinds of change reported in `Change.type` and in the entries returned by `arrayDiff`.
* Each member is a string enum whose value is the lowercase form of its name.
*/
export declare enum ChangeType {
/** String value `'added'`. */
ADDED = 'added',
/** String value `'removed'`. */
REMOVED = 'removed',
/** String value `'modified'`. */
MODIFIED = 'modified',
/** String value `'unchanged'`. */
UNCHANGED = 'unchanged',
}