js-harvester

Version:

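Harvester is a lightweight and highly optimized library for extracting data from the DOM tree. It supports tag texts and their attributes. To extract data, the user needs to describe a template of the DOM branch structure from which the extraction will be performed, and then call the harvest() function with two parameters: the described template and the parent branch in the DOM where the search will begin. This library can be used both in the browser and in web scraping tools such as Puppeteer.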

github.com/tmptrash/harvester

tmptrash/harvester

97 lines (96 loc) • 2.55 kB

JSON

View Raw

{ "name": "js-harvester", "version": "0.3.3", "description": "Harvester is a lightweight and highly optimized library for extracting data from the DOM tree. It supports tag texts and their attributes. To extract data, the user needs to describe a template of the DOM branch structure from which the extraction will be performed, and then call the harvest() function with two parameters: the described template and the parent branch in the DOM where the search will begin. This library can be used both in the browser and in web scraping tools such as Puppeteer.", "main": "index.js", "scripts": { "test": "standard && jest", "lint": "standard" }, "repository": { "type": "git", "url": "git+https://github.com/tmptrash/harvester.git" }, "keywords": [ "puppeteer", "lightweight", "optimized", "web-scraping", "web", "scraping", "data-extraction", "data", "extraction", "html-parsing", "html", "parsing", "dom-parsing", "dom", "parsing", "scraping", "extraction", "harvesting", "data-harvesting", "template-based-scraping", "template-based", "scraping", "template-extraction", "template", "extraction", "pattern-based-scraping", "pattern-based", "scraping", "visual-scraping-template", "declarative-scraping", "fuzzy-scraping", "fuzzy", "scraping", "approximate-scraping", "approximate", "scraping", "resilient-scraping", "resilient", "scraping", "flexible-scraping", "flexible", "scraping", "structure-agnostic-scraping", "semantic-scraping", "tree-template-scraping", "tree-template", "scraping", "pseudo-tree-template", "string-template-scraping", "string-template", "scraping", "indentation-based-template", "visual-template", "javascript-scraping", "javascript", "scraping", "npm-package", "browser-scraping", "nodejs-scraping", "dom-traversal", "dom-manipulation", "frontend-scraping", "hierarchical-data-extraction", "nested-data-extraction", "attribute-extraction", "text-extraction", "web-automation", "content-extraction", "web-data-extraction" ], "author": "flatline", "license": "MIT", 
"bugs": { "url": "https://github.com/tmptrash/harvester/issues" }, "homepage": "https://github.com/tmptrash/harvester#readme", "devDependencies": { "jest": "^29.7.0", "jsdom": "^26.0.0", "standard": "^17.1.2" } }