UNPKG

markdown-crawler

Version:

A powerful web crawler that extracts content from web pages and converts them to clean Markdown format, with support for code blocks and GitHub Flavored Markdown

80 lines (79 loc) 1.95 kB
{
  "name": "markdown-crawler",
  "version": "1.0.17",
  "description": "A powerful web crawler that extracts content from web pages and converts them to clean Markdown format, with support for code blocks and GitHub Flavored Markdown",
  "main": "dist/index.js",
  "bin": {
    "markdown-crawler": "dist/cli.js"
  },
  "scripts": {
    "build": "tsc && chmod +x ./dist/cli.js",
    "dev": "tsc --watch",
    "clean": "rm -rf dist",
    "prebuild": "npm run clean",
    "test": "node dist/cli.js -- https://sample.site/docs/ sample.docs.yaml",
    "lint": "tsc --noEmit",
    "prepub": "npm version patch",
    "pub": "npm publish --access public"
  },
  "files": [
    "dist",
    "README.md",
    "README-*.md",
    "LICENSE"
  ],
  "keywords": [
    "web-crawler",
    "markdown",
    "html-to-markdown",
    "readability",
    "content-extraction",
    "playwright",
    "cli-tool",
    "web-scraping",
    "gfm",
    "turndown",
    "ai",
    "llm",
    "ai-content",
    "ai-processing",
    "machine-learning",
    "nlp",
    "text-extraction",
    "semantic-content",
    "structured-data",
    "clean-text"
  ],
  "author": "gkctou@gmail.com",
  "license": "MIT",
  "repository": {
    "type": "git",
    "url": "git+https://github.com/gkctou/md-crawler.git"
  },
  "bugs": {
    "url": "https://github.com/gkctou/md-crawler/issues"
  },
  "homepage": "https://github.com/gkctou/md-crawler#readme",
  "engines": {
    "node": ">=20.0.0"
  },
  "devDependencies": {
    "@types/minimatch": "^5.1.2",
    "@types/node": "^22.13.14",
    "typescript": "^5.8.2"
  },
  "dependencies": {
    "@mozilla/readability": "^0.6.0",
    "cheerio": "^1.0.0",
    "commander": "^13.1.0",
    "jsdom": "^26.0.0",
    "minimatch": "^10.0.1",
    "remark-parse": "^11.0.0",
    "remark-stringify": "^11.0.0",
    "turndown": "^7.2.0",
    "turndown-plugin-gfm": "^1.0.2",
    "unified": "^11.0.5",
    "unist-util-visit": "^5.0.0",
    "yaml": "^2.7.0"
  }
}