markdown-crawler
Version: 1.0.18
A powerful web crawler that extracts content from web pages and converts them to clean Markdown format, with support for code blocks and GitHub Flavored Markdown
91 lines (90 loc) • 2.29 kB
JSON
{
"name": "markdown-crawler",
"version": "1.0.18",
"description": "A powerful web crawler that extracts content from web pages and converts them to clean Markdown format, with support for code blocks and GitHub Flavored Markdown",
"type": "module",
"main": "./dist/cjs/index.js",
"module": "./dist/esm/index.js",
"types": "./dist/types/index.d.ts",
"bin": {
"markdown-crawler": "./dist/esm/cli.js"
},
"scripts": {
"build": "npm run clean && npx tsc -p tsconfig.json && npx tsc -p tsconfig.cjs.json && chmod +x ./dist/esm/cli.js",
"dev": "npx tsc --watch",
"clean": "rm -rf dist",
"prebuild": "npm run clean",
"test": "node dist/esm/cli.js -- https://sample.site/docs/ sample.docs.yaml",
"lint": "npx tsc --noEmit",
"prepub": "npm version patch",
"pub": "npm publish --access public"
},
"files": [
"dist/esm",
"dist/cjs",
"dist/types",
"README.md",
"README-*.md",
"LICENSE"
],
"keywords": [
"web-crawler",
"markdown",
"html-to-markdown",
"readability",
"content-extraction",
"playwright",
"cli-tool",
"web-scraping",
"gfm",
"turndown",
"ai",
"llm",
"ai-content",
"ai-processing",
"machine-learning",
"nlp",
"text-extraction",
"semantic-content",
"structured-data",
"clean-text"
],
"author": "gkctou@gmail.com",
"license": "MIT",
"repository": {
"type": "git",
"url": "git+https://github.com/gkctou/md-crawler.git"
},
"bugs": {
"url": "https://github.com/gkctou/md-crawler/issues"
},
"exports": {
".": {
"import": "./dist/esm/index.js",
"require": "./dist/cjs/index.js"
}
},
"homepage": "https://github.com/gkctou/md-crawler#readme",
"engines": {
"node": ">=16.0.0"
},
"devDependencies": {
"@types/node": "^22.13.14",
"typescript": "^5.8.2"
},
"dependencies": {
"@mozilla/readability": "^0.6.0",
"@types/minimatch": "^5.1.2",
"cheerio": "^1.0.0",
"commander": "^13.1.0",
"jsdom": "^26.0.0",
"minimatch": "^10.0.1",
"remark-parse": "^11.0.0",
"remark-stringify": "^11.0.0",
"turndown": "^7.2.0",
"turndown-plugin-gfm": "^1.0.2",
"unified": "^11.0.5",
"unist-util-visit": "^5.0.0",
"yaml": "^2.7.0"
}
}