# @pinkpixel/prysm-mcp
# Version:
# MCP server for the Prysm web scraper - enabling AI assistants to scrape web content
# 313 lines (296 loc) • 7.92 kB
# YAML
---
# OpenAPI 3.0 description of the Prysm Scraper HTTP API.
openapi: 3.0.0
# General metadata: title, version, maintainer contact and license.
info:
  title: Prysm Scraper API
  version: 1.0.0
  description: API for the Prysm structure-aware web scraper
  contact:
    name: Pink Pixel
    url: https://pinkpixel.dev
  license:
    name: MIT
    url: https://opensource.org/licenses/MIT
# Base URLs the API is served from; only a local development server is listed.
servers:
  - url: http://localhost:3000
    description: Development server
# Tags that group the operations declared under `paths`.
tags:
  - name: Jobs
    description: Scraping job operations
  - name: Results
    description: Scraping results operations
# Reusable schemas and responses, addressed elsewhere through `$ref`.
components:
  schemas:
    # Optional settings for a scraping job (referenced by JobRequest.options).
    ScraperOptions:
      type: object
      properties:
        pages:
          type: integer
          description: Maximum number of pages to scrape by following links
          default: 1
          minimum: 1
          maximum: 100
        images:
          type: boolean
          description: Whether to download images from the page
          default: false
        output:
          type: string
          # Quoted because the text contains ": ", which is not allowed in a
          # plain (unquoted) YAML scalar.
          description: 'Custom output path for results (default: ~/prysm/output)'
          nullable: true
        imageOutput:
          type: string
          # Quoted for the same reason as `output` above.
          description: 'Custom output path for images (default: ~/prysm/output/images)'
          nullable: true

    # Request body for creating a job; only `url` is required.
    JobRequest:
      type: object
      required:
        - url
      properties:
        url:
          type: string
          format: uri
          description: URL to scrape
        options:
          $ref: '#/components/schemas/ScraperOptions'
        priority:
          type: integer
          minimum: 1
          maximum: 10
          default: 5
          description: Job priority (1 = highest, 10 = lowest)
        webhook:
          type: string
          format: uri
          description: URL to receive webhook notifications when job completes

    # Status record describing a single job.
    JobResponse:
      type: object
      properties:
        jobId:
          type: string
          description: Unique identifier for the job
        status:
          type: string
          enum: [pending, processing, completed, failed, cancelled]
        url:
          type: string
          format: uri
        createdAt:
          type: string
          format: date-time
        startedAt:
          type: string
          format: date-time
          nullable: true
        completedAt:
          type: string
          format: date-time
          nullable: true
        error:
          type: string
          nullable: true
        progress:
          type: integer
          minimum: 0
          maximum: 100

    # Scraped output for one job.
    ScrapingResults:
      type: object
      properties:
        title:
          type: string
        content:
          type: array
          items:
            type: string
        metadata:
          type: object
        structureType:
          type: string
        paginationType:
          type: string
        extractionMethod:
          type: string
        url:
          type: string
          format: uri

    # JobResponse extended with a `result` property holding ScrapingResults.
    JobWithResults:
      allOf:
        - $ref: '#/components/schemas/JobResponse'
        - type: object
          properties:
            result:
              $ref: '#/components/schemas/ScrapingResults'

    # Error payload used by every reusable response below.
    Error:
      type: object
      properties:
        message:
          type: string
        code:
          type: string
        details:
          type: object

  # Reusable error responses; each one returns the Error schema as JSON.
  responses:
    NotFound:
      description: The specified resource was not found
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/Error'
    BadRequest:
      description: The request was malformed
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/Error'
    Unauthorized:
      description: Authentication is required or failed
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/Error'
    TooManyRequests:
      description: Request rate limit exceeded
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/Error'
# HTTP operations: create/list jobs, inspect or delete one job, fetch results.
paths:
  /api/jobs:
    post:
      summary: Create a new scraping job
      description: |
        Start a new scraping job with the specified URL and options.
        This is the primary endpoint for initiating scraping operations.
      tags:
        - Jobs
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/JobRequest'
            examples:
              basic:
                summary: Basic scraping job
                value:
                  url: "https://example.com"
              withOptions:
                summary: Scraping with options
                value:
                  url: "https://example.com"
                  options:
                    pages: 5
                    images: true
                    output: "/custom/path"
      responses:
        '202':
          description: Job accepted
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/JobResponse'
        # NOTE(review): no response body is declared for 400 here, although
        # components/responses/BadRequest exists — confirm whether the server
        # returns an Error payload and reference it if so.
        '400':
          description: Bad request
    get:
      summary: List all jobs
      description: Retrieve a list of all jobs with optional filtering
      tags:
        - Jobs
      parameters:
        - name: status
          in: query
          schema:
            type: string
            enum: [pending, processing, completed, failed, cancelled]
          description: Filter jobs by status
        - name: limit
          in: query
          schema:
            type: integer
            default: 20
          description: Maximum number of jobs to return
        - name: offset
          in: query
          schema:
            type: integer
            default: 0
          description: Offset for pagination
      responses:
        '200':
          description: List of jobs
          content:
            application/json:
              schema:
                type: object
                properties:
                  jobs:
                    type: array
                    items:
                      $ref: '#/components/schemas/JobResponse'
                  total:
                    type: integer
                  limit:
                    type: integer
                  offset:
                    type: integer

  /api/jobs/{jobId}:
    get:
      summary: Get job status
      description: Get the status and details of a specific job
      tags:
        - Jobs
      parameters:
        - name: jobId
          in: path
          required: true
          schema:
            type: string
          description: Job identifier
      responses:
        '200':
          description: Job details
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/JobResponse'
        '404':
          description: Job not found
    delete:
      summary: Cancel or delete job
      description: Cancel a job or delete its results
      tags:
        - Jobs
      parameters:
        - name: jobId
          in: path
          required: true
          schema:
            type: string
          description: Job identifier
      responses:
        '204':
          description: Job cancelled or deleted
        '404':
          description: Job not found

  /api/jobs/{jobId}/results:
    get:
      summary: Get job results
      description: Retrieve the results of a completed job
      tags:
        - Results
      parameters:
        - name: jobId
          in: path
          required: true
          schema:
            type: string
          description: Job identifier
      responses:
        '200':
          description: Job results
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/JobWithResults'
        '404':
          description: Job or results not found
        # Returned when results are requested before the job has completed.
        '409':
          description: Job not completed