json-mcp-filter
Version:
MCP server providing JSON schema generation and filtering tools using quicktype
267 lines (266 loc) • 12.2 kB
JavaScript
import { JsonIngestionStrategy } from './JsonIngestionStrategy.js';
/**
 * Strategy for ingesting JSON from HTTPS URLs.
 *
 * Handles both static JSON files and API endpoints that return JSON. The
 * response content-type is deliberately not enforced: the body itself is
 * validated as JSON so that endpoints with missing or wrong headers still work.
 *
 * Every failure is reported as `{ success: false, error: { type, message, details } }`
 * rather than thrown.
 */
export class HttpsJsonStrategy extends JsonIngestionStrategy {
    /** Upper bound, in milliseconds, for the whole request (headers and body). */
    requestTimeout = 10000; // 10 second timeout
    /** Largest response body, in bytes, that will be downloaded. */
    maxResponseSize = 50 * 1024 * 1024; // 50MB limit

    /**
     * Reports whether this strategy is responsible for `source`.
     *
     * @param {unknown} source - Candidate source identifier.
     * @returns {boolean} `true` for strings that start with an `https://` scheme.
     */
    canHandle(source) {
        // Non-strings are rejected instead of throwing, and the scheme is matched
        // case-insensitively so the answer agrees with `new URL()` in ingest(),
        // which normalises "HTTPS://" to "https:". Content is validated after fetching.
        return typeof source === 'string' && /^https:\/\//i.test(source);
    }

    /**
     * Downloads `source` and validates that the body is JSON.
     *
     * @param {string} source - HTTPS URL to fetch.
     * @returns {Promise<object>} Whatever `validateJsonContent` returns on success,
     *   otherwise a failure object whose `error.type` is one of `invalid_url`,
     *   `network_error`, `authentication_required`, `rate_limit_exceeded`,
     *   `server_error`, `content_too_large`, `invalid_json` or `validation_error`.
     */
    async ingest(source) {
        try {
            // Validate URL format
            let url;
            try {
                url = new URL(source);
            }
            catch (error) {
                return this.#failure('invalid_url', `Invalid URL format: ${source}`, error);
            }
            // Only allow HTTPS for security
            if (url.protocol !== 'https:') {
                return this.#failure('invalid_url', 'Only HTTPS URLs are supported for security reasons', { protocol: url.protocol });
            }

            const controller = new AbortController();
            const timeoutId = setTimeout(() => controller.abort(), this.requestTimeout);
            try {
                let response;
                try {
                    response = await fetch(source, {
                        signal: controller.signal,
                        headers: {
                            'Accept': 'application/json, text/plain, */*',
                            'User-Agent': 'json-mcp-filter/1.0.2'
                        }
                    });
                }
                catch (error) {
                    return this.#fetchFailure(source, error, controller.signal.aborted);
                }

                // fetch follows redirects; refuse content that was ultimately served
                // over a non-HTTPS URL so the HTTPS-only policy cannot be bypassed.
                const finalProtocol = this.#protocolOf(response.url);
                if (finalProtocol !== null && finalProtocol !== 'https:') {
                    return this.#failure('invalid_url', 'Request was redirected to a non-HTTPS URL. Only HTTPS URLs are supported for security reasons', { protocol: finalProtocol, redirectedTo: response.url });
                }

                if (!response.ok) {
                    return await this.#httpErrorResult(response, source);
                }

                // Check the advertised size before downloading anything.
                // A missing or malformed header parses to NaN and skips this check.
                const declaredSize = Number.parseInt(response.headers.get('content-length') ?? '', 10);
                if (declaredSize > this.maxResponseSize) {
                    return this.#failure('content_too_large', `Response too large (${Math.round(declaredSize / 1024 / 1024)}MB). This tool is optimized for JSON files under ${Math.round(this.maxResponseSize / 1024 / 1024)}MB.`, {
                        contentLength: declaredSize,
                        maxSize: this.maxResponseSize,
                        url: source
                    });
                }

                // Stream the body so the limit is enforced while downloading, which
                // also covers responses without (or with a wrong) Content-Length.
                let content;
                try {
                    const body = await this.#readLimitedText(response, this.maxResponseSize);
                    if (body.exceeded) {
                        return this.#failure('content_too_large', `Response too large (exceeded ${Math.round(this.maxResponseSize / 1024 / 1024)}MB while reading). This tool is optimized for JSON files under ${Math.round(this.maxResponseSize / 1024 / 1024)}MB.`, {
                            // Bytes received before the download was stopped.
                            actualSize: body.byteLength,
                            maxSize: this.maxResponseSize,
                            url: source
                        });
                    }
                    content = body.text;
                }
                catch (error) {
                    // Only the timeout timer aborts this controller.
                    if (controller.signal.aborted) {
                        return this.#failure('network_error', `Request timeout after ${this.requestTimeout}ms`, error);
                    }
                    return this.#failure('network_error', 'Failed to read response content. This may be due to network issues or the response being too large.', { error, url: source });
                }

                // Validate the content itself rather than trusting content-type headers.
                const validation = this.validateJsonContent(content);
                if (!validation.success) {
                    return this.#invalidJsonResult(content, response, source);
                }
                return validation;
            }
            finally {
                // Always release the timer, including when fetch rejects.
                clearTimeout(timeoutId);
                // Cancels a body that an early return left unread; a no-op once
                // the body has been fully consumed.
                controller.abort();
            }
        }
        catch (error) {
            return this.#failure('validation_error', 'Unexpected error during HTTPS JSON ingestion', error);
        }
    }

    /**
     * Describes this strategy.
     *
     * @returns {{name: string, description: string, supportedSources: string[]}}
     */
    getMetadata() {
        return {
            name: 'HttpsJsonStrategy',
            description: 'Fetches JSON data from HTTPS URLs (static files and API endpoints)',
            supportedSources: ['HTTPS URLs serving JSON content', 'JSON API endpoints', 'Static .json files', 'Any HTTPS URL returning valid JSON']
        };
    }

    /** Builds the failure object shape returned by every error path. */
    #failure(type, message, details) {
        return { success: false, error: { type, message, details } };
    }

    /** Returns the protocol of `value` (e.g. `'https:'`), or `null` when it is not a parseable URL string. */
    #protocolOf(value) {
        if (typeof value !== 'string' || value === '') {
            return null;
        }
        try {
            return new URL(value).protocol;
        }
        catch {
            return null;
        }
    }

    /** Maps a rejected `fetch` to a `network_error`, distinguishing timeouts from other failures. */
    #fetchFailure(source, error, timedOut) {
        if (timedOut || (error instanceof Error && error.name === 'AbortError')) {
            return this.#failure('network_error', `Request timeout after ${this.requestTimeout}ms`, error);
        }
        const reason = error instanceof Error ? error.message : String(error);
        return this.#failure('network_error', `Failed to fetch from ${source}: ${reason}`, error);
    }

    /** Maps a non-2xx response to the matching failure object. */
    async #httpErrorResult(response, source) {
        const { status, statusText } = response;
        // Authentication/authorization errors
        if (status === 401 || status === 403) {
            const message = status === 401
                ? 'Authentication required: This endpoint needs valid credentials. Verify this is a public API endpoint.'
                : 'Access forbidden: This endpoint may require authentication, be restricted by region, or have IP restrictions. Verify this is a publicly accessible endpoint.';
            const responsePreview = await this.#readPreview(response);
            return this.#failure('authentication_required', message, {
                status,
                statusText,
                url: source,
                responsePreview: responsePreview || undefined
            });
        }
        // Rate limiting
        if (status === 429) {
            return this.#failure('rate_limit_exceeded', 'API rate limit exceeded. Please wait before making more requests to this endpoint.', {
                status,
                statusText,
                url: source,
                retryAfter: response.headers.get('Retry-After') || undefined
            });
        }
        // Server errors (500-series)
        if (status >= 500) {
            return this.#failure('server_error', `Server error (HTTP ${status}): This is likely a temporary issue with the endpoint. Try again later.`, {
                status,
                statusText,
                url: source
            });
        }
        // Remaining non-OK statuses (404, etc.)
        const hint = status === 404 ? 'Endpoint not found - verify the URL is correct.' : 'Client error occurred.';
        return this.#failure('network_error', `HTTP ${status}: ${statusText}. ${hint}`, {
            status,
            statusText,
            url: source
        });
    }

    /** Builds the `invalid_json` failure, with a hint derived from the first 200 characters of the body. */
    #invalidJsonResult(content, response, source) {
        const contentType = response.headers.get('content-type') || 'unknown';
        const contentPreview = content.substring(0, 200);
        const probe = contentPreview.toLowerCase();
        let hint;
        if (probe.includes('<!doctype html') || probe.includes('<html')) {
            hint = ' The response appears to be HTML - verify the URL points to a JSON endpoint, not a web page.';
        }
        else if (probe.includes('<?xml')) {
            hint = ' The response appears to be XML - this tool only supports JSON format.';
        }
        else if (contentPreview.trim() === '') {
            hint = ' The response is empty - the endpoint may not be returning data.';
        }
        else {
            hint = ' Verify the endpoint returns valid JSON format.';
        }
        return this.#failure('invalid_json', `Response content is not valid JSON. Content-Type: ${contentType}.${hint}`, {
            contentType,
            url: source,
            contentPreview,
            responseSize: content.length
        });
    }

    /** Returns up to 200 characters of the response body, or `''` when it cannot be read. */
    async #readPreview(response) {
        try {
            // 1024 bytes always decode to at least 200 UTF-16 code units.
            const { text } = await this.#readLimitedText(response, 1024);
            return text.substring(0, 200);
        }
        catch {
            // The preview is optional context only; a failed read must not mask the HTTP error.
            return '';
        }
    }

    /**
     * Reads the response body as UTF-8 text, stopping once more than `byteLimit`
     * bytes have arrived.
     *
     * @returns {Promise<{exceeded: boolean, byteLength: number, text: string}>}
     *   `text` holds at most the first `byteLimit` bytes when `exceeded` is true.
     */
    async #readLimitedText(response, byteLimit) {
        const stream = response.body;
        if (!stream || typeof stream.getReader !== 'function') {
            // No readable stream (e.g. an empty body): fall back to buffered text.
            const text = await response.text();
            const byteLength = new TextEncoder().encode(text).length;
            return { exceeded: byteLength > byteLimit, byteLength, text };
        }
        const reader = stream.getReader();
        // Streaming decode keeps multi-byte characters intact across chunk boundaries.
        const decoder = new TextDecoder();
        const parts = [];
        let byteLength = 0;
        for (;;) {
            const { done, value } = await reader.read();
            if (done) {
                break;
            }
            byteLength += value.byteLength;
            if (byteLength > byteLimit) {
                const overflow = byteLength - byteLimit;
                parts.push(decoder.decode(value.subarray(0, value.byteLength - overflow), { stream: true }));
                // Stop the download; a cancellation failure does not change the outcome.
                await reader.cancel().catch(() => { });
                return { exceeded: true, byteLength, text: parts.join('') };
            }
            parts.push(decoder.decode(value, { stream: true }));
        }
        parts.push(decoder.decode());
        return { exceeded: false, byteLength, text: parts.join('') };
    }
}