@hanivanrizky/nestjs-html-parser
Version:
A powerful NestJS HTML parsing service with XPath and CSS selector support, proxy configuration, random user agents, and rich response metadata including headers and status codes
313 lines ⢠14.5 kB
JavaScript
Object.defineProperty(exports, "__esModule", { value: true });
exports.demonstrateHtmlFetchResponse = demonstrateHtmlFetchResponse;
const html_parser_service_1 = require("../html-parser.service");
/**
* Example demonstrating HtmlFetchResponse usage
* Shows how to access and use response data, headers, status codes, and statusText
*/
async function demonstrateHtmlFetchResponse() {
const parser = new html_parser_service_1.HtmlParserService();
console.log('š HtmlFetchResponse - Complete Response Data Demo\n');
// Example 1: Basic Response Analysis
console.log('1. Basic Response Analysis');
console.log('==========================');
try {
const response = await parser.fetchHtml('https://httpbin.org/html', {
timeout: 10000,
retries: 1,
});
console.log('š Response Overview:');
console.log(` Status: ${response.status} ${response.statusText}`);
console.log(` Content Length: ${response.data.length} characters`);
console.log(` Headers Count: ${Object.keys(response.headers).length}`);
console.log();
console.log('šØ Response Headers:');
Object.entries(response.headers).forEach(([key, value]) => {
console.log(` ${key}: ${value}`);
});
console.log();
console.log('š Content Sample:');
console.log(` ${response.data.substring(0, 100)}...`);
console.log();
}
catch (error) {
console.error(`ā Error: ${error.message}\n`);
}
// Example 2: Status Code Handling
console.log('2. HTTP Status Code Handling');
console.log('=============================');
const testUrls = [
'https://httpbin.org/status/200', // Success
'https://httpbin.org/status/404', // Not Found
'https://httpbin.org/status/500', // Server Error
'https://httpbin.org/redirect/3', // Redirects
];
for (const url of testUrls) {
try {
const response = await parser.fetchHtml(url, {
timeout: 5000,
retries: 0, // No retries for demo
});
console.log(`ā
${url}`);
console.log(` Status: ${response.status} ${response.statusText}`);
// Handle different status codes
if (response.status >= 200 && response.status < 300) {
console.log(` ā
Success - Content length: ${response.data.length}`);
}
else if (response.status >= 300 && response.status < 400) {
console.log(` š Redirect - Location: ${response.headers.location || 'N/A'}`);
}
else if (response.status >= 400 && response.status < 500) {
console.log(` ā ļø Client Error - ${response.statusText}`);
}
else if (response.status >= 500) {
console.log(` š„ Server Error - ${response.statusText}`);
}
}
catch (error) {
console.log(`ā ${url}`);
console.log(` Error: ${error.message}`);
}
console.log();
}
// Example 3: Content-Type Based Processing
console.log('3. Content-Type Based Processing');
console.log('=================================');
const contentTypeUrls = [
'https://httpbin.org/html',
'https://httpbin.org/json',
'https://httpbin.org/xml',
];
for (const url of contentTypeUrls) {
try {
const response = await parser.fetchHtml(url, { timeout: 5000 });
const contentType = response.headers['content-type'] || 'unknown';
console.log(`š ${url}`);
console.log(` Content-Type: ${contentType}`);
console.log(` Status: ${response.status}`);
// Process based on content type
if (contentType.includes('text/html')) {
console.log(` š HTML Content - Extracting title...`);
const title = parser.extractText(response.data, '//title/text()', 'xpath');
console.log(` Title: ${title || 'No title found'}`);
const linkCount = parser.count(response.data, 'a', 'css');
console.log(` Links found: ${linkCount}`);
}
else if (contentType.includes('application/json')) {
console.log(` š JSON Content - Parsing...`);
try {
const jsonData = JSON.parse(response.data);
console.log(` JSON keys: ${Object.keys(jsonData).join(', ')}`);
}
catch (e) {
console.log(` ā Invalid JSON format`);
}
}
else if (contentType.includes('application/xml') ||
contentType.includes('text/xml')) {
console.log(` š XML Content - Extracting root element...`);
const rootElement = parser.extractSingle(response.data, '/*[1]', 'xpath');
console.log(` Root element: ${rootElement || 'Unable to parse XML'}`);
}
}
catch (error) {
console.log(`ā Error processing ${url}: ${error.message}`);
}
console.log();
}
// Example 4: Server Information Analysis
console.log('4. Server Information Analysis');
console.log('==============================');
try {
const response = await parser.fetchHtml('https://httpbin.org/response-headers?Server=CustomServer/1.0&X-Custom=MyValue', {
timeout: 5000,
});
console.log('š„ļø Server Analysis:');
console.log(` Server: ${response.headers.server || 'Unknown'}`);
console.log(` Date: ${response.headers.date || 'Unknown'}`);
console.log(` Connection: ${response.headers.connection || 'Unknown'}`);
// Check for security headers
const securityHeaders = [
'x-frame-options',
'x-content-type-options',
'x-xss-protection',
'strict-transport-security',
'content-security-policy',
];
console.log('\nš Security Headers:');
securityHeaders.forEach((header) => {
const value = response.headers[header];
console.log(` ${header}: ${value || 'Not set'}`);
});
// Check for custom headers
console.log('\nšļø Custom Headers:');
Object.entries(response.headers)
.filter(([key]) => key.startsWith('x-') && !securityHeaders.includes(key))
.forEach(([key, value]) => {
console.log(` ${key}: ${value}`);
});
}
catch (error) {
console.error(`ā Server analysis error: ${error.message}`);
}
console.log();
// Example 5: Caching and Performance Analysis
console.log('5. Caching and Performance Analysis');
console.log('===================================');
try {
const response = await parser.fetchHtml('https://httpbin.org/cache/60', {
timeout: 5000,
});
console.log('ā” Performance Metrics:');
console.log(` Content-Length: ${response.headers['content-length'] || 'Unknown'}`);
console.log(` Content-Encoding: ${response.headers['content-encoding'] || 'None'}`);
// Cache headers
console.log('\nš¾ Cache Information:');
console.log(` Cache-Control: ${response.headers['cache-control'] || 'Not set'}`);
console.log(` ETag: ${response.headers.etag || 'Not set'}`);
console.log(` Last-Modified: ${response.headers['last-modified'] || 'Not set'}`);
console.log(` Expires: ${response.headers.expires || 'Not set'}`);
const contentLengthHeader = response.headers['content-length'];
const contentLength = contentLengthHeader
? parseInt(contentLengthHeader, 10)
: 0;
const actualLength = response.data?.length || 0;
if (contentLength > 0 && actualLength >= 0) {
const compression = ((contentLength - actualLength) / contentLength) * 100;
console.log(`\nš Compression Analysis:`);
console.log(` Declared Length: ${contentLength.toLocaleString()} bytes`);
console.log(` Actual Length: ${actualLength.toLocaleString()} bytes`);
if (Number.isFinite(compression)) {
if (compression >= 0) {
console.log(` Compression: ${compression.toFixed(2)}%`);
}
else {
console.log(` Content Expansion: ${Math.abs(compression).toFixed(2)}% (larger than declared)`);
}
}
else {
console.log(` Compression: Unable to calculate (invalid data)`);
}
}
else if (contentLength === 0 && actualLength > 0) {
console.log(`\nš Compression Analysis:`);
console.log(` Declared Length: Not specified`);
console.log(` Actual Length: ${actualLength.toLocaleString()} bytes`);
console.log(` Compression: Cannot calculate (no Content-Length header)`);
}
else {
console.log(`\nš Compression Analysis: No content to analyze`);
}
}
catch (error) {
console.error(`ā Performance analysis error: ${error.message}`);
}
console.log();
// Example 6: Combined Response and Extraction
console.log('6. Combined Response Analysis and Content Extraction');
console.log('====================================================');
try {
const response = await parser.fetchHtml('https://httpbin.org/html', {
useRandomUserAgent: true,
timeout: 5000,
});
console.log('š Response + Content Analysis:');
console.log(` HTTP Status: ${response.status} ${response.statusText}`);
console.log(` Server: ${response.headers.server || 'Unknown'}`);
console.log(` Content-Type: ${response.headers['content-type'] || 'Unknown'}`);
// Only process if it's HTML and successful
if (response.status === 200 &&
response.headers['content-type']?.includes('html')) {
console.log('\nš Content Extraction:');
// Extract basic information
const title = parser.extractText(response.data, '//title/text()', 'xpath');
const headings = parser.extractMultiple(response.data, 'h1, h2, h3', 'css');
const paragraphs = parser.count(response.data, 'p', 'css');
const links = parser.extractAttributes(response.data, 'a', 'href', 'css');
console.log(` Title: ${title || 'No title'}`);
console.log(` Headings: ${headings.length} found`);
if (headings.length > 0) {
console.log(` First heading: ${headings[0]}`);
}
console.log(` Paragraphs: ${paragraphs}`);
console.log(` Links: ${links.length} found`);
// Extract structured data
const pageInfo = parser.extractStructured(response.data, {
title: {
selector: '//title/text()',
type: 'xpath',
},
headingCount: {
selector: 'h1, h2, h3, h4, h5, h6',
type: 'css',
transform: (value) => parser.count(response.data, 'h1, h2, h3, h4, h5, h6', 'css'),
},
hasImages: {
selector: 'img',
type: 'css',
transform: (value) => parser.exists(response.data, 'img', 'css'),
},
});
console.log('\nš Structured Analysis:');
console.log(` ${JSON.stringify(pageInfo, null, 2)}`);
// Create a comprehensive report
const report = {
response: {
status: response.status,
statusText: response.statusText,
server: response.headers.server,
contentType: response.headers['content-type'],
contentLength: response.data.length,
},
content: pageInfo,
links: links.slice(0, 3), // First 3 links
timestamp: new Date().toISOString(),
};
console.log('\nš Complete Report:');
console.log(JSON.stringify(report, null, 2));
}
else {
console.log(` ā ļø Cannot extract content - Status: ${response.status}, Content-Type: ${response.headers['content-type']}`);
}
}
catch (error) {
console.error(`ā Combined analysis error: ${error.message}`);
}
console.log();
// Example 7: Error Response Handling
console.log('7. Error Response Handling');
console.log('==========================');
try {
// This will likely cause a timeout or DNS error
const response = await parser.fetchHtml('https://nonexistent-domain-12345.example', {
timeout: 3000,
retries: 1,
});
// This won't execute due to error, but shows how you'd handle success
console.log(`Unexpected success: ${response.status}`);
}
catch (error) {
console.log('šØ Handling Fetch Error:');
console.log(` Error Type: ${error.constructor.name}`);
console.log(` Error Message: ${error.message}`);
// In real applications, you might want to:
// 1. Log errors for monitoring
// 2. Return default content
// 3. Try alternative URLs
// 4. Cache and return previous successful response
console.log(' š Error Handling Strategies:');
console.log(' ⢠Log error for monitoring');
console.log(' ⢠Try alternative URLs');
console.log(' ⢠Return cached content');
console.log(' ⢠Use fallback data source');
}
console.log('\nš HtmlFetchResponse demonstration completed!');
}
// Run the demo if this file is executed directly
if (require.main === module) {
demonstrateHtmlFetchResponse()
.then(() => console.log('\n⨠Demo completed successfully!'))
.catch((error) => console.error('\nš„ Demo failed:', error.message));
}
//# sourceMappingURL=html-fetch-response.js.map
;