/*
 * lamplighter-mcp
 * Version:
 * An intelligent context engine for AI-assisted software development
 * 197 lines (166 loc) • 7.48 kB
 * text/typescript
 */
import axios from 'axios';
import dotenv from 'dotenv';
import * as url from 'url';
// Load environment variables at import time (dotenv reads a local .env file by
// default) so that the CONFLUENCE_* values read from process.env by the
// ConfluenceReader constructor are populated before any instance is created.
dotenv.config();
/**
 * Reads a Confluence page through the REST content API and converts its
 * storage-format (HTML-like) body into plain text.
 *
 * Connection settings are read from the environment when an instance is
 * constructed: `CONFLUENCE_URL`, `CONFLUENCE_USERNAME` and
 * `CONFLUENCE_API_TOKEN`. All three are required.
 */
export class ConfluenceReader {
  /**
   * URL path patterns that carry a numeric page ID, most specific first.
   * The first capture group of each pattern is the page ID.
   */
  private static readonly PAGE_ID_PATH_PATTERNS: readonly RegExp[] = [
    /\/spaces\/[^/]+\/pages\/(\d+)/, // .../spaces/SPACE/pages/123456
    /\/pages\/(\d+)/, // .../pages/123456/Page+Title or /pages/123456
    /\/view\/(\d+)/, // .../view/123456
  ];

  /**
   * Named character references decoded by {@link decodeEntities}, keyed by the
   * name between `&` and `;`. A Map is used (not an object literal) so that
   * names such as `constructor` never resolve to inherited properties.
   */
  private static readonly NAMED_ENTITIES: ReadonlyMap<string, string> = new Map([
    ['nbsp', ' '],
    ['amp', '&'],
    ['lt', '<'],
    ['gt', '>'],
    ['quot', '"'],
    ['apos', "'"],
  ]);

  private readonly baseUrl: string;
  private readonly apiToken: string;
  private readonly username: string;

  /**
   * @throws Error when any of `CONFLUENCE_URL`, `CONFLUENCE_API_TOKEN` or
   *   `CONFLUENCE_USERNAME` is missing or blank.
   */
  constructor() {
    const baseUrl = (process.env.CONFLUENCE_URL ?? '').trim();
    const apiToken = (process.env.CONFLUENCE_API_TOKEN ?? '').trim();
    const username = (process.env.CONFLUENCE_USERNAME ?? '').trim();

    if (!baseUrl) {
      throw new Error('Confluence URL is required. Set CONFLUENCE_URL in your environment variables.');
    }
    if (!apiToken) {
      throw new Error('Confluence API token is required. Set CONFLUENCE_API_TOKEN in your environment variables.');
    }
    if (!username) {
      throw new Error('Confluence username is required. Set CONFLUENCE_USERNAME in your environment variables.');
    }

    // Strip every trailing slash so that path concatenation never yields "//".
    this.baseUrl = baseUrl.replace(/\/+$/, '');
    this.apiToken = apiToken;
    this.username = username;

    // NOTE(review): this writes to stdout; if the host process uses stdout as a
    // protocol channel, consider routing this to stderr instead - confirm.
    console.log(`[ConfluenceReader] Initialized with base URL: ${this.baseUrl}`);
  }

  /**
   * Resolve a page ID from either a bare numeric ID or a Confluence page URL.
   *
   * Supported URL shapes: `/spaces/KEY/pages/<id>`, `/pages/<id>`,
   * `/view/<id>` and a `?pageId=<id>` query parameter (checked in that order).
   *
   * @param pageUrlOrId - Numeric page ID or absolute page URL; surrounding
   *   whitespace is ignored.
   * @returns The page ID as a string.
   * @throws Error if the input is neither numeric nor a parseable URL, or if
   *   the URL carries no recognizable page ID.
   */
  private extractPageId(pageUrlOrId: string): string {
    const input = String(pageUrlOrId ?? '').trim();

    // A string of digits is already a page ID.
    if (/^\d+$/.test(input)) {
      return input;
    }

    let parsedUrl: URL;
    try {
      parsedUrl = new URL(input);
    } catch {
      // Only URL parsing can fail here; report the caller's original input.
      throw new Error(`Invalid Confluence URL format: ${pageUrlOrId}`);
    }

    for (const pattern of ConfluenceReader.PAGE_ID_PATH_PATTERNS) {
      const match = parsedUrl.pathname.match(pattern);
      if (match) {
        return match[1];
      }
    }

    // Last resort: .../viewpage.action?pageId=123456
    const pageIdParam = parsedUrl.searchParams.get('pageId');
    if (pageIdParam) {
      return pageIdParam;
    }

    throw new Error(`Could not extract page ID from URL: ${pageUrlOrId}`);
  }

  /**
   * Fetch a Confluence page and return its body as plain text.
   *
   * @param pageUrlOrId - Numeric page ID or a page URL (see {@link extractPageId}).
   * @returns The page body with markup removed and whitespace normalized.
   * @throws Error with a `Confluence API error: ...` message when the server
   *   answers with an error status, a `No response received ...` message when
   *   no response arrives, and a `Failed to fetch Confluence page: ...`
   *   message for every other failure (including an unusable URL/ID).
   */
  async fetchPageContent(pageUrlOrId: string): Promise<string> {
    try {
      const pageId = this.extractPageId(pageUrlOrId);
      console.log(`[ConfluenceReader] Fetching page with ID: ${pageId}`);

      // The ID may come from a query parameter, so encode it before placing it
      // in the request path. Numeric IDs are unaffected.
      const apiUrl = `${this.baseUrl}/rest/api/content/${encodeURIComponent(pageId)}?expand=body.storage`;

      // Basic auth credentials: "<username>:<api token>", base64-encoded.
      const authToken = Buffer.from(`${this.username}:${this.apiToken}`).toString('base64');

      const response = await axios.get(apiUrl, {
        headers: {
          'Authorization': `Basic ${authToken}`,
          'Content-Type': 'application/json'
        }
      });

      const storage = response.data?.body?.storage;
      if (!storage) {
        throw new Error('response did not contain body.storage');
      }
      const htmlContent: string = typeof storage.value === 'string' ? storage.value : '';

      console.log(`[ConfluenceReader] Successfully fetched page: "${response.data.title}"`);

      return this.extractTextFromHtml(htmlContent);
    } catch (error) {
      // Log the message only: an Axios error object carries the request config,
      // including the Authorization header, which must not end up in logs.
      console.error(
        '[ConfluenceReader] Error fetching page content:',
        error instanceof Error ? error.message : String(error)
      );
      throw this.toFetchError(error);
    }
  }

  /**
   * Translate anything thrown while fetching a page into an Error with a
   * caller-friendly message.
   */
  private toFetchError(error: unknown): Error {
    if (axios.isAxiosError(error)) {
      if (error.response) {
        // Server responded with an error status.
        const message = ConfluenceReader.describeResponseBody(error.response.data);
        return new Error(`Confluence API error: ${error.response.status} - ${error.response.statusText || 'Status Text Unavailable'}. Message: ${message}`);
      }
      if (error.request) {
        // Request was sent but nothing came back (network-level failure).
        return new Error('No response received from Confluence. Please check your network connection and Confluence URL.');
      }
      // The request could not even be set up.
      return new Error(`Axios setup error: ${error.message}`);
    }
    if (error instanceof Error) {
      return new Error(`Failed to fetch Confluence page: ${error.message}`);
    }
    // Something other than an Error instance was thrown.
    return new Error(`Failed to fetch Confluence page: ${String(error)}`);
  }

  /**
   * Build a human-readable message from an error response body: the body
   * itself if it is a string, its string `message` field if present, otherwise
   * its JSON form. Falls back to a fixed text when none of these is available.
   */
  private static describeResponseBody(data: unknown): string {
    const fallback = 'Unknown server error';
    if (!data) {
      return fallback;
    }
    if (typeof data === 'string') {
      return data;
    }
    const candidate = (data as { message?: unknown }).message;
    if (candidate && typeof candidate === 'string') {
      return candidate;
    }
    try {
      return JSON.stringify(data) ?? fallback;
    } catch {
      // Deliberate best effort: bodies that cannot be serialized (e.g. circular
      // structures) are reported with the fallback text.
      return fallback;
    }
  }

  /**
   * Decode character references in a single pass.
   *
   * Handles the named references in {@link NAMED_ENTITIES} plus decimal
   * (`&#39;`) and hexadecimal (`&#x27;`) numeric references. Unknown or
   * out-of-range references are left untouched. Because the text is scanned
   * once, already-decoded output is never decoded again (`&amp;lt;` becomes
   * the literal text `&lt;`, not `<`).
   */
  private static decodeEntities(text: string): string {
    return text.replace(/&(#x[0-9a-f]+|#\d+|[a-z][a-z0-9]*);/gi, (whole: string, body: string) => {
      if (body.charAt(0) === '#') {
        const isHex = body.charAt(1).toLowerCase() === 'x';
        const codePoint = parseInt(body.slice(isHex ? 2 : 1), isHex ? 16 : 10);
        const isSurrogate = codePoint >= 0xd800 && codePoint <= 0xdfff;
        const isValid = codePoint > 0 && codePoint <= 0x10ffff && !isSurrogate;
        return isValid ? String.fromCodePoint(codePoint) : whole;
      }
      const named = ConfluenceReader.NAMED_ENTITIES.get(body);
      return named !== undefined ? named : whole;
    });
  }

  /**
   * Convert an HTML fragment into readable plain text.
   *
   * @param html - Markup to convert; an empty or missing value yields `''`.
   * @returns Text with script/style content and tags removed, block
   *   boundaries turned into blank lines, character references decoded and
   *   whitespace normalized (at most one blank line in a row, no leading or
   *   trailing whitespace).
   */
  private extractTextFromHtml(html: string): string {
    if (!html) return '';

    // 1. Drop script and style elements together with their content.
    let text = html
      .replace(/<script[^>]*>.*?<\/script>/gis, '')
      .replace(/<style[^>]*>.*?<\/style>/gis, '');

    // 2. Line breaks become newlines.
    text = text.replace(/<br\s*\/?>/gi, '\n');

    // 3. Closing block-level tags become paragraph breaks. The leading space
    //    keeps adjacent words apart; it is cleaned up in step 6.
    text = text.replace(/<\/(p|div|h[1-6]|li|blockquote|ul|ol|table|tr|td|th)>/gi, ' \n\n');

    // 4. Strip every remaining tag.
    text = text.replace(/<[^>]*>/g, '');

    // 5. Decode character references. This must come after tag stripping so
    //    that a decoded "<" is never mistaken for markup. Non-breaking spaces
    //    are turned into ordinary spaces so step 6 can collapse them.
    text = ConfluenceReader.decodeEntities(text).replace(/\u00a0/g, ' ');

    // 6. Normalize whitespace.
    text = text
      .replace(/\r\n/g, '\n') // unify line endings
      .replace(/[ \t]+/g, ' ') // collapse runs of spaces/tabs
      .replace(/ \n/g, '\n') // no space before a newline
      .replace(/\n /g, '\n') // no space after a newline
      .replace(/\n{3,}/g, '\n\n'); // at most one blank line in a row

    return text.trim();
  }
}