document-extraction-service
Version:
A service for handling document extraction and processing
60 lines (53 loc) • 1.52 kB
JavaScript
const axios = require('axios');
/**
 * Smoke-tests the extraction endpoint by POSTing a small multi-paragraph
 * document and logging the response body.
 *
 * The document and trace IDs are derived from the current timestamp and the
 * stream ID is fixed to 'test-stream'. The document ID is sent both in the
 * JSON body and in the X-Document-ID header.
 *
 * On failure the error is logged (`error.response.data` when present,
 * otherwise `error.message`) and `process.exitCode` is set to 1 so that a
 * shell or CI job can tell the run failed. The returned promise still
 * resolves, so callers do not need their own rejection handler.
 *
 * @param {object} [options]
 * @param {string} [options.extractUrl='http://localhost:5005/extract'] - URL the request is POSTed to.
 * @param {string} [options.callbackUrl='http://localhost:3000/callback'] - Value sent in the X-Callback-URL header (your callback endpoint).
 * @returns {Promise<void>}
 */
const testExtraction = async ({
  extractUrl = 'http://localhost:5005/extract',
  callbackUrl = 'http://localhost:3000/callback',
} = {}) => {
  try {
    // Test document identifiers
    const docId = `doc-${Date.now()}`;
    const traceId = `trace-${Date.now()}`;
    const streamId = 'test-stream';

    const payload = {
      content: {
        text: 'This is a test document.\n\nIt has multiple paragraphs.\n\nEach paragraph will become a chunk.',
        metadata: {
          source: 'test',
        },
      },
      docId,
      streamId,
    };

    const headers = {
      'Content-Type': 'application/json',
      'X-Document-ID': docId,
      'X-Trace-ID': traceId,
      'X-Callback-URL': callbackUrl,
    };

    // Make extraction request
    const response = await axios.post(extractUrl, payload, { headers });
    console.log('Extraction response:', response.data);
  } catch (error) {
    console.error('Test failed:', error.response?.data || error.message);
    // Without this the script would report success (exit status 0) even
    // though the request failed.
    process.exitCode = 1;
  }
};
// Example of setting custom extraction logic
const setCustomLogic = () => {
const { setCustomExtractor } = require('./index');
setCustomExtractor((content) => {
// Your custom extraction logic here
const text = content.text || content;
return [{
chunkId: 'custom-chunk-1',
chunkText: text,
pageNumber: 1,
chunkType: 'CustomText',
extractionMethod: 'custom',
extractionStrategy: 'custom'
}];
});
};
// Kick off the extraction test only when this file is the program's entry
// point, i.e. not when it is loaded through require() by another module.
const invokedDirectly = require.main === module;
if (invokedDirectly) {
  testExtraction().catch((failure) => console.error(failure));
}

// Public surface of this module.
module.exports = { setCustomLogic };