invoice_ocr_mcp
Version:
企业发票OCR识别MCP服务器,基于RapidOCR和ModelScope生态,支持多种发票类型的结构化识别
835 lines (680 loc) • 29.5 kB
Markdown
# 使用示例
本文档提供了Invoice OCR MCP的详细使用示例,包括Python、JavaScript等多种语言的客户端实现。
## 基础示例
### Python客户端示例
#### 单张发票识别
```python
import asyncio
import base64
from mcp.client.session import ClientSession
from mcp.client.stdio import stdio_client
async def recognize_single_invoice():
    """Recognize a single invoice image and print its key fields.

    Reads ``invoice.jpg`` from the current working directory, base64-encodes
    it, launches the server script over stdio, calls the
    ``recognize_single_invoice`` tool with ``output_format="standard"`` and
    prints either the recognized fields or the reported error message.
    """
    # Read the invoice image and encode it as base64 text.
    with open("invoice.jpg", "rb") as f:
        image_data = base64.b64encode(f.read()).decode()

    # NOTE(review): the published `mcp` Python SDK documents `stdio_client` as
    # taking a `StdioServerParameters` object and `call_tool` as returning a
    # `CallToolResult`; confirm that the list argument and the dict-style
    # access below match the SDK version you have installed.
    async with stdio_client(["python", "src/invoice_ocr_mcp/server.py"]) as streams:
        async with ClientSession(streams[0], streams[1]) as session:
            # The session must be initialized before any tool call.
            await session.initialize()

            result = await session.call_tool(
                "recognize_single_invoice",
                {
                    "image_data": image_data,
                    "output_format": "standard",
                },
            )

            if result.get("success"):
                invoice_data = result["data"]
                print(f"发票类型: {invoice_data['invoice_type']['name']}")
                print(f"发票号码: {invoice_data['basic_info']['invoice_number']}")
                print(f"总金额: {invoice_data['basic_info']['total_amount']}")
                print(f"销售方: {invoice_data['seller_info']['name']}")
            else:
                print(f"识别失败: {result['error']['message']}")


# Run the example only when this file is executed as a script, so importing
# it does not spawn the server as a side effect.
if __name__ == "__main__":
    asyncio.run(recognize_single_invoice())
```
#### 批量发票识别
```python
import asyncio
import base64
import os
from mcp.client.session import ClientSession
from mcp.client.stdio import stdio_client
async def recognize_batch_invoices():
    """Recognize every invoice image under ``invoices/`` in one batch call.

    Files ending in ``.jpg``, ``.jpeg`` or ``.png`` (case-insensitive) are
    base64-encoded and sent to the ``recognize_batch_invoices`` tool with
    ``parallel_count=3``. A summary and one line per image are printed.
    """
    invoice_dir = "invoices/"
    images = []
    # os.listdir() returns entries in arbitrary order; sort them so the batch
    # (and therefore the printed report) is reproducible between runs.
    for filename in sorted(os.listdir(invoice_dir)):
        if not filename.lower().endswith(('.jpg', '.jpeg', '.png')):
            continue
        filepath = os.path.join(invoice_dir, filename)
        with open(filepath, "rb") as f:
            image_data = base64.b64encode(f.read()).decode()
        # The file name doubles as the id used to match results to inputs.
        images.append({
            "id": filename,
            "image_data": image_data,
        })

    # NOTE(review): the published `mcp` Python SDK documents `stdio_client` as
    # taking a `StdioServerParameters` object and `call_tool` as returning a
    # `CallToolResult`; confirm the usage below against the installed SDK.
    async with stdio_client(["python", "src/invoice_ocr_mcp/server.py"]) as streams:
        async with ClientSession(streams[0], streams[1]) as session:
            await session.initialize()

            result = await session.call_tool(
                "recognize_batch_invoices",
                {
                    "images": images,
                    "parallel_count": 3,
                },
            )

            if result.get("success"):
                batch_data = result["data"]
                print(f"总计: {batch_data['total_count']} 张")
                print(f"成功: {batch_data['success_count']} 张")
                print(f"失败: {batch_data['failed_count']} 张")
                for item in batch_data["results"]:
                    if item["status"] == "success":
                        invoice = item["data"]
                        print(f"\n{item['id']}:")
                        print(f" 类型: {invoice['invoice_type']['name']}")
                        print(f" 金额: {invoice['basic_info']['total_amount']}")
                    else:
                        print(f"\n{item['id']}: 识别失败")


# Run the example only when this file is executed as a script.
if __name__ == "__main__":
    asyncio.run(recognize_batch_invoices())
```
#### 带错误处理和重试的示例
```python
import asyncio
import base64
import os
import time

from mcp.client.session import ClientSession
from mcp.client.stdio import stdio_client
class InvoiceOCRClient:
    """Client that launches the Invoice OCR MCP server for each request.

    Args:
        server_command: Command list used to start the server over stdio.
            Defaults to ``["python", "src/invoice_ocr_mcp/server.py"]``.
    """

    # NOTE(review): the published `mcp` Python SDK documents `stdio_client` as
    # taking a `StdioServerParameters` object and `call_tool` as returning a
    # `CallToolResult`; confirm the list argument and dict-style result access
    # used by the methods below against the installed SDK version.

    def __init__(self, server_command=None):
        self.server_command = server_command or ["python", "src/invoice_ocr_mcp/server.py"]

    async def recognize_with_retry(self, image_data, max_retries=3):
        """Recognize one invoice, retrying on failure.

        Args:
            image_data: Base64-encoded image passed to the
                ``recognize_single_invoice`` tool.
            max_retries: Maximum number of attempts.

        Returns:
            The ``data`` member of the first successful tool result.

        Raises:
            Exception: The error of the final attempt, or a generic
                "retries exhausted" error when every attempt was rate
                limited (or ``max_retries`` is 0).
        """
        for attempt in range(max_retries):
            is_last_attempt = attempt == max_retries - 1
            try:
                async with stdio_client(self.server_command) as streams:
                    async with ClientSession(streams[0], streams[1]) as session:
                        await session.initialize()
                        result = await session.call_tool(
                            "recognize_single_invoice",
                            {"image_data": image_data},
                        )

                if result.get("success"):
                    return result["data"]

                error = result.get("error", {})
                if error.get("code") == "RATE_LIMIT_EXCEEDED":
                    # Back off exponentially, but do not wait after the final
                    # attempt: nothing would follow the sleep.
                    if not is_last_attempt:
                        wait_time = 2 ** attempt
                        print(f"请求频率超限,等待 {wait_time} 秒后重试...")
                        await asyncio.sleep(wait_time)
                    continue

                # Any other reported failure is raised here and handled by
                # the generic retry path below.
                raise Exception(f"识别失败: {error.get('message', '未知错误')}")
            except Exception as e:
                if is_last_attempt:
                    raise  # bare raise keeps the original traceback
                print(f"第 {attempt + 1} 次尝试失败: {e}")
                await asyncio.sleep(1)
        raise Exception("达到最大重试次数")

    async def batch_recognize_with_progress(self, image_files, batch_size=10):
        """Recognize many image files in batches, printing progress.

        Args:
            image_files: Sequence of image file paths.
            batch_size: Number of files sent per ``recognize_batch_invoices``
                call; must be positive.

        Returns:
            The concatenated ``results`` lists of every successful batch.
            Batches that fail or report no success contribute nothing.

        Raises:
            ValueError: If ``batch_size`` is not positive.
            OSError: If an image file cannot be read.
        """
        if batch_size <= 0:
            raise ValueError("batch_size must be a positive integer")

        results = []
        total_files = len(image_files)
        for start in range(0, total_files, batch_size):
            batch_number = start // batch_size + 1

            # Encode every file of the current batch.
            batch_images = []
            for file_path in image_files[start:start + batch_size]:
                with open(file_path, "rb") as f:
                    image_data = base64.b64encode(f.read()).decode()
                batch_images.append({
                    "id": os.path.basename(file_path),
                    "image_data": image_data,
                })

            try:
                async with stdio_client(self.server_command) as streams:
                    async with ClientSession(streams[0], streams[1]) as session:
                        await session.initialize()
                        batch_result = await session.call_tool(
                            "recognize_batch_invoices",
                            {
                                "images": batch_images,
                                "parallel_count": 3,
                            },
                        )

                if batch_result.get("success"):
                    results.extend(batch_result["data"]["results"])
                else:
                    # Surface the failure instead of silently dropping the batch.
                    message = (batch_result.get("error") or {}).get("message", "未知错误")
                    print(f"批次 {batch_number} 识别未成功: {message}")

                processed = min(start + batch_size, total_files)
                progress = (processed / total_files) * 100
                print(f"进度: {processed}/{total_files} ({progress:.1f}%)")
            except Exception as e:
                # Best effort: a failing batch must not abort the remaining ones.
                print(f"批次 {batch_number} 处理失败: {e}")
        return results
# 使用示例
async def main():
    """Recognize ``invoice.jpg`` with retries and print the invoice number."""
    client = InvoiceOCRClient()

    # Load and base64-encode the single invoice image.
    with open("invoice.jpg", "rb") as f:
        image_data = base64.b64encode(f.read()).decode()

    try:
        result = await client.recognize_with_retry(image_data)
        print("识别成功:", result["basic_info"]["invoice_number"])
    except Exception as e:
        # Top-level boundary of the example: report and finish.
        print("识别失败:", e)


# Run the example only when this file is executed as a script.
if __name__ == "__main__":
    asyncio.run(main())
```
### JavaScript客户端示例
#### Node.js 客户端
```javascript
import { StdioClientTransport } from '@modelcontextprotocol/sdk/client/stdio.js';
import { Client } from '@modelcontextprotocol/sdk/client/index.js';
import fs from 'fs/promises';
/**
 * Node.js client that starts the Invoice OCR MCP server over stdio for each
 * request and calls one of its recognition tools.
 */
class InvoiceOCRClient {
  /**
   * @param {string[]} serverCommand Executable followed by its arguments.
   */
  constructor(serverCommand = ['python', 'src/invoice_ocr_mcp/server.py']) {
    this.serverCommand = serverCommand;
  }

  /**
   * Recognize a single invoice image.
   * @param {string} imagePath Path of the image file.
   * @returns {Promise<object>} Result of the `recognize_single_invoice` tool.
   */
  async recognizeInvoice(imagePath) {
    const imageBase64 = await this._readImageAsBase64(imagePath);
    return this._callTool('recognize_single_invoice', {
      image_data: imageBase64,
      output_format: 'standard'
    });
  }

  /**
   * Recognize several invoice images in one call.
   * @param {string[]} imagePaths Paths of the image files.
   * @returns {Promise<object>} Result of the `recognize_batch_invoices` tool.
   */
  async recognizeBatch(imagePaths) {
    // Ids are positional (invoice_1, invoice_2, ...) in input order.
    const images = await Promise.all(
      imagePaths.map(async (path, index) => ({
        id: `invoice_${index + 1}`,
        image_data: await this._readImageAsBase64(path)
      }))
    );
    return this._callTool('recognize_batch_invoices', {
      images: images,
      parallel_count: 3
    });
  }

  /** Read a file and return its content as a base64 string. */
  async _readImageAsBase64(imagePath) {
    const imageBuffer = await fs.readFile(imagePath);
    return imageBuffer.toString('base64');
  }

  /**
   * Connect to a freshly spawned server, call one tool and always close the
   * connection afterwards.
   */
  async _callTool(name, args) {
    const [command, ...commandArgs] = this.serverCommand;
    const transport = new StdioClientTransport({ command, args: commandArgs });
    const client = new Client(
      { name: "invoice-client", version: "1.0.0" },
      { capabilities: {} }
    );
    try {
      await client.connect(transport);
      // callTool() is the SDK helper for "tools/call"; the second argument of
      // the generic request() is a result schema, not the tool parameters.
      return await client.callTool({ name, arguments: args });
    } finally {
      await client.close();
    }
  }
}
// 使用示例
/**
 * Demo entry point: recognize one invoice, then a batch of three, logging
 * each result; any failure is reported on stderr.
 */
async function main() {
  const ocrClient = new InvoiceOCRClient();
  const batchPaths = ['invoice1.jpg', 'invoice2.jpg', 'invoice3.jpg'];

  try {
    // Single invoice
    const singleResult = await ocrClient.recognizeInvoice('invoice.jpg');
    console.log('识别结果:', singleResult);

    // Batch of invoices
    const batchResult = await ocrClient.recognizeBatch(batchPaths);
    console.log('批量识别结果:', batchResult);
  } catch (err) {
    console.error('识别失败:', err);
  }
}

main();
```
#### Web前端示例
```html
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<title>发票OCR识别</title>
<style>
.container { max-width: 800px; margin: 0 auto; padding: 20px; }
.upload-area {
border: 2px dashed #ccc;
padding: 40px;
text-align: center;
margin: 20px 0;
}
.result { margin-top: 20px; padding: 20px; background: #f5f5f5; }
.loading { display: none; }
</style>
</head>
<body>
<div class="container">
<h1>发票OCR识别系统</h1>
<div class="upload-area" id="uploadArea">
<p>点击选择发票图片或拖拽到此处</p>
<input type="file" id="fileInput" accept="image/*" multiple style="display: none;">
</div>
<div class="loading" id="loading">
<p>正在识别中,请稍候...</p>
</div>
<div class="result" id="result" style="display: none;">
<h3>识别结果</h3>
<div id="resultContent"></div>
</div>
</div>
<script>
/**
 * Browser front end for invoice recognition: handles file selection and
 * drag-and-drop, posts images to the HTTP API and renders the response.
 */
class WebInvoiceOCR {
  /**
   * @param {string} apiEndpoint Base URL; requests go to `${apiEndpoint}/recognize`.
   */
  constructor(apiEndpoint) {
    this.apiEndpoint = apiEndpoint;
    this.setupEventListeners();
  }

  /** Wire click, drag-and-drop and file-input events to handleFiles(). */
  setupEventListeners() {
    const uploadArea = document.getElementById('uploadArea');
    const fileInput = document.getElementById('fileInput');

    uploadArea.addEventListener('click', () => fileInput.click());
    uploadArea.addEventListener('dragover', (e) => {
      // preventDefault() is required for the element to accept a drop.
      e.preventDefault();
      uploadArea.style.backgroundColor = '#e8f5e8';
    });
    uploadArea.addEventListener('dragleave', () => {
      uploadArea.style.backgroundColor = '';
    });
    uploadArea.addEventListener('drop', (e) => {
      e.preventDefault();
      uploadArea.style.backgroundColor = '';
      this.handleFiles(e.dataTransfer.files);
    });
    fileInput.addEventListener('change', (e) => {
      this.handleFiles(e.target.files);
    });
  }

  /** Recognize the selected files: one file → single call, several → batch. */
  async handleFiles(files) {
    if (files.length === 0) return;

    this.showLoading(true);
    this.hideResult();
    try {
      if (files.length === 1) {
        const result = await this.recognizeSingle(files[0]);
        this.displaySingleResult(result);
      } else {
        const results = await this.recognizeBatch(Array.from(files));
        this.displayBatchResults(results);
      }
    } catch (error) {
      this.displayError(error.message);
    } finally {
      this.showLoading(false);
    }
  }

  /** Recognize one File and return the parsed JSON response. */
  async recognizeSingle(file) {
    const imageBase64 = await this.fileToBase64(file);
    return this._postTool('recognize_single_invoice', {
      image_data: imageBase64,
      output_format: 'standard'
    });
  }

  /** Recognize several Files in one request; ids are invoice_1, invoice_2, ... */
  async recognizeBatch(files) {
    const images = await Promise.all(
      files.map(async (file, index) => ({
        id: `invoice_${index + 1}`,
        image_data: await this.fileToBase64(file)
      }))
    );
    return this._postTool('recognize_batch_invoices', {
      images: images,
      parallel_count: 3
    });
  }

  /** POST a tool invocation to the API; throws on a non-2xx HTTP status. */
  async _postTool(tool, args) {
    const response = await fetch(`${this.apiEndpoint}/recognize`, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
      },
      body: JSON.stringify({ tool: tool, arguments: args })
    });
    if (!response.ok) {
      throw new Error(`HTTP ${response.status}: ${response.statusText}`);
    }
    return await response.json();
  }

  /** Resolve with the base64 payload of a File (data-URL prefix removed). */
  fileToBase64(file) {
    return new Promise((resolve, reject) => {
      const reader = new FileReader();
      reader.onload = () => {
        // reader.result is "data:<mime>;base64,<payload>"; keep the payload.
        const base64 = reader.result.split(',')[1];
        resolve(base64);
      };
      reader.onerror = reject;
      reader.readAsDataURL(file);
    });
  }

  /**
   * Escape a value for safe interpolation into HTML. Recognized text comes
   * from scanned documents and server responses, so it must never reach
   * innerHTML unescaped.
   */
  escapeHtml(value) {
    const replacements = {
      '&': '&amp;',
      '<': '&lt;',
      '>': '&gt;',
      '"': '&quot;',
      "'": '&#39;'
    };
    return String(value).replace(/[&<>"']/g, (ch) => replacements[ch]);
  }

  /** Render the response of a single-invoice recognition. */
  displaySingleResult(result) {
    const content = document.getElementById('resultContent');
    const esc = (value) => this.escapeHtml(value);

    if (result.success) {
      const invoice = result.data;
      content.innerHTML = `
<h4>${esc(invoice.invoice_type.name)}</h4>
<p><strong>发票号码:</strong> ${esc(invoice.basic_info.invoice_number)}</p>
<p><strong>开票日期:</strong> ${esc(invoice.basic_info.invoice_date)}</p>
<p><strong>总金额:</strong> ¥${esc(invoice.basic_info.total_amount)}</p>
<p><strong>销售方:</strong> ${esc(invoice.seller_info.name)}</p>
<p><strong>购买方:</strong> ${esc(invoice.buyer_info.name)}</p>
<p><strong>置信度:</strong> ${(invoice.meta.confidence_score * 100).toFixed(1)}%</p>
`;
    } else {
      content.innerHTML = `<p style="color: red;">识别失败: ${esc(result.error.message)}</p>`;
    }
    this.showResult();
  }

  /** Render the response of a batch recognition, one card per image. */
  displayBatchResults(results) {
    const content = document.getElementById('resultContent');
    const esc = (value) => this.escapeHtml(value);

    if (results.success) {
      const data = results.data;
      let html = `
<h4>批量识别结果</h4>
<p>总计: ${esc(data.total_count)} 张,成功: ${esc(data.success_count)} 张,失败: ${esc(data.failed_count)} 张</p>
<div>
`;
      data.results.forEach(item => {
        if (item.status === 'success') {
          const invoice = item.data;
          html += `
<div style="border: 1px solid #ddd; margin: 10px 0; padding: 10px;">
<h5>${esc(item.id)} - ${esc(invoice.invoice_type.name)}</h5>
<p>发票号码: ${esc(invoice.basic_info.invoice_number)}</p>
<p>金额: ¥${esc(invoice.basic_info.total_amount)}</p>
</div>
`;
        } else {
          html += `
<div style="border: 1px solid #f00; margin: 10px 0; padding: 10px;">
<h5>${esc(item.id)} - 识别失败</h5>
</div>
`;
        }
      });
      html += '</div>';
      content.innerHTML = html;
    } else {
      content.innerHTML = `<p style="color: red;">批量识别失败: ${esc(results.error.message)}</p>`;
    }
    this.showResult();
  }

  /** Show an error message in the result panel. */
  displayError(message) {
    const content = document.getElementById('resultContent');
    content.innerHTML = `<p style="color: red;">错误: ${this.escapeHtml(message)}</p>`;
    this.showResult();
  }

  /** Toggle the "recognizing…" indicator. */
  showLoading(show) {
    document.getElementById('loading').style.display = show ? 'block' : 'none';
  }

  showResult() {
    document.getElementById('result').style.display = 'block';
  }

  hideResult() {
    document.getElementById('result').style.display = 'none';
  }
}

// Initialize the application
const ocr = new WebInvoiceOCR('http://localhost:8000/api');
</script>
</body>
</html>
```
## 高级用例
### 企业级集成示例
```python
import asyncio
import logging
from typing import List, Dict, Any
from datetime import datetime
import json
class EnterpriseInvoiceProcessor:
    """Batch pipeline: recognize, validate, standardize and persist invoices.

    Args:
        config: Settings dictionary. Keys read by this class:
            ``server_command``, ``batch_size`` (default 10),
            ``save_to_database``, ``save_to_file`` and ``webhook_url``.
    """

    def __init__(self, config: Dict[str, Any]):
        self.config = config
        self.logger = logging.getLogger(__name__)
        self.client = InvoiceOCRClient(config.get('server_command'))

    async def process_invoice_batch(self, file_paths: List[str]) -> Dict[str, Any]:
        """Run the full pipeline over ``file_paths`` and return a summary.

        Returns:
            Dict with ``total_files``, ``successful``, ``failed``,
            ``processing_time`` (seconds) and ``timestamp`` (ISO format).

        Raises:
            Exception: Any pipeline error is logged and re-raised.
        """
        start_time = datetime.now()
        total_files = len(file_paths)
        self.logger.info(f"开始处理 {total_files} 张发票")
        try:
            results = await self.client.batch_recognize_with_progress(
                file_paths,
                batch_size=self.config.get('batch_size', 10)
            )
            validated_results = self.validate_results(results)
            standardized_results = self.standardize_data(validated_results)
            await self.save_results(standardized_results)

            end_time = datetime.now()
            successful = sum(1 for r in results if r.get('status') == 'success')
            summary = {
                'total_files': total_files,
                'successful': successful,
                # Files whose batch produced no result entry at all are
                # failures too, so derive the count from the input size.
                'failed': total_files - successful,
                'processing_time': (end_time - start_time).total_seconds(),
                'timestamp': end_time.isoformat()
            }
            self.logger.info(f"批次处理完成: {summary}")
            return summary
        except Exception as e:
            self.logger.error(f"批次处理失败: {e}")
            raise

    def validate_results(self, results: List[Dict]) -> List[Dict]:
        """Keep only successful results with complete, plausible basic info.

        A result is dropped (with a warning, except for non-success status)
        when its status is not ``success``, when ``invoice_number``,
        ``total_amount`` or ``invoice_date`` is missing/empty, or when
        ``total_amount`` is not a number greater than zero.
        """
        validated = []
        for result in results:
            if result.get('status') != 'success':
                continue

            data = result.get('data', {})
            basic_info = data.get('basic_info', {})

            required = (
                basic_info.get('invoice_number'),
                basic_info.get('total_amount'),
                basic_info.get('invoice_date'),
            )
            if not all(required):
                self.logger.warning(f"发票 {result.get('id')} 缺少必要字段")
                continue

            try:
                amount = float(basic_info['total_amount'])
            except (TypeError, ValueError):
                # TypeError covers non-string, non-numeric values (list, dict).
                self.logger.warning(f"发票 {result.get('id')} 金额格式错误")
                continue
            if amount <= 0:
                self.logger.warning(f"发票 {result.get('id')} 金额异常: {amount}")
                continue

            validated.append(result)
        return validated

    def standardize_data(self, results: List[Dict]) -> List[Dict]:
        """Normalize dates and amounts in place and attach processing metadata.

        Dates parseable as ``%Y-%m-%d`` are rewritten zero-padded; amount
        fields convertible to float are rewritten with two decimals. Values
        that cannot be converted are left untouched. The input dictionaries
        are mutated and returned in a new list.
        """
        standardized = []
        for result in results:
            data = result['data']
            basic_info = data['basic_info']

            invoice_date = basic_info['invoice_date']
            try:
                parsed_date = datetime.strptime(invoice_date, '%Y-%m-%d')
                basic_info['invoice_date'] = parsed_date.strftime('%Y-%m-%d')
            except (TypeError, ValueError):
                self.logger.warning(f"日期格式异常: {invoice_date}")

            for field in ['total_amount', 'tax_amount', 'amount_without_tax']:
                if field not in basic_info:
                    continue
                try:
                    basic_info[field] = f"{float(basic_info[field]):.2f}"
                except (TypeError, ValueError):
                    # Deliberate best effort: keep the raw value (e.g. None).
                    pass

            data['processing_meta'] = {
                'processed_at': datetime.now().isoformat(),
                'processor_version': '1.0.0',
                'validation_passed': True
            }
            standardized.append(result)
        return standardized

    async def save_results(self, results: List[Dict]):
        """Dispatch results to every sink enabled in the configuration."""
        if self.config.get('save_to_database'):
            await self.save_to_database(results)
        if self.config.get('save_to_file'):
            await self.save_to_file(results)
        if self.config.get('webhook_url'):
            await self.send_webhook(results)

    async def save_to_database(self, results: List[Dict]):
        """Placeholder database sink: only logs the number of records."""
        self.logger.info(f"保存 {len(results)} 条记录到数据库")

    async def save_to_file(self, results: List[Dict]):
        """Write results to a timestamped UTF-8 JSON file in the working dir."""
        filename = f"invoice_results_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"
        with open(filename, 'w', encoding='utf-8') as f:
            json.dump(results, f, ensure_ascii=False, indent=2)
        self.logger.info(f"结果已保存到文件: {filename}")

    async def send_webhook(self, results: List[Dict]):
        """POST results to ``config['webhook_url']``; failures are only logged."""
        # Imported lazily so aiohttp is needed only when a webhook is configured.
        import aiohttp

        webhook_url = self.config['webhook_url']
        payload = {
            'event': 'invoice_batch_processed',
            'data': results,
            'timestamp': datetime.now().isoformat()
        }
        async with aiohttp.ClientSession() as session:
            try:
                async with session.post(webhook_url, json=payload) as response:
                    # Any 2xx status (e.g. 202 Accepted, 204 No Content)
                    # means the receiver took the delivery.
                    if 200 <= response.status < 300:
                        self.logger.info("Webhook发送成功")
                    else:
                        self.logger.error(f"Webhook发送失败: {response.status}")
            except Exception as e:
                # Best effort: a webhook outage must not fail the batch.
                self.logger.error(f"Webhook发送异常: {e}")
# 使用示例
async def main():
    """Build a processor from a sample configuration and process two invoices."""
    processor = EnterpriseInvoiceProcessor({
        'server_command': ['python', 'src/invoice_ocr_mcp/server.py'],
        'batch_size': 20,
        'save_to_database': True,
        'save_to_file': True,
        'webhook_url': 'https://api.example.com/webhook/invoices'
    })

    invoice_paths = [
        'invoices/invoice_001.jpg',
        'invoices/invoice_002.jpg',
        # ... more files
    ]

    batch_summary = await processor.process_invoice_batch(invoice_paths)
    print("处理完成:", batch_summary)


if __name__ == "__main__":
    asyncio.run(main())
```
## 错误处理最佳实践
```python
class RobustInvoiceOCR:
    """Invoice OCR client skeleton with fallback and retry helpers.

    ``standard_recognize`` and ``basic_ocr_recognize`` are extension points:
    override them with real recognition calls.
    """

    def __init__(self):
        # Local import keeps this snippet usable on its own.
        import logging

        self.max_retries = 3
        self.base_delay = 1   # seconds before the first retry
        self.max_delay = 60   # upper bound for the backoff delay, in seconds
        # The methods below log through self.logger, so it must exist.
        self.logger = logging.getLogger(__name__)

    async def standard_recognize(self, image_data: str) -> Dict[str, Any]:
        """Primary recognition hook; override in a subclass."""
        raise NotImplementedError("standard_recognize is not implemented")

    async def basic_ocr_recognize(self, image_data: str) -> Dict[str, Any]:
        """Fallback recognition hook; override in a subclass."""
        raise NotImplementedError("basic_ocr_recognize is not implemented")

    async def recognize_with_fallback(self, image_data: str) -> Dict[str, Any]:
        """Try standard recognition, then basic OCR, then give up gracefully.

        Returns:
            The result of the first method that succeeds, or a dictionary
            with ``success=False`` and ``requires_manual_processing=True``
            when both methods raise.
        """
        try:
            return await self.standard_recognize(image_data)
        except Exception as e:
            self.logger.warning(f"标准识别失败: {e}")

        # Degrade to the basic OCR path.
        try:
            return await self.basic_ocr_recognize(image_data)
        except Exception as e:
            self.logger.error(f"基础OCR也失败: {e}")

        # Nothing worked: flag the invoice for manual handling.
        return {
            'success': False,
            'requires_manual_processing': True,
            'error': 'All OCR methods failed'
        }

    async def exponential_backoff_retry(self, func, *args, **kwargs):
        """Await ``func(*args, **kwargs)``, retrying with exponential backoff.

        The delay doubles after each failure, starting at ``base_delay`` and
        capped at ``max_delay``. After ``max_retries`` failed attempts the
        last exception is re-raised.
        """
        for attempt in range(self.max_retries):
            try:
                return await func(*args, **kwargs)
            except Exception:
                if attempt == self.max_retries - 1:
                    raise  # bare raise keeps the original traceback
                delay = min(self.base_delay * (2 ** attempt), self.max_delay)
                self.logger.info(f"第 {attempt + 1} 次尝试失败,{delay}秒后重试")
                await asyncio.sleep(delay)
```
这些示例展示了Invoice OCR MCP的各种使用场景,从简单的单张识别到复杂的企业级批量处理系统。根据你的具体需求,可以选择合适的示例作为起点进行开发。