Accounts payable teams spend an average of 3.7 minutes manually processing each invoice. At 200 invoices per month, that's 12+ hours of data entry. Here's how to build an automated pipeline that brings this to under 10 seconds per document.
Pipeline Architecture
Email/SFTP/API → Receive → Extract → Validate → Enrich → Store → Notify
Each stage is independent and can fail gracefully without losing the document.
Stage 1: Document Ingestion
Accept invoices from multiple sources. The example below shows the HTTP upload endpoint:
const express = require('express');
const multer = require('multer');
const path = require('path');
// Multer middleware for invoice uploads. Files go under /tmp/invoices, each
// file is capped at 20MB, and the filter callback reports `true` only for
// the listed extensions (compared case-insensitively).
const upload = multer({
  dest: '/tmp/invoices',
  limits: { fileSize: 20 * 1024 * 1024 }, // 20MB
  fileFilter(req, file, cb) {
    const extension = path.extname(file.originalname).toLowerCase();
    const isAccepted = ['.pdf', '.docx', '.xlsx', '.png', '.jpg'].includes(extension);
    cb(null, isAccepted);
  },
});
/**
 * POST /api/invoices/upload
 *
 * Accepts up to 20 files in the `files` form field, turns each accepted file
 * into a queued job, enqueues the batch, and responds with the job ids and
 * statuses.
 *
 * Any failure (job id generation, queueing) is forwarded to Express through
 * `next(err)`; a rejected promise from an async handler is otherwise not
 * routed to the error middleware in Express 4 and the request would hang.
 */
app.post('/api/invoices/upload', upload.array('files', 20), async (req, res, next) => {
  try {
    // `req.files` is not populated when the request is not multipart, so
    // fall back to an empty list instead of throwing on `.map`.
    const files = req.files ?? [];
    const jobs = files.map((file) => ({
      id: generateJobId(),
      path: file.path,
      filename: file.originalname,
      status: 'queued',
    }));
    await queue.addBatch(jobs);
    res.json({ jobs: jobs.map(({ id, status }) => ({ id, status })) });
  } catch (err) {
    next(err);
  }
});
Stage 2: Extraction
/**
 * Sends the job's file to the ParseFlow extract endpoint and returns the
 * parsed JSON response body.
 *
 * @param {{ path: string, filename: string }} job - Job whose file is uploaded.
 * @returns {Promise<object>} The decoded JSON body of a successful response.
 * @throws {Error} "Extraction failed: …" when the response is not OK. The
 *   message carries the API's `message` field when the error body is JSON
 *   and has one, otherwise the HTTP status. The status code is also exposed
 *   as `error.status`.
 */
async function extractInvoiceData(job) {
  // NOTE(review): appending a read stream assumes a FormData implementation
  // that accepts streams (e.g. the `form-data` package); the WHATWG FormData
  // built into Node expects a string or Blob — confirm which one is in scope.
  const formData = new FormData();
  formData.append('file', fs.createReadStream(job.path), job.filename);
  formData.append('fields', JSON.stringify([
    'invoice_number', 'invoice_date', 'due_date',
    'vendor_name', 'vendor_address', 'vendor_tax_id',
    'line_items', 'subtotal', 'tax_amount', 'total_amount',
    'currency', 'payment_terms',
  ]));

  const response = await fetch('https://parseflow.dev/api/extract', {
    method: 'POST',
    headers: { 'Authorization': `Bearer ${process.env.PARSEFLOW_KEY}` },
    body: formData,
  });

  if (!response.ok) {
    let detail;
    try {
      const body = await response.json();
      detail = body?.message;
    } catch {
      // The error body was not JSON (e.g. an HTML gateway page); fall back
      // to the HTTP status below rather than surfacing a parse error.
    }
    const httpSummary = [`HTTP ${response.status}`, response.statusText]
      .filter(Boolean)
      .join(' ');
    const error = new Error(`Extraction failed: ${detail ?? httpSummary}`);
    error.status = response.status;
    throw error;
  }

  return response.json();
}
Stage 3: Validation
Never trust extracted data without validation:
/**
 * Checks extracted invoice data for required fields and arithmetic
 * consistency.
 *
 * Amounts may be numbers or numeric strings. A consistency check only runs
 * when every amount it needs can be read as a finite number; a legitimate
 * zero (tax-exempt invoice, zero-total invoice) counts as present.
 *
 * @param {object} data - Extracted invoice fields.
 * @returns {{ valid: boolean, errors: string[] }} `valid` is true when no
 *   problems were found; `errors` lists each problem as a message.
 */
function validateInvoice(data) {
  const errors = [];
  const TOLERANCE = 0.02; // 2 cents tolerance for rounding
  // Slack for binary floating-point noise, so that a difference of exactly
  // 2 cents (e.g. 0.1 + 0.2 vs 0.28) is not reported as a mismatch.
  const FLOAT_SLACK = 1e-9;

  // Reads a number or numeric string as a finite number; null otherwise.
  const toAmount = (value) => {
    if (typeof value === 'number') {
      return Number.isFinite(value) ? value : null;
    }
    if (typeof value === 'string' && value.trim() !== '') {
      const parsed = Number(value);
      return Number.isFinite(parsed) ? parsed : null;
    }
    return null;
  };
  const differs = (a, b) => Math.abs(a - b) > TOLERANCE + FLOAT_SLACK;

  // Required fields
  if (!data.invoice_number) errors.push('Missing invoice number');
  if (!data.vendor_name) errors.push('Missing vendor name');
  // A total of exactly 0 is a value, not a missing field.
  if (data.total_amount !== 0 && !data.total_amount) errors.push('Missing total amount');

  const subtotal = toAmount(data.subtotal);
  const taxAmount = toAmount(data.tax_amount);
  const totalAmount = toAmount(data.total_amount);

  // Math validation
  if (data.line_items?.length > 0) {
    // Sum numerically; the sum becomes null as soon as one line total is
    // unreadable, in which case the comparison is skipped.
    const lineTotal = data.line_items.reduce((sum, item) => {
      const amount = toAmount(item?.total);
      return sum === null || amount === null ? null : sum + amount;
    }, 0);
    if (lineTotal !== null && subtotal !== null && differs(lineTotal, subtotal)) {
      errors.push(`Line items sum (${lineTotal}) != subtotal (${data.subtotal})`);
    }
  }

  if (subtotal !== null && taxAmount !== null && totalAmount !== null) {
    const expected = subtotal + taxAmount;
    if (differs(expected, totalAmount)) {
      errors.push(`Subtotal + tax (${expected}) != total (${data.total_amount})`);
    }
  }

  // Duplicate detection
  // (check against your DB for same invoice_number + vendor)

  return { valid: errors.length === 0, errors };
}
Stage 4: Enrichment
Match the vendor to your supplier database:
/**
 * Looks up the invoice's vendor in the supplier store and annotates the
 * invoice in place.
 *
 * On a match, the supplier's id, default GL account, default cost center,
 * approver email and preferred payment method are copied onto `data`.
 * Without a match, `data` is flagged for manual review instead.
 *
 * @param {object} data - Invoice fields; mutated and returned.
 * @returns {Promise<object>} The same `data` object.
 */
async function enrichInvoice(data) {
  // Presumably a fuzzy name match — the matching strategy lives in findBestMatch.
  const supplier = await db.suppliers.findBestMatch(data.vendor_name);

  if (!supplier) {
    Object.assign(data, {
      requires_review: true,
      review_reason: 'Unknown vendor — manual matching required',
    });
    return data;
  }

  Object.assign(data, {
    supplier_id: supplier.id,
    gl_account: supplier.default_gl_account,
    cost_center: supplier.default_cost_center,
    approver_email: supplier.approver_email,
    payment_method: supplier.preferred_payment_method,
  });
  return data;
}
Stage 5: Notifications
/**
 * Emails the invoice's approver when approval is needed: the total exceeds
 * 5000, or the invoice is flagged `requires_review`. Otherwise does nothing.
 *
 * @param {object} invoice - Invoice; passed to the template as `data`.
 * @returns {Promise<void>}
 */
async function notifyApprover(invoice) {
  const exceedsThreshold = invoice.total_amount > 5000;
  if (!exceedsThreshold && !invoice.requires_review) {
    return;
  }

  // NOTE(review): `approver_email` is used as-is. If it is only populated for
  // matched vendors, invoices flagged for review may have no recipient —
  // confirm how the email service treats an undefined `to`.
  const { approver_email: to, invoice_number: number, vendor_name: vendor } = invoice;
  const message = {
    to,
    subject: `Invoice approval required: ${number} — ${vendor}`,
    template: 'invoice-approval',
    data: invoice,
  };
  await emailService.send(message);
}
Error Handling and Dead Letter Queue
/**
 * Runs one job through extract → validate → enrich → store → notify and
 * records the outcome on the job.
 *
 * Outcomes written to `job.status`:
 *  - 'validation_failed': validation errors are stored on `job.errors` and
 *    the job is handed to `moveToReview`.
 *  - 'completed': the enriched invoice was stored and the approver step ran.
 *  - 'retry': a step threw; `job.retry_after` is set `attempts * 15` minutes
 *    ahead.
 *  - 'dead_letter': a step threw on the third attempt; ops are alerted.
 *
 * The job is persisted through `db.jobs.update` on every path, including
 * validation failure and an error raised while alerting ops.
 *
 * @param {object} job - Job record; mutated in place.
 * @returns {Promise<void>}
 */
async function processJob(job) {
  try {
    job.status = 'processing';
    const extracted = await extractInvoiceData(job);

    const validation = validateInvoice(extracted);
    if (!validation.valid) {
      job.status = 'validation_failed';
      job.errors = validation.errors;
      await moveToReview(job);
      return; // the `finally` below still persists the job
    }

    const enriched = await enrichInvoice(extracted);
    await db.invoices.create({ ...enriched, job_id: job.id });
    // NOTE(review): a failure here happens after the invoice is stored, so a
    // retry would store it again unless `create` is idempotent — confirm.
    await notifyApprover(enriched);
    job.status = 'completed';
  } catch (err) {
    // Jobs may arrive without an `attempts` counter; start from 0 so the
    // count never becomes NaN (which would never reach the dead-letter limit).
    const previousAttempts = Number.isInteger(job.attempts) ? job.attempts : 0;
    job.attempts = previousAttempts + 1;
    if (job.attempts >= 3) {
      job.status = 'dead_letter';
      await alertOps(job, err);
    } else {
      job.status = 'retry';
      job.retry_after = addMinutes(new Date(), job.attempts * 15);
    }
  } finally {
    await db.jobs.update(job);
  }
}
Results
A pipeline like this, using ParseFlow for the extraction stage, processes a typical invoice in 4-8 seconds with 94%+ field accuracy across variable formats. The validation stage catches the remaining edge cases and routes them to a human reviewer queue rather than silently accepting bad data.
The full pipeline handles PDF, Word, Excel, and image (PNG/JPG) invoices with the same code path — no special-casing per format.
United States
NORTH AMERICA
Related News
How Braze’s CTO is rethinking engineering for the agentic era
10h ago
Amazon Employees Are 'Tokenmaxxing' Due To Pressure To Use AI Tools
21h ago

Implementing Multicloud Data Sharding with Hexagonal Storage Adapters
15h ago

DeepMind’s CEO Says AGI May Be ~4 Years Away. The Last Three Missing Pieces Are Not What Most People Think.
15h ago

CCSnapshot - A Claude Code Configs Transfer Tool
21h ago