Files
acmcc/supabase/functions/parse-invoice/index.ts
T
admin 71cc71f89f Inbound invoices: recipient-alias routing + parser hardening
parse-invoice: guard oversized PDFs (>18MB → clear "too large, saved for manual
entry" message) and surface the AI gateway's actual error instead of a bare
status code.

inbound-bill-email: route to an association by the recipient alias
(<alias>@bills.avriamail.com, via associations.inbound_alias) in addition to the
sender's vendor mapping; fix extractEmail (bare addresses were mis-split, e.g.
invoices@x → s@x); surface parse-invoice's real error in the inbox. Deployed via
MCP; migration associations_inbound_alias adds + populates the aliases.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-17 22:13:49 -04:00

216 lines
7.9 KiB
TypeScript

// Request headers that cross-origin callers are permitted to send.
const allowedRequestHeaders =
  "authorization, x-client-info, apikey, content-type, x-supabase-client-platform, x-supabase-client-platform-version, x-supabase-client-runtime, x-supabase-client-runtime-version";

/** CORS headers attached to every response produced by this function. */
const corsHeaders = {
  "Access-Control-Allow-Origin": "*",
  "Access-Control-Allow-Headers": allowedRequestHeaders,
};
/**
 * Cleans up a model-produced JSON candidate so `JSON.parse` is more likely to
 * accept it.
 *
 * Steps, in order:
 *  1. Trim surrounding whitespace, then strip a leading ``` / ```json fence and
 *     a trailing ``` fence. Trimming first matters: a closing fence followed by
 *     a newline would otherwise not be anchored at the end of the string.
 *  2. Replace control characters (C0, DEL, C1) with a space, keeping newline,
 *     carriage return and tab.
 *  3. Drop trailing commas before `}` or `]` — but only outside string
 *     literals, so text such as "see [a, ]" inside a value is left untouched.
 *
 * @param value Raw candidate text, possibly wrapped in a markdown code fence.
 * @returns The cleaned text. It is not guaranteed to be valid JSON.
 */
function sanitizeJsonCandidate(value: string): string {
  return value
    .trim()
    .replace(/^```(?:json)?\s*/i, "")
    .replace(/\s*```$/, "")
    .replace(/[\u0000-\u001F\u007F-\u009F]/g, (char) =>
      char === "\n" || char === "\r" || char === "\t" ? char : " ",
    )
    // The first alternative consumes whole string literals so that commas
    // inside them are never seen by the trailing-comma alternative. `closer`
    // is only defined when the trailing-comma alternative matched.
    .replace(
      /"(?:[^"\\]|\\[\s\S])*"|,\s*([}\]])/g,
      (match: string, closer?: string) => closer ?? match,
    )
    .trim();
}
/**
 * Pulls the first brace-balanced `{ ... }` span out of a model response and
 * returns it after passing it through `sanitizeJsonCandidate`.
 *
 * If the response contains a closed markdown code fence, only the fenced text
 * is searched; otherwise the whole response is. When no `{` is present, the
 * sanitized search text is returned as-is.
 *
 * @param value Raw model response.
 * @returns Sanitized text of the first balanced object (or of the whole search
 *   text when it contains no `{`).
 * @throws Error when an object is opened but never closed.
 */
function extractJsonObject(value: string) {
  const fenceMatch = /```(?:json)?\s*([\s\S]*?)```/i.exec(value);
  const text = fenceMatch?.[1] || value;

  const openIndex = text.indexOf("{");
  if (openIndex < 0) {
    return sanitizeJsonCandidate(text);
  }

  let nesting = 0;
  let insideString = false;
  let skipNext = false;
  let cursor = openIndex;

  while (cursor < text.length) {
    const ch = text[cursor];
    const position = cursor;
    cursor += 1;

    if (skipNext) {
      // Character following a backslash: consumed without interpretation.
      skipNext = false;
    } else if (ch === "\\") {
      skipNext = true;
    } else if (ch === '"') {
      insideString = !insideString;
    } else if (!insideString) {
      // Braces only count toward nesting while outside a string literal.
      if (ch === "{") {
        nesting += 1;
      } else if (ch === "}") {
        nesting -= 1;
        if (nesting === 0) {
          return sanitizeJsonCandidate(text.slice(openIndex, position + 1));
        }
      }
    }
  }

  throw new Error("AI response was truncated before the JSON object closed");
}
/**
 * Parses the AI model's reply into a JSON value, trying progressively more
 * invasive strategies:
 *
 *  1. The trimmed reply verbatim — valid JSON is never rewritten.
 *  2. The whole reply after `sanitizeJsonCandidate` (fences, control
 *     characters, trailing commas).
 *  3. The first brace-balanced object found by `extractJsonObject`, which
 *     already returns sanitized text.
 *
 * Extraction is attempted last and lazily so that its "truncated" error can
 * only surface when the reply could not be parsed any other way.
 *
 * @param content Raw text content returned by the model.
 * @returns The parsed JSON value (shape is not validated here).
 * @throws Error from `extractJsonObject` when the object is never closed, or
 *   a generic parse error when every strategy fails.
 */
function parseAiJson(content: string): unknown {
  const trimmed = content.trim();

  try {
    return JSON.parse(trimmed);
  } catch {
    // Not valid as-is; fall through to the sanitized form.
  }

  try {
    return JSON.parse(sanitizeJsonCandidate(trimmed));
  } catch {
    // Still not valid; fall through to object extraction.
  }

  // May throw the specific truncation error, which is deliberately allowed to
  // propagate so the caller can report it.
  const extracted = extractJsonObject(content);
  try {
    return JSON.parse(extracted);
  } catch {
    throw new Error("Failed to parse AI response as valid JSON");
  }
}
/** Largest PDF (decoded size, in MiB) that is forwarded to the AI gateway. */
const MAX_PDF_MB = 18;

/** System prompt describing the extraction rules and the expected JSON shape. */
const INVOICE_EXTRACTION_PROMPT = `You are a meticulous invoice data extraction AI. Analyze the provided PDF invoice carefully — examine EVERY page and EVERY line item, including continuation pages, sub-totals, and tables. Do not skip or summarize line items; capture each one individually exactly as it appears.
CRITICAL RULES:
- Read the entire document end-to-end before responding.
- Extract EVERY line item separately, even if there are 50+ rows. Do not collapse, group, or omit any.
- Preserve the exact wording from the invoice for descriptions and names.
- For dates, convert to YYYY-MM-DD. If only month/year is shown, use the 1st of the month.
- For monetary values, use numbers only (no currency symbols, no commas). Negative amounts (credits/discounts) should be negative numbers.
- If a field truly cannot be determined from the document, use null for strings and 0 for numbers — do NOT guess.
- The sum of line item amounts should reconcile with the subtotal; double-check before returning.
- Return a complete, syntactically valid JSON object with no markdown fences, comments, trailing commas, or extra text.
- The sum of line item amounts should reconcile with the subtotal; double-check before returning.
Return ONLY valid JSON (no markdown, no code blocks, no commentary) with this exact structure:
{
"vendor_name": "string",
"vendor_address": "string or null",
"vendor_phone": "string or null",
"client_name": "string or null",
"client_address": "string or null",
"invoice_number": "string",
"invoice_date": "YYYY-MM-DD",
"due_date": "YYYY-MM-DD or null",
"service_period": "string or null",
"subtotal": number,
"tax": number,
"other_charges": number,
"total_amount": number,
"currency": "USD",
"payment_terms": "string or null",
"notes": "string or null",
"line_items": [
{
"line_number": number,
"description": "string (full description as printed)",
"name": "string (short item name/SKU if present, else first words of description)",
"date": "YYYY-MM-DD or null",
"quantity": number or null,
"unit_price": number or null,
"amount": number,
"category": "string or null",
"notes": "string or null"
}
]
}`;

/** Builds a JSON response carrying the CORS headers. */
function jsonResponse(body: unknown, status = 200): Response {
  return new Response(JSON.stringify(body), {
    status,
    headers: { ...corsHeaders, "Content-Type": "application/json" },
  });
}

/** Extracts a printable message from a caught value of unknown type. */
function describeError(err: unknown, fallback: string): string {
  if (err instanceof Error && err.message) return err.message;
  if (typeof err === "string" && err) return err;
  return fallback;
}

/**
 * HTTP entry point: accepts `{ pdf_base64, filename? }`, sends the PDF to the
 * AI gateway for invoice extraction and returns `{ data }` on success or
 * `{ error }` on failure.
 *
 * Status codes produced here:
 *  - 400 request body is not JSON, or `pdf_base64` is missing / not a string
 *  - 413 decoded PDF exceeds MAX_PDF_MB
 *  - 422 the model's reply could not be parsed as JSON
 *  - 500 API key not configured, or an unexpected failure
 *  - 502 the AI gateway answered with a non-2xx status
 */
Deno.serve(async (req) => {
  // CORS preflight.
  if (req.method === "OPTIONS") {
    return new Response(null, { headers: corsHeaders });
  }

  try {
    // A malformed body is the caller's fault, so report it as 400 rather than
    // letting it fall through to the generic 500 handler.
    let body: unknown;
    try {
      body = await req.json();
    } catch {
      return jsonResponse({ error: "Request body must be valid JSON" }, 400);
    }
    const payload: Record<string, unknown> =
      typeof body === "object" && body !== null ? (body as Record<string, unknown>) : {};

    // Require a non-empty string: anything else would make the size guard
    // below compute NaN and silently pass.
    const pdf_base64 = payload.pdf_base64;
    if (typeof pdf_base64 !== "string" || pdf_base64.length === 0) {
      return jsonResponse({ error: "No PDF data provided" }, 400);
    }

    // The filename is caller-supplied and is embedded inside a quoted span of
    // the prompt, so strip quotes/control characters and bound its length.
    const rawName = typeof payload.filename === "string" ? payload.filename : "";
    const safeName =
      rawName.replace(/[\u0000-\u001F\u007F"]/g, " ").trim().slice(0, 255) || "invoice.pdf";

    const LOVABLE_API_KEY = Deno.env.get("LOVABLE_API_KEY");
    if (!LOVABLE_API_KEY) {
      return jsonResponse({ error: "AI API key not configured" }, 500);
    }

    // Guard against oversized/scanned PDFs that the AI gateway rejects with an
    // opaque error. The caller (inbound-bill-email) still keeps the attachment
    // in the inbox for manual entry when this returns an error.
    // Base64 encodes 3 bytes per 4 characters, hence the 3/4 estimate.
    const approxBytes = Math.floor((pdf_base64.length * 3) / 4);
    const sizeMB = approxBytes / (1024 * 1024);
    if (sizeMB > MAX_PDF_MB) {
      return jsonResponse(
        { error: `PDF too large to auto-parse (${sizeMB.toFixed(1)} MB; limit ${MAX_PDF_MB} MB). Saved to the inbox for manual entry.` },
        413,
      );
    }

    const response = await fetch("https://ai.gateway.lovable.dev/v1/chat/completions", {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        Authorization: `Bearer ${LOVABLE_API_KEY}`,
      },
      body: JSON.stringify({
        model: "google/gemini-2.5-pro",
        messages: [
          { role: "system", content: INVOICE_EXTRACTION_PROMPT },
          {
            role: "user",
            content: [
              {
                type: "text",
                text: `Extract ALL invoice data from this PDF file named "${safeName}". Examine every page and capture every line item — do not skip or summarize any rows. Return only the JSON object.`,
              },
              {
                // The PDF is passed as a data URL in an image_url content part.
                type: "image_url",
                image_url: {
                  url: `data:application/pdf;base64,${pdf_base64}`,
                },
              },
            ],
          },
        ],
        temperature: 0,
        max_tokens: 32000,
        response_format: { type: "json_object" },
      }),
    });

    if (!response.ok) {
      // Surface the gateway's own error text (collapsed and truncated) so the
      // caller sees more than a bare status code.
      const errText = await response.text();
      console.error("AI Gateway error:", response.status, errText);
      const snippet = errText.replace(/\s+/g, " ").slice(0, 300);
      const hint = response.status === 429 || response.status === 402
        ? " (AI gateway rate/credit limit)"
        : response.status === 413 || /too large|payload|size/i.test(errText)
        ? " (PDF too large for the AI gateway — saved to inbox for manual entry)"
        : "";
      return jsonResponse(
        { error: `AI gateway error ${response.status}${hint}: ${snippet || "no detail"}` },
        502,
      );
    }

    const aiResult = await response.json();
    const content = aiResult.choices?.[0]?.message?.content || "";

    let parsed;
    try {
      // Non-string content is serialized first so the parser always gets text.
      parsed = parseAiJson(typeof content === "string" ? content : JSON.stringify(content));
    } catch (parseError) {
      console.error("Failed to parse AI response:", content);
      return jsonResponse(
        { error: describeError(parseError, "Failed to parse AI response") },
        422,
      );
    }

    return jsonResponse({ data: parsed });
  } catch (err) {
    console.error("parse-invoice error:", err);
    return jsonResponse({ error: describeError(err, "Unexpected error while parsing invoice") }, 500);
  }
});