Files
acmcc/supabase/functions/parse-invoice/index.ts
2026-06-01 20:19:26 -04:00

197 lines
6.8 KiB
TypeScript

/**
 * CORS headers attached to every response from this function.
 * Any origin is allowed; the allowed request headers are listed one per
 * entry below and joined into the comma-separated header value.
 */
const corsHeaders = {
  "Access-Control-Allow-Origin": "*",
  "Access-Control-Allow-Headers": [
    "authorization",
    "x-client-info",
    "apikey",
    "content-type",
    "x-supabase-client-platform",
    "x-supabase-client-platform-version",
    "x-supabase-client-runtime",
    "x-supabase-client-runtime-version",
  ].join(", "),
};
/**
 * Cleans up text that is expected to contain JSON so that `JSON.parse` has a
 * better chance of accepting it.
 *
 * Steps, in order:
 *  1. Trim, then strip a leading ``` / ```json fence and a trailing ``` fence.
 *     Trimming first matters: the fence patterns are anchored to the very
 *     start/end of the text, so surrounding whitespace or a final newline
 *     would otherwise leave the fence in place.
 *  2. Replace control characters (other than newline, carriage return, tab)
 *     with a space.
 *  3. Drop trailing commas that directly precede `}` or `]` — but only
 *     outside of string literals, so text such as `"a, ]"` inside a value is
 *     left untouched.
 *
 * @param value Raw text, possibly wrapped in a markdown code fence.
 * @returns The cleaned, trimmed text. It is not guaranteed to be valid JSON.
 */
function sanitizeJsonCandidate(value: string) {
  const whitespace = /\s/;
  const text = value
    .trim()
    .replace(/^```(?:json)?\s*/i, "")
    .replace(/\s*```$/, "")
    .replace(/[\u0000-\u001F\u007F-\u009F]/g, (char) =>
      char === "\n" || char === "\r" || char === "\t" ? char : " ",
    );

  let result = "";
  let inString = false;
  let escaped = false;
  for (let i = 0; i < text.length; i++) {
    const char = text.charAt(i);

    if (inString) {
      // Inside a string literal everything is copied verbatim; we only track
      // where the literal ends (an unescaped double quote).
      result += char;
      if (escaped) escaped = false;
      else if (char === "\\") escaped = true;
      else if (char === '"') inString = false;
      continue;
    }

    if (char === '"') {
      inString = true;
      result += char;
      continue;
    }

    if (char === ",") {
      // Look past any whitespace: a comma followed by a closing bracket is a
      // trailing comma, which strict JSON does not allow.
      let next = i + 1;
      while (next < text.length && whitespace.test(text.charAt(next))) next++;
      const closer = text.charAt(next);
      if (closer === "}" || closer === "]") {
        // Skip the comma and the whitespace; the closer is emitted next turn.
        i = next - 1;
        continue;
      }
    }

    result += char;
  }
  return result.trim();
}
/**
 * Pulls the first brace-balanced `{ ... }` span out of free-form text.
 *
 * If the text contains a ``` / ```json fenced block with non-empty content,
 * only that content is searched; otherwise the whole text is. When no `{` is
 * present at all, the searched text is returned after sanitizing.
 *
 * @param value Text that may wrap a JSON object in prose or a code fence.
 * @returns The sanitized object text (or the sanitized input when it holds no `{`).
 * @throws Error when an opening `{` is found but never balanced by a `}`.
 */
function extractJsonObject(value: string) {
  const fenceMatch = /```(?:json)?\s*([\s\S]*?)```/i.exec(value);
  const text = (fenceMatch && fenceMatch[1]) || value;

  const openAt = text.indexOf("{");
  if (openAt < 0) {
    return sanitizeJsonCandidate(text);
  }

  let nesting = 0;
  let insideString = false;
  let skipNext = false;
  for (let index = openAt; index < text.length; index += 1) {
    const ch = text[index];
    if (skipNext) {
      // Character following a backslash: consumed without interpretation.
      skipNext = false;
    } else if (ch === "\\") {
      skipNext = true;
    } else if (ch === '"') {
      insideString = !insideString;
    } else if (!insideString) {
      // Braces only count towards nesting outside of string literals.
      if (ch === "{") nesting += 1;
      if (ch === "}") nesting -= 1;
      if (nesting === 0) {
        return sanitizeJsonCandidate(text.slice(openAt, index + 1));
      }
    }
  }

  throw new Error("AI response was truncated before the JSON object closed");
}
/**
 * Parses the model's reply as JSON, trying progressively more forgiving
 * strategies:
 *
 *  1. the trimmed reply exactly as received (so already-valid JSON is never
 *     altered by the clean-up heuristics),
 *  2. the trimmed reply after `sanitizeJsonCandidate`,
 *  3. the object located by `extractJsonObject`, sanitized.
 *
 * The extraction step runs only after the direct attempts have failed, so an
 * error raised by the extractor cannot prevent a reply that parses directly
 * from being accepted.
 *
 * @param content Raw text returned by the model.
 * @returns The value produced by `JSON.parse`.
 * @throws Error whatever `extractJsonObject` throws (e.g. its truncation
 *   error), or "Failed to parse AI response as valid JSON" when no strategy
 *   yields parseable JSON.
 */
function parseAiJson(content: string) {
  const direct = content.trim();

  for (const candidate of [direct, sanitizeJsonCandidate(direct)]) {
    try {
      return JSON.parse(candidate);
    } catch {
      // Not parseable in this form; fall through to the next strategy.
    }
  }

  // Deliberately outside a try/catch: the extractor's own error message is
  // more specific than the generic one below and is surfaced to the caller.
  const extracted = extractJsonObject(content);
  try {
    return JSON.parse(sanitizeJsonCandidate(extracted));
  } catch {
    throw new Error("Failed to parse AI response as valid JSON");
  }
}
/** Builds a JSON response that carries the shared CORS headers. */
function jsonResponse(body: unknown, status = 200): Response {
  return new Response(JSON.stringify(body), {
    status,
    headers: { ...corsHeaders, "Content-Type": "application/json" },
  });
}

/** Returns a printable message for a caught value of unknown type. */
function describeError(err: unknown, fallback: string): string {
  if (err instanceof Error) return err.message || fallback;
  return err == null ? fallback : String(err);
}

/**
 * HTTP entry point: accepts `{ pdf_base64, filename? }` as a JSON body,
 * forwards the PDF to the AI gateway with an extraction prompt, and replies
 * with `{ data: <parsed invoice> }`.
 *
 * Responses:
 *  - OPTIONS            -> empty body with CORS headers (preflight)
 *  - 400                -> body is not JSON, or `pdf_base64` is missing / not a string
 *  - 500                -> `LOVABLE_API_KEY` is not set, or an unexpected error occurred
 *  - 502                -> the AI gateway answered with a non-OK status
 *  - 422                -> the model's reply could not be parsed as JSON
 *  - 200                -> `{ data }` with the parsed JSON
 */
Deno.serve(async (req) => {
  if (req.method === "OPTIONS") {
    return new Response(null, { headers: corsHeaders });
  }
  try {
    // A malformed body is the caller's mistake, so report it as 400 rather
    // than letting it fall through to the generic 500 handler.
    let body: unknown;
    try {
      body = await req.json();
    } catch {
      return jsonResponse({ error: "Request body must be valid JSON" }, 400);
    }
    const payload: Record<string, unknown> =
      typeof body === "object" && body !== null ? (body as Record<string, unknown>) : {};
    const pdf_base64 = payload.pdf_base64;
    if (typeof pdf_base64 !== "string" || pdf_base64.length === 0) {
      return jsonResponse({ error: "No PDF data provided" }, 400);
    }
    // Only a non-empty string is interpolated into the prompt; anything else
    // falls back to the default name.
    const pdfName =
      typeof payload.filename === "string" && payload.filename ? payload.filename : "invoice.pdf";

    const LOVABLE_API_KEY = Deno.env.get("LOVABLE_API_KEY");
    if (!LOVABLE_API_KEY) {
      return jsonResponse({ error: "AI API key not configured" }, 500);
    }
    // NOTE: the template below is sent to the model verbatim; its lines are
    // intentionally not indented.
    const prompt = `You are a meticulous invoice data extraction AI. Analyze the provided PDF invoice carefully — examine EVERY page and EVERY line item, including continuation pages, sub-totals, and tables. Do not skip or summarize line items; capture each one individually exactly as it appears.
CRITICAL RULES:
- Read the entire document end-to-end before responding.
- Extract EVERY line item separately, even if there are 50+ rows. Do not collapse, group, or omit any.
- Preserve the exact wording from the invoice for descriptions and names.
- For dates, convert to YYYY-MM-DD. If only month/year is shown, use the 1st of the month.
- For monetary values, use numbers only (no currency symbols, no commas). Negative amounts (credits/discounts) should be negative numbers.
- If a field truly cannot be determined from the document, use null for strings and 0 for numbers — do NOT guess.
- The sum of line item amounts should reconcile with the subtotal; double-check before returning.
- Return a complete, syntactically valid JSON object with no markdown fences, comments, trailing commas, or extra text.
- The sum of line item amounts should reconcile with the subtotal; double-check before returning.
Return ONLY valid JSON (no markdown, no code blocks, no commentary) with this exact structure:
{
"vendor_name": "string",
"vendor_address": "string or null",
"vendor_phone": "string or null",
"client_name": "string or null",
"client_address": "string or null",
"invoice_number": "string",
"invoice_date": "YYYY-MM-DD",
"due_date": "YYYY-MM-DD or null",
"service_period": "string or null",
"subtotal": number,
"tax": number,
"other_charges": number,
"total_amount": number,
"currency": "USD",
"payment_terms": "string or null",
"notes": "string or null",
"line_items": [
{
"line_number": number,
"description": "string (full description as printed)",
"name": "string (short item name/SKU if present, else first words of description)",
"date": "YYYY-MM-DD or null",
"quantity": number or null,
"unit_price": number or null,
"amount": number,
"category": "string or null",
"notes": "string or null"
}
]
}`;
    const response = await fetch("https://ai.gateway.lovable.dev/v1/chat/completions", {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        Authorization: `Bearer ${LOVABLE_API_KEY}`,
      },
      body: JSON.stringify({
        model: "google/gemini-2.5-pro",
        messages: [
          { role: "system", content: prompt },
          {
            role: "user",
            content: [
              {
                type: "text",
                text: `Extract ALL invoice data from this PDF file named "${pdfName}". Examine every page and capture every line item — do not skip or summarize any rows. Return only the JSON object.`,
              },
              {
                // The PDF is passed as a data URL in an image_url part.
                type: "image_url",
                image_url: {
                  url: `data:application/pdf;base64,${pdf_base64}`,
                },
              },
            ],
          },
        ],
        temperature: 0,
        max_tokens: 32000,
        response_format: { type: "json_object" },
      }),
    });
    if (!response.ok) {
      const errText = await response.text();
      console.error("AI Gateway error:", errText);
      return jsonResponse({ error: `AI processing failed: ${response.status}` }, 502);
    }
    const aiResult = await response.json();
    const content = aiResult.choices?.[0]?.message?.content || "";
    let parsed;
    try {
      parsed = parseAiJson(typeof content === "string" ? content : JSON.stringify(content));
    } catch (parseError) {
      console.error("Failed to parse AI response:", content);
      return jsonResponse({ error: describeError(parseError, "Failed to parse AI response") }, 422);
    }
    return jsonResponse({ data: parsed });
  } catch (err) {
    console.error("parse-invoice error:", err);
    // `err` is `unknown`; always send a string so the error key is never dropped.
    return jsonResponse({ error: describeError(err, "Internal server error") }, 500);
  }
});