/** CORS headers attached to every response, including the OPTIONS preflight. */
const corsHeaders = {
  "Access-Control-Allow-Origin": "*",
  "Access-Control-Allow-Headers":
    "authorization, x-client-info, apikey, content-type, x-supabase-client-platform, x-supabase-client-platform-version, x-supabase-client-runtime, x-supabase-client-runtime-version",
};

/** Control characters that are replaced by a space everywhere; tab, LF and CR are handled separately. */
const DISALLOWED_CONTROL_CHARS = /[\u0000-\u0008\u000B\u000C\u000E-\u001F\u007F-\u009F]/g;

const WHITESPACE = /\s/;

/** JSON escape sequences for whitespace that may not appear raw inside a JSON string literal. */
const RAW_WHITESPACE_ESCAPES: ReadonlyMap<string, string> = new Map([
  ["\n", "\\n"],
  ["\r", "\\r"],
  ["\t", "\\t"],
]);

/**
 * Best-effort clean-up of text that is expected to contain JSON.
 *
 * - strips a leading/trailing markdown code fence (surrounding whitespace is tolerated),
 * - replaces control characters other than tab/LF/CR with a space,
 * - escapes raw tab/LF/CR that occur inside string literals (JSON forbids them there),
 * - drops trailing commas before `}` / `]`, but only outside string literals so that
 *   string values such as `"a,]"` are left intact.
 *
 * @param value Raw text, typically a model response or a slice of it.
 * @returns The cleaned text; it is not guaranteed to be valid JSON.
 */
function sanitizeJsonCandidate(value: string): string {
  // Trim first so the `^` / `$` anchors see the fences even when the text
  // starts or ends with blank lines.
  const cleaned = value
    .trim()
    .replace(/^```(?:json)?\s*/i, "")
    .replace(/\s*```$/, "")
    .replace(DISALLOWED_CONTROL_CHARS, " ");

  let result = "";
  let inString = false;
  let escaped = false;

  for (let i = 0; i < cleaned.length; i++) {
    const char = cleaned.charAt(i);

    if (inString) {
      if (escaped) {
        escaped = false;
        result += char;
      } else if (char === "\\") {
        escaped = true;
        result += char;
      } else if (char === '"') {
        inString = false;
        result += char;
      } else {
        result += RAW_WHITESPACE_ESCAPES.get(char) ?? char;
      }
      continue;
    }

    if (char === '"') {
      inString = true;
    } else if (char === ",") {
      // A comma whose next non-whitespace character closes a container is a
      // trailing comma: skip it.
      let next = i + 1;
      while (next < cleaned.length && WHITESPACE.test(cleaned.charAt(next))) next++;
      const closer = cleaned.charAt(next);
      if (closer === "}" || closer === "]") continue;
    }
    result += char;
  }

  return result.trim();
}

/**
 * Locates the first top-level JSON object in `value` and returns it sanitized.
 *
 * If the text contains a complete markdown code fence, only the fenced content is
 * searched. When no `{` is present the (sanitized) text is returned unchanged so the
 * caller can still attempt to parse it.
 *
 * @param value Raw model output.
 * @returns Sanitized text of the first balanced `{ ... }` object.
 * @throws Error when an object is opened but never closed (truncated output).
 */
function extractJsonObject(value: string): string {
  const fenced = value.match(/```(?:json)?\s*([\s\S]*?)```/i)?.[1];
  // `||` on purpose: an empty fenced block falls back to the full text.
  const source = fenced || value;
  const start = source.indexOf("{");
  if (start === -1) return sanitizeJsonCandidate(source);

  let depth = 0;
  let inString = false;
  let escaped = false;

  for (let i = start; i < source.length; i++) {
    const char = source.charAt(i);

    if (inString) {
      // Backslash escapes only have meaning inside string literals.
      if (escaped) escaped = false;
      else if (char === "\\") escaped = true;
      else if (char === '"') inString = false;
      continue;
    }

    if (char === '"') {
      inString = true;
    } else if (char === "{") {
      depth++;
    } else if (char === "}") {
      depth--;
      if (depth === 0) return sanitizeJsonCandidate(source.slice(start, i + 1));
    }
  }

  throw new Error("AI response was truncated before the JSON object closed");
}

/**
 * Parses model output as JSON, trying progressively more forgiving strategies:
 * first the whole (sanitized) content, then the first balanced object found in it.
 *
 * Strategies are evaluated lazily, so a failure while extracting an object cannot
 * prevent the direct parse from being attempted.
 *
 * @param content Raw model output.
 * @returns The parsed JSON value.
 * @throws Error with the truncation message from `extractJsonObject` when that was
 *   the cause, otherwise a generic "Failed to parse" error.
 */
function parseAiJson(content: string): unknown {
  const strategies: ReadonlyArray<() => string> = [
    () => sanitizeJsonCandidate(content),
    () => extractJsonObject(content),
  ];

  let extractionError: Error | undefined;
  for (const produceCandidate of strategies) {
    try {
      return JSON.parse(produceCandidate());
    } catch (error) {
      // JSON.parse raises SyntaxError; anything else is a more specific
      // diagnosis (e.g. truncation) worth surfacing to the caller.
      if (error instanceof Error && !(error instanceof SyntaxError)) {
        extractionError = error;
      }
    }
  }

  throw extractionError ?? new Error("Failed to parse AI response as valid JSON");
}

/** Builds a JSON response carrying the CORS headers. */
function jsonResponse(body: unknown, status = 200): Response {
  return new Response(JSON.stringify(body), {
    status,
    headers: { ...corsHeaders, "Content-Type": "application/json" },
  });
}

/** Extraction rules sent to the model as the first part of the system prompt. */
const EXTRACTION_RULES = [
  "You are a meticulous invoice data extraction AI.",
  "Analyze the provided PDF invoice carefully — examine EVERY page and EVERY line item, including continuation pages, sub-totals, and tables.",
  "Do not skip or summarize line items; capture each one individually exactly as it appears.",
  "CRITICAL RULES:",
  "- Read the entire document end-to-end before responding.",
  "- Extract EVERY line item separately, even if there are 50+ rows. Do not collapse, group, or omit any.",
  "- Preserve the exact wording from the invoice for descriptions and names.",
  "- For dates, convert to YYYY-MM-DD. If only month/year is shown, use the 1st of the month.",
  "- For monetary values, use numbers only (no currency symbols, no commas). Negative amounts (credits/discounts) should be negative numbers.",
  "- If a field truly cannot be determined from the document, use null for strings and 0 for numbers — do NOT guess.",
  "- The sum of line item amounts should reconcile with the subtotal; double-check before returning.",
  "- Return a complete, syntactically valid JSON object with no markdown fences, comments, trailing commas, or extra text.",
].join(" ");

/** Description of the JSON structure the model must return. */
const OUTPUT_SCHEMA = [
  "Return ONLY valid JSON (no markdown, no code blocks, no commentary) with this exact structure:",
  "{",
  '"vendor_name": "string",',
  '"vendor_address": "string or null",',
  '"vendor_phone": "string or null",',
  '"client_name": "string or null",',
  '"client_address": "string or null",',
  '"invoice_number": "string",',
  '"invoice_date": "YYYY-MM-DD",',
  '"due_date": "YYYY-MM-DD or null",',
  '"service_period": "string or null",',
  '"subtotal": number,',
  '"tax": number,',
  '"other_charges": number,',
  '"total_amount": number,',
  '"currency": "USD",',
  '"payment_terms": "string or null",',
  '"notes": "string or null",',
  '"line_items": [',
  "{",
  '"line_number": number,',
  '"description": "string (full description as printed)",',
  '"name": "string (short item name/SKU if present, else first words of description)",',
  '"date": "YYYY-MM-DD or null",',
  '"quantity": number or null,',
  '"unit_price": number or null,',
  '"amount": number,',
  '"category": "string or null",',
  '"notes": "string or null"',
  "}",
  "]",
  "}",
].join(" ");

const SYSTEM_PROMPT = `${EXTRACTION_RULES} \n${OUTPUT_SCHEMA}`;

/**
 * Handles one HTTP request: answers CORS preflights, validates the JSON body
 * (`pdf_base64`, optional `filename`), forwards the PDF to the AI gateway and
 * returns the extracted invoice as `{ data }`.
 *
 * Responses: 400 for an invalid body or missing PDF, 500 when the API key is not
 * configured or an unexpected error occurs, 502 when the gateway call fails,
 * 422 when the model output cannot be parsed as JSON.
 */
async function handleRequest(req: Request): Promise<Response> {
  if (req.method === "OPTIONS") {
    return new Response(null, { headers: corsHeaders });
  }

  try {
    let body: unknown;
    try {
      body = await req.json();
    } catch {
      // A malformed body is the client's mistake, not a server failure.
      return jsonResponse({ error: "Invalid JSON request body" }, 400);
    }

    const { pdf_base64, filename } = (body ?? {}) as {
      pdf_base64?: unknown;
      filename?: unknown;
    };

    if (typeof pdf_base64 !== "string" || pdf_base64.length === 0) {
      return jsonResponse({ error: "No PDF data provided" }, 400);
    }

    const apiKey = Deno.env.get("LOVABLE_API_KEY");
    if (!apiKey) {
      return jsonResponse({ error: "AI API key not configured" }, 500);
    }

    const displayName = typeof filename === "string" && filename ? filename : "invoice.pdf";

    const response = await fetch("https://ai.gateway.lovable.dev/v1/chat/completions", {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        Authorization: `Bearer ${apiKey}`,
      },
      body: JSON.stringify({
        model: "google/gemini-2.5-pro",
        messages: [
          { role: "system", content: SYSTEM_PROMPT },
          {
            role: "user",
            content: [
              {
                type: "text",
                text: `Extract ALL invoice data from this PDF file named "${displayName}". Examine every page and capture every line item — do not skip or summarize any rows. \nReturn only the JSON object.`,
              },
              {
                type: "image_url",
                image_url: {
                  url: `data:application/pdf;base64,${pdf_base64}`,
                },
              },
            ],
          },
        ],
        temperature: 0,
        max_tokens: 32000,
        response_format: { type: "json_object" },
      }),
    });

    if (!response.ok) {
      const errText = await response.text();
      console.error("AI Gateway error:", errText);
      return jsonResponse({ error: `AI processing failed: ${response.status}` }, 502);
    }

    const aiResult = await response.json();
    const content = aiResult.choices?.[0]?.message?.content || "";

    let parsed: unknown;
    try {
      parsed = parseAiJson(typeof content === "string" ? content : JSON.stringify(content));
    } catch (parseError) {
      console.error("Failed to parse AI response:", content);
      const message =
        parseError instanceof Error && parseError.message
          ? parseError.message
          : "Failed to parse AI response";
      return jsonResponse({ error: message }, 422);
    }

    return jsonResponse({ data: parsed });
  } catch (err) {
    console.error("parse-invoice error:", err);
    // Catch variables are `unknown` under strict mode; narrow before reading `.message`.
    const message = err instanceof Error ? err.message : String(err);
    return jsonResponse({ error: message }, 500);
  }
}

Deno.serve(handleRequest);