import crypto from "crypto";
import OpenAI from "openai";
import { debug as dbg } from "./debug.js";
import { searchProducts } from "./wooProducts.js";
import {
  searchProductAliases,
  getProductEmbedding,
  upsertProductEmbedding,
} from "../db/repo.js";

/**
 * How many candidates (taken from the front of the search result, in the order
 * the product search returned them) receive an embedding similarity score.
 */
const MAX_EMBEDDED_CANDIDATES = 25;

/**
 * Reads the OpenAI API key from the environment.
 *
 * @returns {string|null} `OPENAI_API_KEY`, else `OPENAI_APIKEY`, else `null`.
 */
function getOpenAiKey() {
  return process.env.OPENAI_API_KEY || process.env.OPENAI_APIKEY || null;
}

/**
 * Returns the embeddings model name.
 *
 * @returns {string} `OPENAI_EMBEDDINGS_MODEL` or `"text-embedding-3-small"` when unset.
 */
function getEmbeddingsModel() {
  return process.env.OPENAI_EMBEDDINGS_MODEL || "text-embedding-3-small";
}

/**
 * Lowercases the input, replaces a fixed set of punctuation characters with
 * spaces, collapses whitespace runs and trims the result.
 *
 * @param {*} s - Value to normalize; falsy values are treated as "".
 * @returns {string} Normalized text (possibly empty).
 */
function normalizeText(s) {
  return String(s || "")
    .toLowerCase()
    .replace(/[¿?¡!.,;:()"]/g, " ")
    .replace(/\s+/g, " ")
    .trim();
}

/**
 * Hex-encoded SHA-256 digest of the given text.
 *
 * @param {*} s - Value to hash; falsy values are hashed as "".
 * @returns {string} Hex digest.
 */
function hashText(s) {
  return crypto.createHash("sha256").update(String(s || "")).digest("hex");
}

/**
 * Cosine similarity of two numeric vectors.
 *
 * @param {number[]} a
 * @param {number[]} b
 * @returns {number} The similarity, or 0 when either argument is not an array,
 *   the lengths differ, the vectors are empty, or either has zero norm.
 */
function cosine(a, b) {
  if (!Array.isArray(a) || !Array.isArray(b) || a.length !== b.length || a.length === 0) return 0;
  let dot = 0;
  let na = 0;
  let nb = 0;
  for (let i = 0; i < a.length; i++) {
    // Non-numeric components count as 0.
    const x = Number(a[i]) || 0;
    const y = Number(b[i]) || 0;
    dot += x * y;
    na += x * x;
    nb += y * y;
  }
  if (na === 0 || nb === 0) return 0;
  return dot / (Math.sqrt(na) * Math.sqrt(nb));
}

/**
 * Builds the text that represents a candidate for embedding purposes: its
 * name, followed by category names/slugs and attribute names/options,
 * joined with single spaces.
 *
 * @param {object} c - Candidate product.
 * @returns {string} Concatenated descriptive text (may be empty).
 */
function candidateText(c) {
  const parts = [c?.name || ""];
  if (Array.isArray(c?.categories)) {
    for (const cat of c.categories) {
      if (cat?.name) parts.push(cat.name);
      if (cat?.slug) parts.push(cat.slug);
    }
  }
  if (Array.isArray(c?.attributes)) {
    for (const a of c.attributes) {
      if (a?.name) parts.push(a.name);
      if (Array.isArray(a?.options)) parts.push(a.options.join(" "));
    }
  }
  return parts.join(" ");
}

/**
 * Literal (string-based) match score between a query and a candidate's name,
 * both compared after `normalizeText`.
 *
 * @param {string} query
 * @param {object} candidate - Only `candidate.name` is read.
 * @returns {number} 1.0 on exact match, 0.7 when the name contains the query,
 *   otherwise the fraction of distinct query tokens that appear as name tokens;
 *   0 when either normalized string is empty.
 */
function literalScore(query, candidate) {
  const q = normalizeText(query);
  const n = normalizeText(candidate?.name || "");
  if (!q || !n) return 0;
  if (n === q) return 1.0;
  if (n.includes(q)) return 0.7;
  const qt = new Set(q.split(" ").filter(Boolean));
  const nt = new Set(n.split(" ").filter(Boolean));
  let hits = 0;
  for (const w of qt) if (nt.has(w)) hits++;
  return hits / Math.max(qt.size, 1);
}

/**
 * Returns an embedding vector for `text`, using the per-tenant cache first.
 *
 * The text is normalized and hashed; the hash is looked up through
 * `getProductEmbedding`. On a cache miss the OpenAI embeddings endpoint is
 * called and an array result is stored through `upsertProductEmbedding`.
 * Errors thrown by the repository or the OpenAI client are not caught here.
 *
 * @param {object} args
 * @param {*} args.tenantId - Tenant identifier passed to the repository calls.
 * @param {string} args.text - Text to embed.
 * @returns {Promise<{embedding: (number[]|null), cached: boolean, model: (string|null), error?: string}>}
 *   `error` is `"OPENAI_NO_KEY"` when no API key is configured and
 *   `"EMPTY_TEXT"` when the text is empty after normalization.
 */
async function embedText({ tenantId, text }) {
  const key = getOpenAiKey();
  if (!key) return { embedding: null, cached: false, model: null, error: "OPENAI_NO_KEY" };

  const content = normalizeText(text);
  // The embeddings endpoint rejects empty input; failing here instead of
  // throwing keeps one blank text from aborting the caller's whole batch.
  if (!content) {
    return { embedding: null, cached: false, model: null, error: "EMPTY_TEXT" };
  }

  const contentHash = hashText(content);
  const cached = await getProductEmbedding({ tenant_id: tenantId, content_hash: contentHash });
  if (cached?.embedding) {
    return { embedding: cached.embedding, cached: true, model: cached.model || null };
  }

  const client = new OpenAI({ apiKey: key });
  const model = getEmbeddingsModel();
  const resp = await client.embeddings.create({
    model,
    input: content,
  });
  const vector = resp?.data?.[0]?.embedding || null;
  if (Array.isArray(vector)) {
    await upsertProductEmbedding({
      tenant_id: tenantId,
      content_hash: contentHash,
      content_text: content,
      embedding: vector,
      model,
    });
  }
  return { embedding: vector, cached: false, model };
}

/**
 * De-duplicates candidates by numeric `woo_product_id`.
 *
 * Entries without a truthy `woo_product_id` are dropped. When the same id
 * appears more than once the entries are shallow-merged: the fields of the
 * higher-scored entry win (the later entry wins ties), so `_score_detail`
 * describes the `_score` that is kept.
 *
 * @param {object[]} list - Candidates, possibly with duplicate ids.
 * @returns {object[]} One entry per id, in first-seen order.
 */
function mergeCandidates(list) {
  const map = new Map();
  for (const c of list) {
    if (!c?.woo_product_id) continue;
    const id = Number(c.woo_product_id);
    if (!map.has(id)) {
      map.set(id, { ...c });
      continue;
    }
    const prev = map.get(id);
    const prevScore = prev._score || 0;
    const nextScore = c._score || 0;
    const merged = nextScore >= prevScore ? { ...prev, ...c } : { ...c, ...prev };
    merged._score = Math.max(prevScore, nextScore);
    map.set(id, merged);
  }
  return [...map.values()];
}

/**
 * retrieveCandidates: combines literal product search, alias boosts and
 * embedding similarity into a single ranked candidate list.
 *
 * Scoring: literal name score + alias boost, plus the non-negative cosine
 * similarity between the query embedding and the candidate embedding for the
 * first `MAX_EMBEDDED_CANDIDATES` search results. If the embedding stage
 * throws, the error message is recorded in `audit.embeddings.error` and the
 * literal/alias scores are used as-is.
 *
 * @param {object} args
 * @param {*} args.tenantId - Tenant identifier forwarded to search/repository calls.
 * @param {string} args.query - Free-text product query.
 * @param {Array} [args.attributes=[]] - Accepted but not used by this function.
 * @param {Array} [args.preparation=[]] - Accepted but not used by this function.
 * @param {number} [args.limit=12] - Maximum results; clamped to 1..50, falls back to 12.
 * @returns {Promise<{candidates: object[], audit: object}>} Candidates sorted by
 *   descending `_score` (each with `_score_detail`) and an audit record of the
 *   sources consulted. An empty query yields `{ candidates: [], audit: { reason: "empty_query" } }`.
 */
export async function retrieveCandidates({
  tenantId,
  query,
  attributes = [],
  preparation = [],
  limit = 12,
}) {
  const lim = Math.max(1, Math.min(50, parseInt(limit, 10) || 12));
  const q = String(query || "").trim();
  if (!q) {
    return { candidates: [], audit: { reason: "empty_query" } };
  }

  const audit = { query: q, sources: {}, boosts: {}, embeddings: {} };

  const aliasRows = await searchProductAliases({ tenant_id: tenantId, q, limit: 20 });
  // Treat a missing/non-array result as "no aliases", mirroring the guard on wooItems below.
  const aliases = Array.isArray(aliasRows) ? aliasRows : [];
  const aliasBoostByProduct = new Map();
  for (const a of aliases) {
    if (a?.woo_product_id) {
      const id = Number(a.woo_product_id);
      const boost = Number(a.boost || 0);
      // Keep the largest boost per product; the floor is 0.
      aliasBoostByProduct.set(id, Math.max(aliasBoostByProduct.get(id) || 0, boost || 0));
    }
  }
  audit.sources.aliases = aliases.length;

  const { items: wooItems, source: wooSource } = await searchProducts({
    tenantId,
    q,
    limit: lim,
    forceWoo: true,
  });
  audit.sources.woo = { source: wooSource, count: wooItems?.length || 0 };

  let candidates = (wooItems || []).map((c) => {
    const lit = literalScore(q, c);
    const boost = aliasBoostByProduct.get(Number(c.woo_product_id)) || 0;
    return { ...c, _score: lit + boost, _score_detail: { literal: lit, alias_boost: boost } };
  });

  // Embeddings are optional: attempted only when there are candidates, and
  // skipped when embedText yields no vector for the query (e.g. no API key).
  if (candidates.length) {
    try {
      const queryEmb = await embedText({ tenantId, text: q });
      if (Array.isArray(queryEmb.embedding)) {
        audit.embeddings.query = { cached: queryEmb.cached, model: queryEmb.model };
        const enriched = [];
        for (const c of candidates.slice(0, MAX_EMBEDDED_CANDIDATES)) {
          const text = candidateText(c);
          const emb = await embedText({ tenantId, text });
          const cos = Array.isArray(emb.embedding)
            ? cosine(queryEmb.embedding, emb.embedding)
            : 0;
          const prev = c._score || 0;
          enriched.push({
            ...c,
            // Negative similarity never lowers the literal/alias score.
            _score: prev + Math.max(0, cos),
            _score_detail: { ...(c._score_detail || {}), cosine: cos, emb_cached: emb.cached },
          });
        }
        // Merge with the remaining candidates that received no embedding score.
        const tail = candidates.slice(MAX_EMBEDDED_CANDIDATES);
        candidates = mergeCandidates([...enriched, ...tail]);
      } else {
        audit.embeddings.query = { error: queryEmb.error || "no_embedding" };
      }
    } catch (e) {
      audit.embeddings.error = String(e?.message || e);
    }
  }

  candidates.sort((a, b) => (b._score || 0) - (a._score || 0));
  const finalList = candidates.slice(0, lim);

  if (dbg.resolve) {
    console.log("[catalogRetrieval] candidates", {
      query: q,
      top: finalList.slice(0, 5).map((c) => ({
        id: c.woo_product_id,
        name: c.name,
        score: c._score,
        detail: c._score_detail,
      })),
    });
  }

  return { candidates: finalList, audit };
}