added NLU v3 functionality with JSON schema validation and error handling in OpenAI service

2026-01-14 18:16:59 -03:00
parent 47ba68049f
commit 29fa2d127e
5 changed files with 345 additions and 1 deletions
--- a/package-lock.json
+++ b/package-lock.json
@@ -9,6 +9,7 @@
      "version": "1.0.0",
      "license": "MIT",
      "dependencies": {
+        "ajv": "^8.17.1",
        "cors": "^2.8.5",
        "dotenv": "^17.2.3",
        "express": "^4.19.2",
@@ -133,6 +134,22 @@
        "node": ">= 0.6"
      }
    },
+    "node_modules/ajv": {
+      "version": "8.17.1",
+      "resolved": "https://registry.npmjs.org/ajv/-/ajv-8.17.1.tgz",
+      "integrity": "sha512-B/gBuNg5SiMTrPkC+A2+cW0RszwxYmn6VYxB/inlBStS5nx6xHIt/ehKRhIMhqusl7a8LjQoZnjCs5vhwxOQ1g==",
+      "license": "MIT",
+      "dependencies": {
+        "fast-deep-equal": "^3.1.3",
+        "fast-uri": "^3.0.1",
+        "json-schema-traverse": "^1.0.0",
+        "require-from-string": "^2.0.2"
+      },
+      "funding": {
+        "type": "github",
+        "url": "https://github.com/sponsors/epoberezkin"
+      }
+    },
    "node_modules/anymatch": {
      "version": "3.1.3",
      "resolved": "https://registry.npmjs.org/anymatch/-/anymatch-3.1.3.tgz",
@@ -519,6 +536,28 @@
        "url": "https://opencollective.com/express"
      }
    },
+    "node_modules/fast-deep-equal": {
+      "version": "3.1.3",
+      "resolved": "https://registry.npmjs.org/fast-deep-equal/-/fast-deep-equal-3.1.3.tgz",
+      "integrity": "sha512-f3qQ9oQy9j2AhBe/H9VC91wLmKBCCU/gDOnKNAYG5hswO7BLKj09Hc5HYNz9cGI++xlpDCIgDaitVs03ATR84Q==",
+      "license": "MIT"
+    },
+    "node_modules/fast-uri": {
+      "version": "3.1.0",
+      "resolved": "https://registry.npmjs.org/fast-uri/-/fast-uri-3.1.0.tgz",
+      "integrity": "sha512-iPeeDKJSWf4IEOasVVrknXpaBV0IApz/gp7S2bb7Z4Lljbl2MGJRqInZiUrQwV16cpzw/D3S5j5Julj/gT52AA==",
+      "funding": [
+        {
+          "type": "github",
+          "url": "https://github.com/sponsors/fastify"
+        },
+        {
+          "type": "opencollective",
+          "url": "https://opencollective.com/fastify"
+        }
+      ],
+      "license": "BSD-3-Clause"
+    },
    "node_modules/fill-range": {
      "version": "7.1.1",
      "resolved": "https://registry.npmjs.org/fill-range/-/fill-range-7.1.1.tgz",
@@ -788,6 +827,12 @@
        "node": ">=0.12.0"
      }
    },
+    "node_modules/json-schema-traverse": {
+      "version": "1.0.0",
+      "resolved": "https://registry.npmjs.org/json-schema-traverse/-/json-schema-traverse-1.0.0.tgz",
+      "integrity": "sha512-NM8/P9n3XjXhIZn1lLhkFaACTOURQXjWhV4BA/RnOv8xvgqtqpAX9IO4mRQxSx1Rlo4tqzeqb0sOlruaOy3dug==",
+      "license": "MIT"
+    },
    "node_modules/math-intrinsics": {
      "version": "1.1.0",
      "resolved": "https://registry.npmjs.org/math-intrinsics/-/math-intrinsics-1.1.0.tgz",
@@ -1231,6 +1276,15 @@
        "node": ">=8.10.0"
      }
    },
+    "node_modules/require-from-string": {
+      "version": "2.0.2",
+      "resolved": "https://registry.npmjs.org/require-from-string/-/require-from-string-2.0.2.tgz",
+      "integrity": "sha512-Xf0nWe6RseziFMu+Ap9biiUbmplq6S9/p+7w7YXP/JBHhrUDDUhwa+vANyubuqfZWTveU//DYVGsDG7RKL/vEw==",
+      "license": "MIT",
+      "engines": {
+        "node": ">=0.10.0"
+      }
+    },
    "node_modules/safe-buffer": {
      "version": "5.2.1",
      "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.2.1.tgz",
--- a/package.json
+++ b/package.json
@@ -16,6 +16,7 @@
  "author": "Lucas Tettamanti",
  "license": "MIT",
  "dependencies": {
+    "ajv": "^8.17.1",
    "cors": "^2.8.5",
    "dotenv": "^17.2.3",
    "express": "^4.19.2",
--- a/src/services/fsm.js
+++ b/src/services/fsm.js
@@ -0,0 +1,153 @@
+/**
+ * FSM autoritativa (server-side) para el flujo conversacional.
+ *
+ * Principios:
+ * - El LLM NO decide estados. Solo NLU.
+ * - El backend deriva el estado objetivo a partir del contexto + acciones.
+ * - Validamos transiciones y, si algo queda inconsistente, caemos a ERROR_RECOVERY.
+ */
+
+/**
+ * Enumeration of every conversation state. Each key maps to an identical
+ * string value, and the object is frozen so it cannot be modified at runtime.
+ */
+export const ConversationState = Object.freeze(
+  Object.fromEntries(
+    [
+      "IDLE",
+      "BROWSING",
+      "AWAITING_QUANTITY",
+      "CART_ACTIVE",
+      "AWAITING_ADDRESS",
+      "AWAITING_PAYMENT",
+      "COMPLETED",
+      "ERROR_RECOVERY",
+    ].map((name) => [name, name]),
+  ),
+);
+
+/** Frozen array holding every state value, in declaration order. */
+export const ALL_STATES = Object.freeze(
+  Object.keys(ConversationState).map((key) => ConversationState[key]),
+);
+
+/**
+ * Reports whether the context carries a non-empty basket.
+ *
+ * Reads `ctx.basket.items` first and only falls back to `ctx.order_basket.items`
+ * when the former is falsy. An empty `basket.items` array is truthy, so it is
+ * NOT overridden by `order_basket.items`.
+ *
+ * @param {object|null|undefined} ctx Conversation context.
+ * @returns {boolean} True when the selected `items` value is an array with at least one element.
+ */
+function hasBasketItems(ctx) {
+  const primary = ctx?.basket?.items;
+  const list = primary ? primary : ctx?.order_basket?.items;
+  if (!Array.isArray(list)) return false;
+  return list.length !== 0;
+}
+
+/**
+ * Reports whether `ctx.pending_clarification` holds anything to choose from,
+ * i.e. its `candidates` or its `options` has a truthy `length`.
+ *
+ * @param {object|null|undefined} ctx Conversation context.
+ * @returns {boolean}
+ */
+function hasPendingClarification(ctx) {
+  const pending = ctx?.pending_clarification;
+  return [pending?.candidates, pending?.options].some((list) => Boolean(list?.length));
+}
+
+/**
+ * Reports whether `ctx.pending_item` identifies a product, through a truthy
+ * `product_id` or a truthy `sku`.
+ *
+ * @param {object|null|undefined} ctx Conversation context.
+ * @returns {boolean}
+ */
+function hasPendingItem(ctx) {
+  const pending = ctx?.pending_item;
+  if (pending?.product_id) return true;
+  return Boolean(pending?.sku);
+}
+
+/**
+ * Reports whether the context carries a delivery address in any of the shapes
+ * this module reads: `delivery_address.text`, `address.text` or `address_text`.
+ *
+ * @param {object|null|undefined} ctx Conversation context.
+ * @returns {boolean} True when at least one of those values is truthy.
+ */
+function hasAddress(ctx) {
+  const candidates = [ctx?.delivery_address?.text, ctx?.address?.text, ctx?.address_text];
+  return candidates.some(Boolean);
+}
+
+/**
+ * Reports whether the context references an order, through a truthy
+ * `woo_order_id` or a truthy `last_order_id`.
+ *
+ * @param {object|null|undefined} ctx Conversation context.
+ * @returns {boolean}
+ */
+function hasWooOrder(ctx) {
+  if (ctx?.woo_order_id) return true;
+  return Boolean(ctx?.last_order_id);
+}
+
+/**
+ * Reports whether the context carries a payment link in any of the shapes
+ * this module reads: `mp.init_point`, `payment.init_point` or `payment_link`.
+ *
+ * @param {object|null|undefined} ctx Conversation context.
+ * @returns {boolean} True when at least one of those values is truthy.
+ */
+function hasPaymentLink(ctx) {
+  const link = ctx?.mp?.init_point || ctx?.payment?.init_point || ctx?.payment_link;
+  return link ? true : false;
+}
+
+/**
+ * Reports whether the context's payment status marks the order as paid.
+ *
+ * The status is the first truthy value among `mp.payment_status`,
+ * `payment.status` and `payment_status`. Only that first value is inspected,
+ * so a truthy non-paid status earlier in the chain hides a later one.
+ *
+ * @param {object|null|undefined} ctx Conversation context.
+ * @returns {boolean} True when the selected status is "approved" or "paid".
+ */
+function isPaid(ctx) {
+  const paidStatuses = ["approved", "paid"];
+  const sources = [ctx?.mp?.payment_status, ctx?.payment?.status, ctx?.payment_status];
+  const status = sources.find(Boolean) ?? null;
+  return paidStatuses.includes(status);
+}
+
+/**
+ * Derives the target state from the current context and the signals of the turn.
+ * `signals` is deterministic information produced by the turn engine (not by
+ * the LLM), for example: { requested_checkout: true }.
+ *
+ * Rules are evaluated in order and the first one that applies wins; when none
+ * applies the result is IDLE. `prevState` is accepted but not consulted here.
+ *
+ * @param {string} prevState Previous state (currently unused by the derivation).
+ * @param {object} [ctx={}] Conversation context.
+ * @param {object} [signals={}] Per-turn flags such as `requested_checkout`,
+ *   `requested_address`, `pending_item_completed`, `did_show_options`, `is_browsing`.
+ * @returns {string} One of the `ConversationState` values.
+ */
+export function deriveNextState(prevState, ctx = {}, signals = {}) {
+  const S = ConversationState;
+
+  // Ordered [predicate, state] pairs; predicates run lazily, in priority order.
+  const rules = [
+    // Rule 1: a confirmed payment always wins.
+    [() => isPaid(ctx), S.COMPLETED],
+    // Rule 2: an order plus a payment link means we are waiting for payment.
+    [() => hasWooOrder(ctx) && hasPaymentLink(ctx), S.AWAITING_PAYMENT],
+    // Rule 3: checkout was attempted but the address is still missing.
+    [
+      () =>
+        (signals.requested_checkout || signals.requested_address) &&
+        hasBasketItems(ctx) &&
+        !hasAddress(ctx),
+      S.AWAITING_ADDRESS,
+    ],
+    // Rule 4: there is a pending item whose quantity has not been completed.
+    [() => hasPendingItem(ctx) && !signals.pending_item_completed, S.AWAITING_QUANTITY],
+    // Rule 5: there is an active cart.
+    [() => hasBasketItems(ctx), S.CART_ACTIVE],
+    // Rule 6: options are being shown / a selection is awaited.
+    [
+      () => hasPendingClarification(ctx) || signals.did_show_options || signals.is_browsing,
+      S.BROWSING,
+    ],
+  ];
+
+  const match = rules.find(([applies]) => applies());
+  return match ? match[1] : S.IDLE;
+}
+
+/**
+ * Transition table: maps each state to the list of states it may move to.
+ * Every state may stay where it is and may move to ERROR_RECOVERY;
+ * ERROR_RECOVERY itself may move to any state.
+ * Only the outer object is frozen by this declaration.
+ */
+const ALLOWED = (() => {
+  const S = ConversationState;
+  return Object.freeze({
+    [S.IDLE]: [S.IDLE, S.BROWSING, S.AWAITING_QUANTITY, S.CART_ACTIVE, S.ERROR_RECOVERY],
+    [S.BROWSING]: [S.BROWSING, S.AWAITING_QUANTITY, S.CART_ACTIVE, S.IDLE, S.ERROR_RECOVERY],
+    [S.AWAITING_QUANTITY]: [S.AWAITING_QUANTITY, S.CART_ACTIVE, S.BROWSING, S.ERROR_RECOVERY],
+    [S.CART_ACTIVE]: [
+      S.CART_ACTIVE,
+      S.AWAITING_ADDRESS,
+      S.AWAITING_PAYMENT,
+      S.ERROR_RECOVERY,
+      S.BROWSING,
+    ],
+    [S.AWAITING_ADDRESS]: [S.AWAITING_ADDRESS, S.AWAITING_PAYMENT, S.CART_ACTIVE, S.ERROR_RECOVERY],
+    [S.AWAITING_PAYMENT]: [S.AWAITING_PAYMENT, S.COMPLETED, S.ERROR_RECOVERY],
+    // COMPLETED -> IDLE covers a new conversation / natural restart.
+    [S.COMPLETED]: [S.COMPLETED, S.IDLE, S.ERROR_RECOVERY],
+    [S.ERROR_RECOVERY]: ALL_STATES,
+  });
+})();
+
+/**
+ * Checks whether moving from `prevState` to `nextState` is permitted by the
+ * transition table. A falsy state on either side is treated as IDLE.
+ *
+ * @param {string} [prevState] State the conversation is currently in.
+ * @param {string} [nextState] State the conversation wants to move to.
+ * @returns {{ok: true} | {ok: false, reason: string, prev: string, next: string}}
+ *   `{ ok: true }` when allowed; otherwise `ok: false` with `reason` set to
+ *   "unknown_prev_state", "unknown_next_state" or "invalid_transition".
+ */
+export function validateTransition(prevState, nextState) {
+  const p = prevState || ConversationState.IDLE;
+  const n = nextState || ConversationState.IDLE;
+  // Own-property check: a plain `ALLOWED[p]` lookup also resolves inherited keys
+  // ("constructor", "toString", "__proto__"), which are truthy but have no
+  // `.includes`, so they would throw instead of reporting an unknown state.
+  if (!Object.prototype.hasOwnProperty.call(ALLOWED, p)) {
+    return { ok: false, reason: "unknown_prev_state", prev: p, next: n };
+  }
+  if (!ALL_STATES.includes(n)) return { ok: false, reason: "unknown_next_state", prev: p, next: n };
+  const ok = ALLOWED[p].includes(n);
+  return ok ? { ok: true } : { ok: false, reason: "invalid_transition", prev: p, next: n };
+}
+
+/**
+ * Derives the desired next state and validates the transition from `prevState`.
+ * When the transition is rejected, ERROR_RECOVERY is returned instead of the
+ * desired state; the validation result is returned in both cases.
+ *
+ * @param {string} prevState Previous state.
+ * @param {object} ctx Conversation context, forwarded to `deriveNextState`.
+ * @param {object} signals Per-turn signals, forwarded to `deriveNextState`.
+ * @returns {{next_state: string, validation: object}}
+ */
+export function safeNextState(prevState, ctx, signals) {
+  const candidate = deriveNextState(prevState, ctx, signals);
+  const validation = validateTransition(prevState, candidate);
+  const next_state = validation.ok ? candidate : ConversationState.ERROR_RECOVERY;
+  return { next_state, validation };
+}
+
--- a/src/services/openai.js
+++ b/src/services/openai.js
@@ -1,5 +1,6 @@
 import OpenAI from "openai";
 import { z } from "zod";
+import Ajv from "ajv";
 import { debug as dbg } from "./debug.js";

 let _client = null;
@@ -126,6 +127,141 @@ async function jsonCompletion({ system, user, model }) {
  return { parsed, raw_text: text, model: chosenModel, usage: resp?.usage || null };
 }

+// --- NLU v3 (single-step, schema-strict) ---
+
+/**
+ * JSON Schema for the NLU v3 contract. Every object level sets
+ * `additionalProperties: false` and lists all of its properties as required,
+ * so a valid payload has exactly these keys. Optional values are expressed as
+ * an `anyOf` with `null` rather than by omitting the key.
+ */
+const NluV3JsonSchema = {
+  $id: "NluV3",
+  type: "object",
+  additionalProperties: false,
+  required: ["intent", "confidence", "language", "entities", "needs"],
+  properties: {
+    intent: {
+      type: "string",
+      enum: ["price_query", "browse", "add_to_cart", "remove_from_cart", "checkout", "greeting", "other"],
+    },
+    // Bounded to the closed interval [0, 1].
+    confidence: { type: "number", minimum: 0, maximum: 1 },
+    language: { type: "string" },
+    entities: {
+      type: "object",
+      additionalProperties: false,
+      required: ["product_query", "quantity", "unit", "selection", "attributes", "preparation"],
+      properties: {
+        product_query: { anyOf: [{ type: "string" }, { type: "null" }] },
+        quantity: { anyOf: [{ type: "number" }, { type: "null" }] },
+        unit: { anyOf: [{ type: "string", enum: ["kg", "g", "unidad"] }, { type: "null" }] },
+        // Either null or a { type, value } pair; `value` is always a non-empty string.
+        selection: {
+          anyOf: [
+            { type: "null" },
+            {
+              type: "object",
+              additionalProperties: false,
+              required: ["type", "value"],
+              properties: {
+                type: { type: "string", enum: ["index", "text", "sku"] },
+                value: { type: "string", minLength: 1 },
+              },
+            },
+          ],
+        },
+        attributes: { type: "array", items: { type: "string" } },
+        preparation: { type: "array", items: { type: "string" } },
+      },
+    },
+    needs: {
+      type: "object",
+      additionalProperties: false,
+      required: ["catalog_lookup", "knowledge_lookup"],
+      properties: {
+        catalog_lookup: { type: "boolean" },
+        knowledge_lookup: { type: "boolean" },
+      },
+    },
+  },
+};
+
+// Module-level Ajv instance, configured to collect all errors and to run in strict mode.
+const ajv = new Ajv({ allErrors: true, strict: true });
+// Validator compiled once at module load; its `.errors` property is read by nluV3Errors().
+const validateNluV3 = ajv.compile(NluV3JsonSchema);
+
+/**
+ * Builds the neutral NLU v3 result returned when the model output could not be
+ * validated: intent "other", confidence 0, language "es-AR", empty entities
+ * and no lookups requested. A fresh object is created on every call.
+ *
+ * @returns {object} Fallback NLU payload with the v3 keys.
+ */
+function nluV3Fallback() {
+  const entities = {
+    product_query: null,
+    quantity: null,
+    unit: null,
+    selection: null,
+    attributes: [],
+    preparation: [],
+  };
+  const needs = { catalog_lookup: false, knowledge_lookup: false };
+  return { intent: "other", confidence: 0, language: "es-AR", entities, needs };
+}
+
+/**
+ * Snapshots the errors currently stored on the compiled validator
+ * (`validateNluV3.errors`), keeping only instancePath, schemaPath, keyword,
+ * message and params of each entry.
+ *
+ * @returns {Array<object>} Plain error summaries; empty when the validator holds no errors.
+ */
+function nluV3Errors() {
+  const current = validateNluV3.errors ? validateNluV3.errors : [];
+  return current.map(({ instancePath, schemaPath, keyword, message, params }) => ({
+    instancePath,
+    schemaPath,
+    keyword,
+    message,
+    params,
+  }));
+}
+
+/**
+ * Runs the single-step NLU v3 extraction and validates the model output
+ * against the v3 JSON Schema.
+ *
+ * Flow: one completion; if its parsed output fails validation, one retry whose
+ * system prompt also carries the validation errors (serialized and capped at
+ * 1800 characters). If the retry fails validation too, or throws, the neutral
+ * fallback NLU object is returned. An exception thrown by the FIRST completion
+ * is not caught here and propagates to the caller.
+ *
+ * @param {object} [args]
+ * @param {*} [args.input] Payload JSON-stringified as the user message (`{}` when nullish).
+ * @param {string} [args.model] Forwarded as-is to `jsonCompletion`.
+ * @returns {Promise<object>} `{ nlu, raw_text, model, usage, schema: "v3", validation }`,
+ *   where `validation.ok` tells whether `nlu` is validated model output or the fallback.
+ */
+export async function llmNluV3({ input, model } = {}) {
+  const promptLines = [
+    "Sos un servicio NLU (es-AR). Extraés intención y entidades del mensaje del usuario.",
+    "IMPORTANTE:",
+    "- NO decidas estados (FSM), NO planifiques acciones, NO inventes productos ni precios.",
+    "- Respondé SOLO con JSON válido, EXACTAMENTE con las keys del contrato. additionalProperties=false.",
+    "- Si hay opciones mostradas y el usuario responde con un número/ordinal ('el segundo'), eso es entities.selection {type:'index'}.",
+    "- Si el usuario responde 'mostrame más', poné intent='browse' y entities.selection=null (la paginación la maneja el servidor).",
+    "- needs.catalog_lookup debe ser true para intents price_query|browse|add_to_cart si NO es una pura selección sobre opciones ya mostradas.",
+  ];
+  // Every prompt line, the last one included, ends with a newline.
+  const systemBase = `${promptLines.join("\n")}\n`;
+
+  const user = JSON.stringify(input ?? {});
+
+  // Shapes the common result envelope from a completion attempt.
+  const envelope = (attempt, nlu, validation) => ({
+    nlu,
+    raw_text: attempt.raw_text,
+    model: attempt.model,
+    usage: attempt.usage,
+    schema: "v3",
+    validation,
+  });
+
+  // Attempt 1.
+  const first = await jsonCompletion({ system: systemBase, user, model });
+  if (validateNluV3(first.parsed)) {
+    return envelope(first, first.parsed, { ok: true });
+  }
+
+  // Snapshot the errors before the validator runs again on the retry output.
+  const errors1 = nluV3Errors();
+
+  // Retry exactly once, telling the model what failed.
+  const systemRetry = `${systemBase}\nTu respuesta anterior no validó el JSON Schema. Corregí el JSON para que cumpla estrictamente.\nErrores: ${JSON.stringify(errors1).slice(0, 1800)}\n`;
+
+  try {
+    const second = await jsonCompletion({ system: systemRetry, user, model });
+    if (validateNluV3(second.parsed)) {
+      return envelope(second, second.parsed, { ok: true, retried: true });
+    }
+    return envelope(second, nluV3Fallback(), { ok: false, retried: true, errors: nluV3Errors() });
+  } catch (e) {
+    // The retry itself failed: report the first attempt's metadata and errors.
+    return envelope(first, nluV3Fallback(), {
+      ok: false,
+      retried: true,
+      error: String(e?.message || e),
+      errors: errors1,
+    });
+  }
+}
+
 /**
 * Genera un "plan" de conversación (salida estructurada) usando OpenAI.
 *