Redesign: agente tool-calling con DeepSeek (D2-D10 del plan)

Reemplaza el NLU rígido (intent+entities) por un agente LLM con tool-calling que decide y muta estado en cada turno. Opt-in vía AGENT_TURN_ENGINE=1. DeepSeek V4 (deepseek-chat) configurado como modelo (OpenAI-compatible). Arquitectura nueva en src/modules/3-turn-engine/agent/: - workingMemory.js: arma el JSON contextual que recibe el LLM cada turno (cart, pending, last_shown_options, store, customer_profile, history, preparsed quantity). - systemPrompt.js: prompt estático ~70 líneas. Define rol + reglas duras + cómo procesar mensajes + cómo escribir el say. Sin enumeración de intents. - runTurn.js: loop de tool-calling con tool_choice="required". Cap 10 tool calls / 20s timeout. Métricas in-memory. - customerProfile.js: lookup de frequent_items en woo_orders_cache por teléfono (chat_id → phone), top 5 últimos 6 meses. Cache 10 min. - tools/schemas.js: 11 tools (search_catalog, add_to_cart, set_quantity, select_candidate, remove_from_cart, set_shipping, set_address, confirm_order, pause, escalate_to_human, say). - tools/executor.js: validación Ajv + dispatch + observación al LLM. woo_id se valida contra snapshot — si no existe el agente vuelve a search_catalog (anti-halucinación). - tools/searchCatalog.js: wrappea retrieveCandidates + fallback por categoría usando jsonb_array_elements_text del snapshot. Persiste last_shown_options automáticamente. - tools/{addToCart, setQuantity, selectCandidate, removeFromCart, setShipping, setAddress, confirmOrder, pause, escalateToHuman}.js: side effects atómicos sobre el order. - quantityParser.js (D1): determinístico, parsea fracciones, frases compuestas (media docena, cuarto kilo), numéricos. 46 tests. FSM extendida (fsm.js): nuevo estado PAUSED (TTL 7d, cart preservado, "después te digo" → pause tool). pipeline.js: TTL stale ahora 24h general, 7d si PAUSED, infinito si AWAITING_HUMAN. turnEngineV3.js: nuevas flags AGENT_TURN_ENGINE y AGENT_TURN_ENGINE_SHADOW. Branch a runTurnAgent cuando full o corre en paralelo escribiendo diffs estructurales en audit_log (entity_type='agent_shadow') para validar paridad antes de flippar. Endpoint nuevo: GET /api/metrics/agent → turns, avg_tool_calls, fallback rate, escalations, pauses, orders_confirmed. Smoke test E2E con DeepSeek real: - "hola" → say (2.3s, 1 tool) - "2kg de vacio" → search → add_to_cart → say (8.8s, 3 tools) - "media docena de chorizos" → search → say con clarificación (10.3s, 4 tools) - "listo" → say (3.3s, 1 tool) - "retiro" → set_shipping → confirm → say (5.1s, 3 tools) Cart final correcto: 2kg de Vacío. Estado: CART → SHIPPING. Tests: 238/238 pasando. D9 (cleanup legacy ~1200 LOC NLU/handlers/replyRewriter) DEFERRED: se hace después de paridad shadow validada con tráfico real. Hoy agente coexiste con legacy; default sigue siendo el motor V3. Plan completo en ~/.claude/plans/ok-creo-que-tiene-humming-sutton.md. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-02 12:52:47 -03:00
parent 9c69cf8911
commit 03621f16f4
21 changed files with 1414 additions and 11 deletions
--- a/src/modules/3-turn-engine/agent/runTurn.js
+++ b/src/modules/3-turn-engine/agent/runTurn.js
@@ -0,0 +1,319 @@
+/**
+ * runTurn — Punto de entrada del agente tool-calling.
+ *
+ * Reemplaza turnEngineV3 cuando AGENT_TURN_ENGINE=1.
+ * Mantiene la firma compatible con pipeline.js:
+ *   runTurnAgent({ tenantId, chat_id, text, prev_state, prev_context, conversation_history })
+ *     → { plan, decision }
+ */
+
+import OpenAI from "openai";
+import { migrateOldContext, createEmptyOrder } from "../orderModel.js";
+import { getStoreConfig } from "../../0-ui/db/settingsRepo.js";
+import { ConversationState, safeNextState } from "../fsm.js";
+import { buildWorkingMemory } from "./workingMemory.js";
+import { buildSystemPrompt } from "./systemPrompt.js";
+import { TOOL_SCHEMAS } from "./tools/schemas.js";
+import { executeToolCall } from "./tools/executor.js";
+import { getCustomerProfile } from "./customerProfile.js";
+import { debug as dbg } from "../../shared/debug.js";
+
+const MAX_TOOL_CALLS = parseInt(process.env.AGENT_MAX_TOOL_CALLS || "10", 10);
+const TURN_TIMEOUT_MS = parseInt(process.env.AGENT_TURN_TIMEOUT_MS || "20000", 10);
+
+// Métricas in-memory: turns/calls, fallback rate, escalation rate, avg duration.
+const _metrics = {
+  turns: 0,
+  total_tool_calls: 0,
+  total_llm_calls: 0,
+  total_duration_ms: 0,
+  fallback_used: 0,
+  llm_errors: 0,
+  escalations: 0,
+  pauses: 0,
+  orders_confirmed: 0,
+};
+
+export function getAgentMetrics() {
+  const t = _metrics.turns;
+  return {
+    turns: t,
+    avg_tool_calls_per_turn: t ? +(_metrics.total_tool_calls / t).toFixed(2) : 0,
+    avg_llm_calls_per_turn: t ? +(_metrics.total_llm_calls / t).toFixed(2) : 0,
+    avg_duration_ms: t ? Math.round(_metrics.total_duration_ms / t) : 0,
+    fallback_rate: t ? +(_metrics.fallback_used / t).toFixed(3) : 0,
+    error_rate: t ? +(_metrics.llm_errors / t).toFixed(3) : 0,
+    escalations: _metrics.escalations,
+    pauses: _metrics.pauses,
+    orders_confirmed: _metrics.orders_confirmed,
+  };
+}
+
+export function resetAgentMetrics() {
+  for (const k of Object.keys(_metrics)) _metrics[k] = 0;
+}
+
+let _client = null;
+function getClient() {
+  if (_client) return _client;
+  const apiKey = process.env.OPENAI_API_KEY;
+  if (!apiKey) throw new Error("OPENAI_API_KEY not set");
+  const baseURL = process.env.OPENAI_BASE_URL || undefined;
+  _client = new OpenAI({ apiKey, ...(baseURL ? { baseURL } : {}) });
+  return _client;
+}
+
+function getModel() {
+  return process.env.OPENAI_MODEL || "deepseek-chat";
+}
+
+function withTimeout(promise, ms, label) {
+  return Promise.race([
+    promise,
+    new Promise((_, reject) =>
+      setTimeout(() => reject(new Error(`${label}_timeout_${ms}ms`)), ms)
+    ),
+  ]);
+}
+
+/**
+ * Punto de entrada principal. Mismo signature que runTurnV3.
+ */
+export async function runTurnAgent({
+  tenantId,
+  chat_id,
+  text,
+  prev_state,
+  prev_context,
+  conversation_history,
+}) {
+  const t0 = Date.now();
+  const audit = {
+    trace: { tenantId, chat_id, text_preview: String(text || "").slice(0, 50), prev_state, engine: "agent" },
+    tool_calls: [],
+    llm_calls: 0,
+  };
+
+  // Cargar order, store, last_shown_options, customer_profile
+  const order = migrateOldContext(prev_context);
+  const storeConfig = await getStoreConfig({ tenantId });
+  const lastShownOptions = Array.isArray(prev_context?.last_shown_options)
+    ? prev_context.last_shown_options
+    : [];
+  const customerProfile = await getCustomerProfile({ tenantId, chat_id }).catch((err) => {
+    audit.customer_profile_error = String(err?.message || err);
+    return null;
+  });
+
+  // Construir working memory
+  const wm = buildWorkingMemory({
+    text,
+    order,
+    prev_state: prev_state || "IDLE",
+    conversation_history,
+    storeConfig,
+    customerProfile,
+    lastShownOptions,
+  });
+
+  // Estado mutable que los tools mutan
+  const ctx = {
+    tenantId,
+    chat_id,
+    order: { ...order, last_shown_options: lastShownOptions },
+    pending_actions: [],
+    last_shown_options: [...lastShownOptions],
+    storeConfig,
+    say_text: null,
+    paused: false,
+    paused_until: order.paused_until ?? null,
+    awaiting_human: false,
+    awaiting_human_reason: null,
+    fsm_state: prev_state || "IDLE",
+  };
+
+  // Mensajes para el LLM
+  const systemPrompt = buildSystemPrompt({ storeName: storeConfig?.name });
+  const messages = [
+    { role: "system", content: systemPrompt },
+    { role: "user", content: JSON.stringify({ working_memory: wm }) },
+  ];
+
+  // Loop tool-calling
+  const client = getClient();
+  const model = getModel();
+  let turnDone = false;
+  let llmError = null;
+
+  try {
+    for (let i = 0; i < MAX_TOOL_CALLS && !turnDone; i++) {
+      audit.llm_calls++;
+      const elapsed = Date.now() - t0;
+      const remaining = Math.max(2000, TURN_TIMEOUT_MS - elapsed);
+
+      if (dbg.llm) console.log("[agent] llm.request", { model, iteration: i, remaining_ms: remaining });
+
+      const resp = await withTimeout(
+        client.chat.completions.create({
+          model,
+          temperature: 0.4,
+          max_tokens: 600,
+          tools: TOOL_SCHEMAS,
+          tool_choice: "required",
+          messages,
+        }),
+        remaining,
+        "agent_llm"
+      );
+
+      const msg = resp?.choices?.[0]?.message || {};
+      messages.push({ role: "assistant", content: msg.content || "", tool_calls: msg.tool_calls || [] });
+
+      const calls = msg.tool_calls || [];
+      if (!calls.length) {
+        // Sin tool calls → forzar say con fallback y salir
+        audit.no_tool_calls = true;
+        ctx.say_text = ctx.say_text || msg.content || "Disculpame, no te entendí. ¿Me lo decís de otra forma?";
+        turnDone = true;
+        break;
+      }
+
+      for (const call of calls) {
+        const obs = await executeToolCall(call, ctx);
+        audit.tool_calls.push({
+          name: call.function?.name,
+          ok: obs.ok !== false,
+          error: obs.error || null,
+          duration_ms: obs.duration_ms || null,
+        });
+        messages.push({
+          role: "tool",
+          tool_call_id: call.id,
+          content: JSON.stringify(obs),
+        });
+        if (obs.terminal) {
+          turnDone = true;
+        }
+      }
+    }
+  } catch (err) {
+    llmError = String(err?.message || err);
+    audit.llm_error = llmError;
+    if (dbg.llm) console.error("[agent] error", llmError);
+  }
+
+  // Si no hay say, fallback determinista
+  if (!ctx.say_text) {
+    ctx.say_text = pickFallbackReply(ctx, llmError);
+    audit.fallback_used = true;
+  }
+
+  audit.duration_ms = Date.now() - t0;
+
+  // Actualizar métricas
+  _metrics.turns++;
+  _metrics.total_tool_calls += audit.tool_calls.length;
+  _metrics.total_llm_calls += audit.llm_calls;
+  _metrics.total_duration_ms += audit.duration_ms;
+  if (audit.fallback_used) _metrics.fallback_used++;
+  if (audit.llm_error) _metrics.llm_errors++;
+  if (ctx.awaiting_human) _metrics.escalations++;
+  if (ctx.paused) _metrics.pauses++;
+  if (ctx.pending_actions.some((a) => a.type === "create_order")) _metrics.orders_confirmed++;
+
+  // Derivar nextState desde el order resultante
+  const signals = {
+    confirm_order: ctx.pending_actions.some((a) => a.type === "create_order"),
+    shipping_completed: ctx.pending_actions.some((a) => a.type === "create_order"),
+    return_to_cart: false,
+  };
+  // Si el agente pausó la conversación, mantenemos el order pero el next_state
+  // queda guardado en ctx.fsm_state ("PAUSED") para que pipeline lo persista.
+  let nextState;
+  if (ctx.awaiting_human) {
+    nextState = ConversationState.AWAITING_HUMAN;
+  } else if (ctx.paused) {
+    nextState = "PAUSED"; // estado nuevo, fsm.js lo va a permitir tras D7
+  } else {
+    nextState = safeNextState(prev_state, ctx.order, signals).next_state;
+  }
+
+  return {
+    plan: {
+      reply: ctx.say_text,
+      next_state: nextState,
+      intent: detectIntent(audit.tool_calls),
+      missing_fields: [],
+      order_action: ctx.pending_actions[0]?.type || "none",
+      basket_resolved: { items: (ctx.order.cart || []).map(toBasketItem) },
+    },
+    decision: {
+      actions: ctx.pending_actions,
+      context_patch: buildContextPatch(ctx),
+      audit,
+    },
+  };
+}
+
+function pickFallbackReply(ctx, err) {
+  if (ctx.awaiting_human) return "Te paso con un humano que pueda ayudarte.";
+  if (ctx.paused) return "Dale, cuando quieras seguimos.";
+  if (err) return "Disculpame, tuve un problema. ¿Lo intentás de nuevo?";
+  return "No te seguí, ¿me lo decís de otra forma?";
+}
+
+function detectIntent(toolCalls = []) {
+  const names = toolCalls.map((c) => c.name);
+  if (names.includes("confirm_order")) return "confirm_order";
+  if (names.includes("set_address") || names.includes("set_shipping")) return "select_shipping";
+  if (names.includes("escalate_to_human")) return "escalate";
+  if (names.includes("pause")) return "pause";
+  if (names.includes("add_to_cart") || names.includes("set_quantity") || names.includes("select_candidate")) return "add_to_cart";
+  if (names.includes("remove_from_cart")) return "remove_from_cart";
+  if (names.includes("search_catalog")) return "browse";
+  return "other";
+}
+
+function toBasketItem(item) {
+  return {
+    product_id: item.woo_id,
+    woo_product_id: item.woo_id,
+    quantity: item.qty,
+    unit: item.unit,
+    label: item.name,
+    name: item.name,
+    price: item.price,
+  };
+}
+
+function buildContextPatch(ctx) {
+  const order = ctx.order || createEmptyOrder();
+  return {
+    order,
+    order_basket: { items: (order.cart || []).map(toBasketItem) },
+    pending_items: (order.pending || []).map((p) => ({
+      id: p.id,
+      query: p.query,
+      candidates: p.candidates,
+      resolved_product: p.selected_woo_id
+        ? {
+            woo_product_id: p.selected_woo_id,
+            name: p.selected_name,
+            price: p.selected_price,
+            display_unit: p.selected_unit,
+          }
+        : null,
+      quantity: p.qty,
+      unit: p.unit,
+      status: p.status?.toLowerCase() || "needs_type",
+    })),
+    shipping_method:
+      order.is_delivery === true ? "delivery" : order.is_delivery === false ? "pickup" : null,
+    delivery_address: order.shipping_address ? { text: order.shipping_address } : null,
+    woo_order_id: order.woo_order_id,
+    last_shown_options: ctx.last_shown_options || [],
+    paused_until: ctx.paused_until || null,
+    awaiting_human: ctx.awaiting_human || false,
+    awaiting_human_reason: ctx.awaiting_human_reason || null,
+  };
+}