Files
botino/src/modules/3-turn-engine/agent/runTurn.js
Lucas Tettamanti 03621f16f4 Redesign: agente tool-calling con DeepSeek (D2-D10 del plan)
Reemplaza el NLU rígido (intent+entities) por un agente LLM con tool-calling
que decide y muta estado en cada turno. Opt-in vía AGENT_TURN_ENGINE=1.
DeepSeek V4 (deepseek-chat) configurado como modelo (OpenAI-compatible).

Arquitectura nueva en src/modules/3-turn-engine/agent/:

- workingMemory.js: arma el JSON contextual que recibe el LLM cada turno
  (cart, pending, last_shown_options, store, customer_profile, history,
  preparsed quantity).
- systemPrompt.js: prompt estático ~70 líneas. Define rol + reglas duras +
  cómo procesar mensajes + cómo escribir el say. Sin enumeración de intents.
- runTurn.js: loop de tool-calling con tool_choice="required". Cap 10 tool
  calls / 20s timeout. Métricas in-memory.
- customerProfile.js: lookup de frequent_items en woo_orders_cache por
  teléfono (chat_id → phone), top 5 últimos 6 meses. Cache 10 min.
- tools/schemas.js: 11 tools (search_catalog, add_to_cart, set_quantity,
  select_candidate, remove_from_cart, set_shipping, set_address,
  confirm_order, pause, escalate_to_human, say).
- tools/executor.js: validación Ajv + dispatch + observación al LLM.
  woo_id se valida contra snapshot — si no existe el agente vuelve a
  search_catalog (anti-alucinación).
- tools/searchCatalog.js: wrappea retrieveCandidates + fallback por
  categoría usando jsonb_array_elements_text del snapshot. Persiste
  last_shown_options automáticamente.
- tools/{addToCart, setQuantity, selectCandidate, removeFromCart,
  setShipping, setAddress, confirmOrder, pause, escalateToHuman}.js:
  side effects atómicos sobre el order.
- quantityParser.js (D1): determinístico, parsea fracciones, frases
  compuestas (media docena, cuarto kilo), numéricos. 46 tests.

FSM extendida (fsm.js): nuevo estado PAUSED (TTL 7d, cart preservado,
"después te digo" → pause tool).

pipeline.js: TTL stale ahora 24h general, 7d si PAUSED, infinito si
AWAITING_HUMAN.

turnEngineV3.js: nuevas flags AGENT_TURN_ENGINE y AGENT_TURN_ENGINE_SHADOW.
Branch a runTurnAgent cuando full o corre en paralelo escribiendo diffs
estructurales en audit_log (entity_type='agent_shadow') para validar
paridad antes de flippar.

Endpoint nuevo: GET /api/metrics/agent → turns, avg_tool_calls, fallback
rate, escalations, pauses, orders_confirmed.

Smoke test E2E con DeepSeek real:
- "hola" → say (2.3s, 1 tool)
- "2kg de vacio" → search → add_to_cart → say (8.8s, 3 tools)
- "media docena de chorizos" → search → say con clarificación (10.3s, 4 tools)
- "listo" → say (3.3s, 1 tool)
- "retiro" → set_shipping → confirm → say (5.1s, 3 tools)
Cart final correcto: 2kg de Vacío. Estado: CART → SHIPPING.

Tests: 238/238 pasando.

D9 (cleanup legacy ~1200 LOC NLU/handlers/replyRewriter) DEFERRED:
se hace después de paridad shadow validada con tráfico real. Hoy
agente coexiste con legacy; default sigue siendo el motor V3.

Plan completo en ~/.claude/plans/ok-creo-que-tiene-humming-sutton.md.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-02 12:52:47 -03:00

320 lines
10 KiB
JavaScript

/**
* runTurn — Punto de entrada del agente tool-calling.
*
* Reemplaza turnEngineV3 cuando AGENT_TURN_ENGINE=1.
* Mantiene la firma compatible con pipeline.js:
* runTurnAgent({ tenantId, chat_id, text, prev_state, prev_context, conversation_history })
* → { plan, decision }
*/
import OpenAI from "openai";
import { migrateOldContext, createEmptyOrder } from "../orderModel.js";
import { getStoreConfig } from "../../0-ui/db/settingsRepo.js";
import { ConversationState, safeNextState } from "../fsm.js";
import { buildWorkingMemory } from "./workingMemory.js";
import { buildSystemPrompt } from "./systemPrompt.js";
import { TOOL_SCHEMAS } from "./tools/schemas.js";
import { executeToolCall } from "./tools/executor.js";
import { getCustomerProfile } from "./customerProfile.js";
import { debug as dbg } from "../../shared/debug.js";
// Per-turn cap on tool executions; overridable via AGENT_MAX_TOOL_CALLS.
const MAX_TOOL_CALLS = parseInt(process.env.AGENT_MAX_TOOL_CALLS || "10", 10);
// Wall-clock budget (ms) for the whole turn; overridable via AGENT_TURN_TIMEOUT_MS.
const TURN_TIMEOUT_MS = parseInt(process.env.AGENT_TURN_TIMEOUT_MS || "20000", 10);
// In-memory metrics: turns/calls, fallback rate, escalation rate, avg duration.
// Process-local counters only (lost on restart); read via getAgentMetrics(),
// zeroed via resetAgentMetrics(), incremented at the end of runTurnAgent.
const _metrics = {
  turns: 0,              // completed agent turns
  total_tool_calls: 0,   // tool executions across all turns
  total_llm_calls: 0,    // chat.completions requests across all turns
  total_duration_ms: 0,  // summed turn durations
  fallback_used: 0,      // turns that needed the deterministic fallback reply
  llm_errors: 0,         // turns where the LLM loop threw (incl. timeouts)
  escalations: 0,        // turns ending in AWAITING_HUMAN
  pauses: 0,             // turns ending in PAUSED
  orders_confirmed: 0,   // turns that emitted a create_order action
};
/**
 * Snapshot of the in-memory agent metrics, with per-turn averages and rates
 * derived on the fly. All ratios are 0 when no turns have been recorded.
 */
export function getAgentMetrics() {
  const turns = _metrics.turns;
  // Averages to 2 decimals, rates to 3 — matching the /api/metrics/agent contract.
  const avg2 = (total) => (turns ? +(total / turns).toFixed(2) : 0);
  const rate3 = (count) => (turns ? +(count / turns).toFixed(3) : 0);
  return {
    turns,
    avg_tool_calls_per_turn: avg2(_metrics.total_tool_calls),
    avg_llm_calls_per_turn: avg2(_metrics.total_llm_calls),
    avg_duration_ms: turns ? Math.round(_metrics.total_duration_ms / turns) : 0,
    fallback_rate: rate3(_metrics.fallback_used),
    error_rate: rate3(_metrics.llm_errors),
    escalations: _metrics.escalations,
    pauses: _metrics.pauses,
    orders_confirmed: _metrics.orders_confirmed,
  };
}
/**
 * Zero every metrics counter in place (the shared `_metrics` object is a
 * module-level const, so we mutate it rather than replacing it).
 */
export function resetAgentMetrics() {
  Object.keys(_metrics).forEach((key) => {
    _metrics[key] = 0;
  });
}
// Lazily-constructed singleton SDK client.
let _client = null;

/**
 * Return the shared OpenAI-compatible client, creating it on first use.
 * Honors OPENAI_BASE_URL for non-OpenAI endpoints (e.g. DeepSeek).
 * @throws {Error} when OPENAI_API_KEY is not set.
 */
function getClient() {
  if (_client !== null) return _client;
  const apiKey = process.env.OPENAI_API_KEY;
  if (!apiKey) throw new Error("OPENAI_API_KEY not set");
  const baseURL = process.env.OPENAI_BASE_URL || undefined;
  const options = { apiKey };
  if (baseURL) {
    options.baseURL = baseURL;
  }
  _client = new OpenAI(options);
  return _client;
}
/**
 * Model name for chat completions; env override wins, otherwise the
 * DeepSeek OpenAI-compatible default.
 */
function getModel() {
  const configured = process.env.OPENAI_MODEL;
  return configured ? configured : "deepseek-chat";
}
/**
 * Race `promise` against a rejection timer.
 *
 * Fix: the original never cleared the timer, so every call left a pending
 * setTimeout alive for up to `ms` even after `promise` settled — keeping the
 * event loop busy and firing a pointless late rejection. The timer is now
 * cleared as soon as the race settles, in both the resolve and reject paths.
 *
 * @param {Promise} promise - The operation to bound.
 * @param {number} ms - Timeout in milliseconds.
 * @param {string} label - Prefix for the timeout error message.
 * @returns {Promise} settles like `promise`, or rejects with
 *   `Error("<label>_timeout_<ms>ms")` if the timer fires first.
 */
function withTimeout(promise, ms, label) {
  let timer;
  const timeout = new Promise((_, reject) => {
    timer = setTimeout(() => reject(new Error(`${label}_timeout_${ms}ms`)), ms);
  });
  // .finally preserves the race outcome (value or rejection) while always
  // releasing the timer.
  return Promise.race([promise, timeout]).finally(() => clearTimeout(timer));
}
/**
 * Main entry point of the tool-calling agent. Same signature as runTurnV3 so
 * pipeline.js can branch between engines transparently.
 *
 * @param {object} args
 * @param {string} args.tenantId - Tenant the conversation belongs to.
 * @param {string} args.chat_id - Chat identifier (used for customer-profile lookup).
 * @param {string} args.text - Raw user message for this turn.
 * @param {string} [args.prev_state] - FSM state persisted after the last turn; defaults to "IDLE".
 * @param {object} [args.prev_context] - Persisted context (order, last_shown_options, ...).
 * @param {Array}  [args.conversation_history] - Prior messages fed into the working memory.
 * @returns {Promise<{plan: object, decision: object}>} `plan` carries the reply,
 *   next_state and legacy V3 fields; `decision` carries pending actions, the
 *   context patch to persist, and the per-turn audit record.
 */
export async function runTurnAgent({
  tenantId,
  chat_id,
  text,
  prev_state,
  prev_context,
  conversation_history,
}) {
  const t0 = Date.now();
  // Per-turn audit record; returned inside `decision` for the caller to persist.
  const audit = {
    trace: { tenantId, chat_id, text_preview: String(text || "").slice(0, 50), prev_state, engine: "agent" },
    tool_calls: [],
    llm_calls: 0,
  };
  // Load order, store config, last_shown_options, customer profile.
  const order = migrateOldContext(prev_context);
  const storeConfig = await getStoreConfig({ tenantId });
  const lastShownOptions = Array.isArray(prev_context?.last_shown_options)
    ? prev_context.last_shown_options
    : [];
  // Profile lookup is best-effort: a failure is noted in the audit and the
  // turn proceeds with a null profile.
  const customerProfile = await getCustomerProfile({ tenantId, chat_id }).catch((err) => {
    audit.customer_profile_error = String(err?.message || err);
    return null;
  });
  // Build the working-memory JSON the LLM receives as the user message.
  const wm = buildWorkingMemory({
    text,
    order,
    prev_state: prev_state || "IDLE",
    conversation_history,
    storeConfig,
    customerProfile,
    lastShownOptions,
  });
  // Mutable turn state; tool executors mutate this object in place.
  const ctx = {
    tenantId,
    chat_id,
    order: { ...order, last_shown_options: lastShownOptions },
    pending_actions: [],
    last_shown_options: [...lastShownOptions],
    storeConfig,
    say_text: null,
    paused: false,
    paused_until: order.paused_until ?? null,
    awaiting_human: false,
    awaiting_human_reason: null,
    fsm_state: prev_state || "IDLE",
  };
  // Conversation sent to the LLM: static system prompt + working memory as the user turn.
  const systemPrompt = buildSystemPrompt({ storeName: storeConfig?.name });
  const messages = [
    { role: "system", content: systemPrompt },
    { role: "user", content: JSON.stringify({ working_memory: wm }) },
  ];
  // Tool-calling loop, capped by MAX_TOOL_CALLS iterations and TURN_TIMEOUT_MS overall.
  const client = getClient();
  const model = getModel();
  let turnDone = false;
  let llmError = null;
  try {
    for (let i = 0; i < MAX_TOOL_CALLS && !turnDone; i++) {
      audit.llm_calls++;
      const elapsed = Date.now() - t0;
      // Shrinking per-call budget, floored at 2s so late iterations still get a chance.
      const remaining = Math.max(2000, TURN_TIMEOUT_MS - elapsed);
      if (dbg.llm) console.log("[agent] llm.request", { model, iteration: i, remaining_ms: remaining });
      const resp = await withTimeout(
        client.chat.completions.create({
          model,
          temperature: 0.4,
          max_tokens: 600,
          tools: TOOL_SCHEMAS,
          tool_choice: "required",
          messages,
        }),
        remaining,
        "agent_llm"
      );
      const msg = resp?.choices?.[0]?.message || {};
      messages.push({ role: "assistant", content: msg.content || "", tool_calls: msg.tool_calls || [] });
      const calls = msg.tool_calls || [];
      if (!calls.length) {
        // No tool calls (despite tool_choice="required") → force a say fallback and exit.
        audit.no_tool_calls = true;
        ctx.say_text = ctx.say_text || msg.content || "Disculpame, no te entendí. ¿Me lo decís de otra forma?";
        turnDone = true;
        break;
      }
      // Execute each requested tool, audit it, and feed its observation back to the LLM.
      for (const call of calls) {
        const obs = await executeToolCall(call, ctx);
        audit.tool_calls.push({
          name: call.function?.name,
          ok: obs.ok !== false,
          error: obs.error || null,
          duration_ms: obs.duration_ms || null,
        });
        messages.push({
          role: "tool",
          tool_call_id: call.id,
          content: JSON.stringify(obs),
        });
        // A terminal observation (e.g. from say) ends the loop after this batch.
        if (obs.terminal) {
          turnDone = true;
        }
      }
    }
  } catch (err) {
    // Any loop failure (timeout included) degrades to the deterministic fallback below.
    llmError = String(err?.message || err);
    audit.llm_error = llmError;
    if (dbg.llm) console.error("[agent] error", llmError);
  }
  // If the agent never produced a say, pick a deterministic fallback reply.
  if (!ctx.say_text) {
    ctx.say_text = pickFallbackReply(ctx, llmError);
    audit.fallback_used = true;
  }
  audit.duration_ms = Date.now() - t0;
  // Update in-memory metrics.
  _metrics.turns++;
  _metrics.total_tool_calls += audit.tool_calls.length;
  _metrics.total_llm_calls += audit.llm_calls;
  _metrics.total_duration_ms += audit.duration_ms;
  if (audit.fallback_used) _metrics.fallback_used++;
  if (audit.llm_error) _metrics.llm_errors++;
  if (ctx.awaiting_human) _metrics.escalations++;
  if (ctx.paused) _metrics.pauses++;
  if (ctx.pending_actions.some((a) => a.type === "create_order")) _metrics.orders_confirmed++;
  // Derive next_state from the resulting order and the turn's outcome flags.
  const signals = {
    confirm_order: ctx.pending_actions.some((a) => a.type === "create_order"),
    shipping_completed: ctx.pending_actions.some((a) => a.type === "create_order"),
    return_to_cart: false,
  };
  // Escalation and pause override the FSM derivation: the cart is preserved
  // and the special state is handed to pipeline.js for persistence.
  let nextState;
  if (ctx.awaiting_human) {
    nextState = ConversationState.AWAITING_HUMAN;
  } else if (ctx.paused) {
    nextState = "PAUSED"; // new state; fsm.js accepts it as of D7
  } else {
    nextState = safeNextState(prev_state, ctx.order, signals).next_state;
  }
  return {
    plan: {
      reply: ctx.say_text,
      next_state: nextState,
      // Legacy V3 plan fields, kept for compatibility with pipeline consumers.
      intent: detectIntent(audit.tool_calls),
      missing_fields: [],
      order_action: ctx.pending_actions[0]?.type || "none",
      basket_resolved: { items: (ctx.order.cart || []).map(toBasketItem) },
    },
    decision: {
      actions: ctx.pending_actions,
      context_patch: buildContextPatch(ctx),
      audit,
    },
  };
}
/**
 * Deterministic reply used when the agent produced no say. Priority mirrors
 * the turn's terminal outcomes: escalation, then pause, then LLM error, then
 * a generic re-ask.
 */
function pickFallbackReply(ctx, err) {
  const ordered = [
    [ctx.awaiting_human, "Te paso con un humano que pueda ayudarte."],
    [ctx.paused, "Dale, cuando quieras seguimos."],
    [Boolean(err), "Disculpame, tuve un problema. ¿Lo intentás de nuevo?"],
  ];
  for (const [applies, reply] of ordered) {
    if (applies) return reply;
  }
  return "No te seguí, ¿me lo decís de otra forma?";
}
/**
 * Map the turn's tool usage to a coarse legacy intent label.
 * Rules are checked in priority order; the first group with any match wins.
 */
function detectIntent(toolCalls = []) {
  const used = new Set(toolCalls.map((c) => c.name));
  const rules = [
    [["confirm_order"], "confirm_order"],
    [["set_address", "set_shipping"], "select_shipping"],
    [["escalate_to_human"], "escalate"],
    [["pause"], "pause"],
    [["add_to_cart", "set_quantity", "select_candidate"], "add_to_cart"],
    [["remove_from_cart"], "remove_from_cart"],
    [["search_catalog"], "browse"],
  ];
  for (const [tools, intent] of rules) {
    if (tools.some((name) => used.has(name))) return intent;
  }
  return "other";
}
/**
 * Convert a cart item to the legacy basket-row shape. Id and name are
 * duplicated under both old and new key names so consumers of either
 * field set keep working.
 */
function toBasketItem(item) {
  const { woo_id, qty, unit, name, price } = item;
  return {
    product_id: woo_id,
    woo_product_id: woo_id,
    quantity: qty,
    unit,
    label: name,
    name,
    price,
  };
}
/**
 * Build the context patch persisted by pipeline.js after the turn: the order
 * itself plus legacy-shaped projections (basket rows, pending items, shipping
 * fields) and the agent's pause/escalation flags.
 */
function buildContextPatch(ctx) {
  const order = ctx.order || createEmptyOrder();

  // Pending clarifications, normalized to the legacy pending_items shape.
  const pendingItems = (order.pending || []).map((p) => {
    const resolvedProduct = p.selected_woo_id
      ? {
          woo_product_id: p.selected_woo_id,
          name: p.selected_name,
          price: p.selected_price,
          display_unit: p.selected_unit,
        }
      : null;
    return {
      id: p.id,
      query: p.query,
      candidates: p.candidates,
      resolved_product: resolvedProduct,
      quantity: p.qty,
      unit: p.unit,
      status: p.status?.toLowerCase() || "needs_type",
    };
  });

  // Tri-state delivery flag → legacy string method (null while undecided).
  let shippingMethod = null;
  if (order.is_delivery === true) shippingMethod = "delivery";
  else if (order.is_delivery === false) shippingMethod = "pickup";

  return {
    order,
    order_basket: { items: (order.cart || []).map(toBasketItem) },
    pending_items: pendingItems,
    shipping_method: shippingMethod,
    delivery_address: order.shipping_address ? { text: order.shipping_address } : null,
    woo_order_id: order.woo_order_id,
    last_shown_options: ctx.last_shown_options || [],
    paused_until: ctx.paused_until || null,
    awaiting_human: ctx.awaiting_human || false,
    awaiting_human_reason: ctx.awaiting_human_reason || null,
  };
}