Redesign: agente tool-calling con DeepSeek (D2-D10 del plan)
Reemplaza el NLU rígido (intent+entities) por un agente LLM con tool-calling
que decide y muta estado en cada turno. Opt-in vía AGENT_TURN_ENGINE=1.
DeepSeek V4 (deepseek-chat) configurado como modelo (OpenAI-compatible).
Arquitectura nueva en src/modules/3-turn-engine/agent/:
- workingMemory.js: arma el JSON contextual que recibe el LLM cada turno
(cart, pending, last_shown_options, store, customer_profile, history,
preparsed quantity).
- systemPrompt.js: prompt estático ~70 líneas. Define rol + reglas duras +
cómo procesar mensajes + cómo escribir el say. Sin enumeración de intents.
- runTurn.js: loop de tool-calling con tool_choice="required". Cap 10 tool
calls / 20s timeout. Métricas in-memory.
- customerProfile.js: lookup de frequent_items en woo_orders_cache por
teléfono (chat_id → phone), top 5 últimos 6 meses. Cache 10 min.
- tools/schemas.js: 11 tools (search_catalog, add_to_cart, set_quantity,
select_candidate, remove_from_cart, set_shipping, set_address,
confirm_order, pause, escalate_to_human, say).
- tools/executor.js: validación Ajv + dispatch + observación al LLM.
woo_id se valida contra snapshot — si no existe el agente vuelve a
search_catalog (anti-halucinación).
- tools/searchCatalog.js: wrappea retrieveCandidates + fallback por
categoría usando jsonb_array_elements_text del snapshot. Persiste
last_shown_options automáticamente.
- tools/{addToCart, setQuantity, selectCandidate, removeFromCart,
setShipping, setAddress, confirmOrder, pause, escalateToHuman}.js:
side effects atómicos sobre el order.
- quantityParser.js (D1): determinístico, parsea fracciones, frases
compuestas (media docena, cuarto kilo), numéricos. 46 tests.
FSM extendida (fsm.js): nuevo estado PAUSED (TTL 7d, cart preservado,
"después te digo" → pause tool).
pipeline.js: TTL stale ahora 24h general, 7d si PAUSED, infinito si
AWAITING_HUMAN.
turnEngineV3.js: nuevas flags AGENT_TURN_ENGINE y AGENT_TURN_ENGINE_SHADOW.
Branch a runTurnAgent cuando full o corre en paralelo escribiendo diffs
estructurales en audit_log (entity_type='agent_shadow') para validar
paridad antes de flippar.
Endpoint nuevo: GET /api/metrics/agent → turns, avg_tool_calls, fallback
rate, escalations, pauses, orders_confirmed.
Smoke test E2E con DeepSeek real:
- "hola" → say (2.3s, 1 tool)
- "2kg de vacio" → search → add_to_cart → say (8.8s, 3 tools)
- "media docena de chorizos" → search → say con clarificación (10.3s, 4 tools)
- "listo" → say (3.3s, 1 tool)
- "retiro" → set_shipping → confirm → say (5.1s, 3 tools)
Cart final correcto: 2kg de Vacío. Estado: CART → SHIPPING.
Tests: 238/238 pasando.
D9 (cleanup legacy ~1200 LOC NLU/handlers/replyRewriter) DEFERRED:
se hace después de paridad shadow validada con tráfico real. Hoy
agente coexiste con legacy; default sigue siendo el motor V3.
Plan completo en ~/.claude/plans/ok-creo-que-tiene-humming-sutton.md.
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
319
src/modules/3-turn-engine/agent/runTurn.js
Normal file
319
src/modules/3-turn-engine/agent/runTurn.js
Normal file
@@ -0,0 +1,319 @@
|
||||
/**
|
||||
* runTurn — Punto de entrada del agente tool-calling.
|
||||
*
|
||||
* Reemplaza turnEngineV3 cuando AGENT_TURN_ENGINE=1.
|
||||
* Mantiene la firma compatible con pipeline.js:
|
||||
* runTurnAgent({ tenantId, chat_id, text, prev_state, prev_context, conversation_history })
|
||||
* → { plan, decision }
|
||||
*/
|
||||
|
||||
import OpenAI from "openai";
|
||||
import { migrateOldContext, createEmptyOrder } from "../orderModel.js";
|
||||
import { getStoreConfig } from "../../0-ui/db/settingsRepo.js";
|
||||
import { ConversationState, safeNextState } from "../fsm.js";
|
||||
import { buildWorkingMemory } from "./workingMemory.js";
|
||||
import { buildSystemPrompt } from "./systemPrompt.js";
|
||||
import { TOOL_SCHEMAS } from "./tools/schemas.js";
|
||||
import { executeToolCall } from "./tools/executor.js";
|
||||
import { getCustomerProfile } from "./customerProfile.js";
|
||||
import { debug as dbg } from "../../shared/debug.js";
|
||||
|
||||
const MAX_TOOL_CALLS = parseInt(process.env.AGENT_MAX_TOOL_CALLS || "10", 10);
|
||||
const TURN_TIMEOUT_MS = parseInt(process.env.AGENT_TURN_TIMEOUT_MS || "20000", 10);
|
||||
|
||||
// Métricas in-memory: turns/calls, fallback rate, escalation rate, avg duration.
|
||||
const _metrics = {
|
||||
turns: 0,
|
||||
total_tool_calls: 0,
|
||||
total_llm_calls: 0,
|
||||
total_duration_ms: 0,
|
||||
fallback_used: 0,
|
||||
llm_errors: 0,
|
||||
escalations: 0,
|
||||
pauses: 0,
|
||||
orders_confirmed: 0,
|
||||
};
|
||||
|
||||
export function getAgentMetrics() {
|
||||
const t = _metrics.turns;
|
||||
return {
|
||||
turns: t,
|
||||
avg_tool_calls_per_turn: t ? +(_metrics.total_tool_calls / t).toFixed(2) : 0,
|
||||
avg_llm_calls_per_turn: t ? +(_metrics.total_llm_calls / t).toFixed(2) : 0,
|
||||
avg_duration_ms: t ? Math.round(_metrics.total_duration_ms / t) : 0,
|
||||
fallback_rate: t ? +(_metrics.fallback_used / t).toFixed(3) : 0,
|
||||
error_rate: t ? +(_metrics.llm_errors / t).toFixed(3) : 0,
|
||||
escalations: _metrics.escalations,
|
||||
pauses: _metrics.pauses,
|
||||
orders_confirmed: _metrics.orders_confirmed,
|
||||
};
|
||||
}
|
||||
|
||||
export function resetAgentMetrics() {
|
||||
for (const k of Object.keys(_metrics)) _metrics[k] = 0;
|
||||
}
|
||||
|
||||
let _client = null;
|
||||
function getClient() {
|
||||
if (_client) return _client;
|
||||
const apiKey = process.env.OPENAI_API_KEY;
|
||||
if (!apiKey) throw new Error("OPENAI_API_KEY not set");
|
||||
const baseURL = process.env.OPENAI_BASE_URL || undefined;
|
||||
_client = new OpenAI({ apiKey, ...(baseURL ? { baseURL } : {}) });
|
||||
return _client;
|
||||
}
|
||||
|
||||
function getModel() {
|
||||
return process.env.OPENAI_MODEL || "deepseek-chat";
|
||||
}
|
||||
|
||||
function withTimeout(promise, ms, label) {
|
||||
return Promise.race([
|
||||
promise,
|
||||
new Promise((_, reject) =>
|
||||
setTimeout(() => reject(new Error(`${label}_timeout_${ms}ms`)), ms)
|
||||
),
|
||||
]);
|
||||
}
|
||||
|
||||
/**
|
||||
* Punto de entrada principal. Mismo signature que runTurnV3.
|
||||
*/
|
||||
export async function runTurnAgent({
|
||||
tenantId,
|
||||
chat_id,
|
||||
text,
|
||||
prev_state,
|
||||
prev_context,
|
||||
conversation_history,
|
||||
}) {
|
||||
const t0 = Date.now();
|
||||
const audit = {
|
||||
trace: { tenantId, chat_id, text_preview: String(text || "").slice(0, 50), prev_state, engine: "agent" },
|
||||
tool_calls: [],
|
||||
llm_calls: 0,
|
||||
};
|
||||
|
||||
// Cargar order, store, last_shown_options, customer_profile
|
||||
const order = migrateOldContext(prev_context);
|
||||
const storeConfig = await getStoreConfig({ tenantId });
|
||||
const lastShownOptions = Array.isArray(prev_context?.last_shown_options)
|
||||
? prev_context.last_shown_options
|
||||
: [];
|
||||
const customerProfile = await getCustomerProfile({ tenantId, chat_id }).catch((err) => {
|
||||
audit.customer_profile_error = String(err?.message || err);
|
||||
return null;
|
||||
});
|
||||
|
||||
// Construir working memory
|
||||
const wm = buildWorkingMemory({
|
||||
text,
|
||||
order,
|
||||
prev_state: prev_state || "IDLE",
|
||||
conversation_history,
|
||||
storeConfig,
|
||||
customerProfile,
|
||||
lastShownOptions,
|
||||
});
|
||||
|
||||
// Estado mutable que los tools mutan
|
||||
const ctx = {
|
||||
tenantId,
|
||||
chat_id,
|
||||
order: { ...order, last_shown_options: lastShownOptions },
|
||||
pending_actions: [],
|
||||
last_shown_options: [...lastShownOptions],
|
||||
storeConfig,
|
||||
say_text: null,
|
||||
paused: false,
|
||||
paused_until: order.paused_until ?? null,
|
||||
awaiting_human: false,
|
||||
awaiting_human_reason: null,
|
||||
fsm_state: prev_state || "IDLE",
|
||||
};
|
||||
|
||||
// Mensajes para el LLM
|
||||
const systemPrompt = buildSystemPrompt({ storeName: storeConfig?.name });
|
||||
const messages = [
|
||||
{ role: "system", content: systemPrompt },
|
||||
{ role: "user", content: JSON.stringify({ working_memory: wm }) },
|
||||
];
|
||||
|
||||
// Loop tool-calling
|
||||
const client = getClient();
|
||||
const model = getModel();
|
||||
let turnDone = false;
|
||||
let llmError = null;
|
||||
|
||||
try {
|
||||
for (let i = 0; i < MAX_TOOL_CALLS && !turnDone; i++) {
|
||||
audit.llm_calls++;
|
||||
const elapsed = Date.now() - t0;
|
||||
const remaining = Math.max(2000, TURN_TIMEOUT_MS - elapsed);
|
||||
|
||||
if (dbg.llm) console.log("[agent] llm.request", { model, iteration: i, remaining_ms: remaining });
|
||||
|
||||
const resp = await withTimeout(
|
||||
client.chat.completions.create({
|
||||
model,
|
||||
temperature: 0.4,
|
||||
max_tokens: 600,
|
||||
tools: TOOL_SCHEMAS,
|
||||
tool_choice: "required",
|
||||
messages,
|
||||
}),
|
||||
remaining,
|
||||
"agent_llm"
|
||||
);
|
||||
|
||||
const msg = resp?.choices?.[0]?.message || {};
|
||||
messages.push({ role: "assistant", content: msg.content || "", tool_calls: msg.tool_calls || [] });
|
||||
|
||||
const calls = msg.tool_calls || [];
|
||||
if (!calls.length) {
|
||||
// Sin tool calls → forzar say con fallback y salir
|
||||
audit.no_tool_calls = true;
|
||||
ctx.say_text = ctx.say_text || msg.content || "Disculpame, no te entendí. ¿Me lo decís de otra forma?";
|
||||
turnDone = true;
|
||||
break;
|
||||
}
|
||||
|
||||
for (const call of calls) {
|
||||
const obs = await executeToolCall(call, ctx);
|
||||
audit.tool_calls.push({
|
||||
name: call.function?.name,
|
||||
ok: obs.ok !== false,
|
||||
error: obs.error || null,
|
||||
duration_ms: obs.duration_ms || null,
|
||||
});
|
||||
messages.push({
|
||||
role: "tool",
|
||||
tool_call_id: call.id,
|
||||
content: JSON.stringify(obs),
|
||||
});
|
||||
if (obs.terminal) {
|
||||
turnDone = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch (err) {
|
||||
llmError = String(err?.message || err);
|
||||
audit.llm_error = llmError;
|
||||
if (dbg.llm) console.error("[agent] error", llmError);
|
||||
}
|
||||
|
||||
// Si no hay say, fallback determinista
|
||||
if (!ctx.say_text) {
|
||||
ctx.say_text = pickFallbackReply(ctx, llmError);
|
||||
audit.fallback_used = true;
|
||||
}
|
||||
|
||||
audit.duration_ms = Date.now() - t0;
|
||||
|
||||
// Actualizar métricas
|
||||
_metrics.turns++;
|
||||
_metrics.total_tool_calls += audit.tool_calls.length;
|
||||
_metrics.total_llm_calls += audit.llm_calls;
|
||||
_metrics.total_duration_ms += audit.duration_ms;
|
||||
if (audit.fallback_used) _metrics.fallback_used++;
|
||||
if (audit.llm_error) _metrics.llm_errors++;
|
||||
if (ctx.awaiting_human) _metrics.escalations++;
|
||||
if (ctx.paused) _metrics.pauses++;
|
||||
if (ctx.pending_actions.some((a) => a.type === "create_order")) _metrics.orders_confirmed++;
|
||||
|
||||
// Derivar nextState desde el order resultante
|
||||
const signals = {
|
||||
confirm_order: ctx.pending_actions.some((a) => a.type === "create_order"),
|
||||
shipping_completed: ctx.pending_actions.some((a) => a.type === "create_order"),
|
||||
return_to_cart: false,
|
||||
};
|
||||
// Si el agente pausó la conversación, mantenemos el order pero el next_state
|
||||
// queda guardado en ctx.fsm_state ("PAUSED") para que pipeline lo persista.
|
||||
let nextState;
|
||||
if (ctx.awaiting_human) {
|
||||
nextState = ConversationState.AWAITING_HUMAN;
|
||||
} else if (ctx.paused) {
|
||||
nextState = "PAUSED"; // estado nuevo, fsm.js lo va a permitir tras D7
|
||||
} else {
|
||||
nextState = safeNextState(prev_state, ctx.order, signals).next_state;
|
||||
}
|
||||
|
||||
return {
|
||||
plan: {
|
||||
reply: ctx.say_text,
|
||||
next_state: nextState,
|
||||
intent: detectIntent(audit.tool_calls),
|
||||
missing_fields: [],
|
||||
order_action: ctx.pending_actions[0]?.type || "none",
|
||||
basket_resolved: { items: (ctx.order.cart || []).map(toBasketItem) },
|
||||
},
|
||||
decision: {
|
||||
actions: ctx.pending_actions,
|
||||
context_patch: buildContextPatch(ctx),
|
||||
audit,
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
function pickFallbackReply(ctx, err) {
|
||||
if (ctx.awaiting_human) return "Te paso con un humano que pueda ayudarte.";
|
||||
if (ctx.paused) return "Dale, cuando quieras seguimos.";
|
||||
if (err) return "Disculpame, tuve un problema. ¿Lo intentás de nuevo?";
|
||||
return "No te seguí, ¿me lo decís de otra forma?";
|
||||
}
|
||||
|
||||
function detectIntent(toolCalls = []) {
|
||||
const names = toolCalls.map((c) => c.name);
|
||||
if (names.includes("confirm_order")) return "confirm_order";
|
||||
if (names.includes("set_address") || names.includes("set_shipping")) return "select_shipping";
|
||||
if (names.includes("escalate_to_human")) return "escalate";
|
||||
if (names.includes("pause")) return "pause";
|
||||
if (names.includes("add_to_cart") || names.includes("set_quantity") || names.includes("select_candidate")) return "add_to_cart";
|
||||
if (names.includes("remove_from_cart")) return "remove_from_cart";
|
||||
if (names.includes("search_catalog")) return "browse";
|
||||
return "other";
|
||||
}
|
||||
|
||||
function toBasketItem(item) {
|
||||
return {
|
||||
product_id: item.woo_id,
|
||||
woo_product_id: item.woo_id,
|
||||
quantity: item.qty,
|
||||
unit: item.unit,
|
||||
label: item.name,
|
||||
name: item.name,
|
||||
price: item.price,
|
||||
};
|
||||
}
|
||||
|
||||
function buildContextPatch(ctx) {
|
||||
const order = ctx.order || createEmptyOrder();
|
||||
return {
|
||||
order,
|
||||
order_basket: { items: (order.cart || []).map(toBasketItem) },
|
||||
pending_items: (order.pending || []).map((p) => ({
|
||||
id: p.id,
|
||||
query: p.query,
|
||||
candidates: p.candidates,
|
||||
resolved_product: p.selected_woo_id
|
||||
? {
|
||||
woo_product_id: p.selected_woo_id,
|
||||
name: p.selected_name,
|
||||
price: p.selected_price,
|
||||
display_unit: p.selected_unit,
|
||||
}
|
||||
: null,
|
||||
quantity: p.qty,
|
||||
unit: p.unit,
|
||||
status: p.status?.toLowerCase() || "needs_type",
|
||||
})),
|
||||
shipping_method:
|
||||
order.is_delivery === true ? "delivery" : order.is_delivery === false ? "pickup" : null,
|
||||
delivery_address: order.shipping_address ? { text: order.shipping_address } : null,
|
||||
woo_order_id: order.woo_order_id,
|
||||
last_shown_options: ctx.last_shown_options || [],
|
||||
paused_until: ctx.paused_until || null,
|
||||
awaiting_human: ctx.awaiting_human || false,
|
||||
awaiting_human_reason: ctx.awaiting_human_reason || null,
|
||||
};
|
||||
}
|
||||
Reference in New Issue
Block a user