The Atlas AnyLegal OSS — documentation bound to its code
20 documents

The agentic chat loop, end to end

Follow one chat request from the SSE endpoint through the reactive turn loop to tool dispatch — the system's beating heart, doc claims bound to the code.

backend/anylegal_oss/workspace/tool_call_rescue.py147 lines · rescue_tool_calls_from_content L56–124
Outline 3 symbols
1"""
2Rescue tool calls that the model emitted as pseudo-XML in content instead of
3using the native OpenAI/Anthropic function-calling field.
4
5Some models (notably Kimi K2 on OpenRouter) fall back to emitting tool calls
6as Anthropic-style XML inside the assistant's text content:
7
8 <tool>enter_plan_mode</tool>
9 <parameter name="reason">Task is multi-dimensional</parameter>
10
11When this happens the provider returns ``tool_calls=[]`` in the
12ChatCompletion response and we'd otherwise end the turn with a user-visible
13block of raw XML. The rescue parses these patterns and synthesises the tool
14calls our dispatch loop expects, so the turn continues.
15
16Guardrails:
17- We only rescue when the NATIVE tool_calls list is empty AND the content
18 contains at least one ``<tool>`` tag. We never override a model that
19 correctly used the native mechanism.
20- We strip the parsed XML out of the content so the user doesn't see it
21 rendered as prose.
22- We only rescue tool names that are in a supplied allowlist — preventing
23 accidental dispatch of hallucinated tools.
24- The rescue is NOT a substitute for using a capable model. We log every
25 rescue at WARNING so they show up in the observer and can be counted.
26"""
27
28from __future__ import annotations
29
30import json
31import logging
32import re
33import uuid
34from typing import Any, Dict, List, Sequence, Tuple
35
logger = logging.getLogger(__name__)

# Matches a tool-name tag such as ``<tool>enter_plan_mode</tool>``. The tag may
# also be spelled ``tool_use``, ``function`` or ``function_call`` and is matched
# case-insensitively. Group 1 is the tool name (letters, digits, ``_`` and
# ``-``, not starting with a digit or ``-``). The opening and closing tag names
# are matched independently, so they are not required to be the same spelling.
_TOOL_RE = re.compile(
    r"<(?:tool|tool_use|function|function_call)>\s*([A-Za-z_][A-Za-z0-9_\-]*)\s*</(?:tool|tool_use|function|function_call)>",
    re.IGNORECASE,
)
# Matches ``<parameter name="key">value</parameter>``. Group 1 is the
# double-quoted parameter name, group 2 the raw value. ``[\s\S]*?`` lets the
# value span several lines and stops at the first ``</parameter>``.
_PARAM_RE = re.compile(
    r'<parameter\s+name\s*=\s*"([^"]+)"\s*>([\s\S]*?)</parameter>',
    re.IGNORECASE,
)
46
47def _xml_unescape(s: str) -> str:
48 return (
49 s.replace("&lt;", "<")
50 .replace("&gt;", ">")
51 .replace("&quot;", '"')
52 .replace("&apos;", "'")
53 .replace("&amp;", "&")
54 )
55
def rescue_tool_calls_from_content(
    content: str,
    allowed_tool_names: Sequence[str],
) -> Tuple[List[Dict[str, Any]], str]:
    """Parse pseudo-XML tool calls out of ``content``.

    Every tag matched by ``_TOOL_RE`` starts one call. The ``<parameter>``
    elements found between that tag and the next tool tag (or the end of the
    content) become its arguments: XML-unescaped, otherwise left as raw
    strings.

    Args:
        content: Assistant content string (may contain 0+ pseudo-tool-calls).
        allowed_tool_names: Names we're willing to dispatch. A tag whose name
            is not in this collection is logged at WARNING and is still
            stripped from the content, but produces no tool call.

    Returns:
        (tool_calls, stripped_content) — ``tool_calls`` in the same shape the
        dispatcher expects (``{"id", "name", "arguments", "index"}``), and
        the original content with the parsed XML removed. When no tool tag is
        found, ``content`` is returned unchanged.
    """
    if not content or "<" not in content:
        return [], content

    tool_matches = list(_TOOL_RE.finditer(content))
    if not tool_matches:
        return [], content

    # Build the membership lookup once instead of once per tool tag.
    allowed = frozenset(allowed_tool_names)

    tool_calls: List[Dict[str, Any]] = []
    removal_spans: List[Tuple[int, int]] = []

    for i, tm in enumerate(tool_matches):
        name = tm.group(1)

        # Parameters belong to this tool only up to the next tool tag.
        if i + 1 < len(tool_matches):
            region_end = tool_matches[i + 1].start()
        else:
            region_end = len(content)
        region = content[tm.end():region_end]

        params: Dict[str, Any] = {}
        param_end_in_region = 0
        for pm in _PARAM_RE.finditer(region):
            params[pm.group(1).strip()] = _xml_unescape(pm.group(2))
            param_end_in_region = pm.end()

        # Strip from the tool tag through its last parameter. Prose that
        # follows the last parameter stays in the content. The span is
        # recorded before the allowlist check so that rejected calls are
        # removed from the user-visible text as well.
        removal_spans.append((tm.start(), tm.end() + param_end_in_region))

        if name not in allowed:
            logger.warning(
                f"[rescue] hallucinated tool name {name!r} in content "
                f"(not in allowed set) — stripped from output, not dispatched"
            )
            continue

        tool_calls.append({
            "id": f"rescued_{uuid.uuid4().hex[:12]}",
            "name": name,
            "arguments": params,
            # Position among the calls actually dispatched, not among all tags.
            "index": len(tool_calls),
        })

    # Stitch together everything outside the removed spans.
    kept: List[str] = []
    cursor = 0
    for start, end in sorted(removal_spans):
        kept.append(content[cursor:start])
        cursor = end
    kept.append(content[cursor:])

    # Collapse the blank-line gaps left behind by the removed XML into a single
    # paragraph break. Only runs of line endings (with optional trailing
    # spaces/tabs) are touched, so indentation and in-line spacing of the
    # surviving prose are preserved.
    new_content = re.sub(r"(?:[ \t]*\r?\n){2,}", "\n\n", "".join(kept)).strip()

    if tool_calls:
        logger.warning(
            f"[rescue] recovered {len(tool_calls)} tool call(s) from pseudo-XML "
            f"content: {[tc['name'] for tc in tool_calls]}"
        )
    return tool_calls, new_content
125
def try_json_arguments(arguments: Any) -> Any:
    """Decode argument values that look like JSON arrays or objects.

    Anthropic-style XML carries every value as a string, and some models emit
    ``<parameter name="todos">[{...}, {...}]</parameter>`` where the value is
    a JSON-encoded list. For each string value that, once trimmed, starts with
    ``[`` or ``{`` and ends with ``]`` or ``}``, the parsed JSON replaces the
    string. Values that fail to parse, and all other values, are kept exactly
    as given. A non-dict ``arguments`` is returned untouched; a dict input
    yields a new dict.
    """
    if not isinstance(arguments, dict):
        return arguments

    decoded: Dict[str, Any] = {}
    for key, raw in arguments.items():
        # Default to the original value; override only on a successful parse.
        decoded[key] = raw
        if not isinstance(raw, str):
            continue
        candidate = raw.strip()
        if not candidate:
            continue
        if candidate[0] not in "[{" or candidate[-1] not in "]}":
            continue
        try:
            decoded[key] = json.loads(candidate)
        except (json.JSONDecodeError, ValueError):
            # Looked like JSON but is not; keep the raw string.
            pass
    return decoded
147