"""Host-side LLM batch extraction — pure HTTP client, no atocore imports.

Fetches interactions from the AtoCore API, runs ``claude -p`` locally
for each, and POSTs candidates back. Zero dependency on atocore source
or Python packages — only uses stdlib + the ``claude`` CLI on PATH.

This is necessary because the ``claude`` CLI is on the Dalidou HOST
but not inside the Docker container, and the host's Python doesn't
have the container's dependencies (pydantic_settings, etc.).
"""
from __future__ import annotations

import argparse
import json
import os
import shutil
import subprocess
import sys
import tempfile
import urllib.error
import urllib.parse
import urllib.request
from datetime import datetime, timezone

# Connection / model defaults, overridable via environment variables.
DEFAULT_BASE_URL = os.environ.get("ATOCORE_BASE_URL", "http://localhost:8100")
DEFAULT_MODEL = os.environ.get("ATOCORE_LLM_EXTRACTOR_MODEL", "sonnet")
DEFAULT_TIMEOUT_S = float(os.environ.get("ATOCORE_LLM_EXTRACTOR_TIMEOUT_S", "90"))

# Truncation limits for the interaction text sent to the model.
MAX_RESPONSE_CHARS = 8000
MAX_PROMPT_CHARS = 2000

# Memory types accepted from the model's output (parse_candidates filters on this).
MEMORY_TYPES = {"identity", "preference", "project", "episodic", "knowledge", "adaptation"}

# System prompt appended to every `claude -p` call. Defines the two candidate
# kinds (project-scoped vs. cross-project domain knowledge) and the quality bar.
SYSTEM_PROMPT = """You extract durable memory candidates from LLM conversation turns for a personal context engine called AtoCore.

AtoCore stores two kinds of knowledge:

A. PROJECT-SPECIFIC: applied decisions, constraints, and architecture for a named project. Known projects include p04-gigabit, p05-interferometer, p06-polisher, atomizer-v2, atocore, abb-space. If the conversation discusses a project NOT in this list, still tag it with the project name you identify — the system will auto-detect it as a new project or lead.

B. DOMAIN KNOWLEDGE: generalizable engineering insight that was EARNED through project work and is reusable across projects. Tag these with a domain instead of a project.

THE CRITICAL BAR FOR DOMAIN KNOWLEDGE:
Only extract insight that took real effort to discover. The test: "Would a competent engineer need experience to know this, or could they find it in 30 seconds on Google?" If they can look it up, do NOT extract it.

EXTRACT (earned insight):
- "At F/1.2, Zerodur CTE gradient across the blank is the second-largest WFE contributor after gravity sag"
- "Preston removal rate model breaks down below 5N applied force because the contact assumption fails"
- "For swing-arm polishing, m=1 (coma) is NOT correctable by force modulation (score 0.09)"

DO NOT EXTRACT (common knowledge):
- "Zerodur CTE is 0.05 ppm/K" (textbook value)
- "FEA uses finite elements to discretize continuous domains" (definition)
- "Python is a programming language" (obvious)

Rules:
1. Only surface durable claims. Skip transient status, instructional guidance, troubleshooting, ephemeral recommendations, session recaps.
2. A candidate is durable when a reader coming back in two weeks would still need to know it.
3. Each candidate must stand alone in one sentence under 200 characters.
4. Type must be one of: project, knowledge, preference, adaptation.
5. For project-specific claims, set ``project`` to the project id.
6. For generalizable domain insight, set ``project`` to empty and set ``domain`` to one of: physics, materials, optics, mechanics, manufacturing, metrology, controls, software, math, finance.
7. When one conversation produces BOTH a project-specific fact AND a generalizable principle, emit BOTH as separate candidates.
8. Return [] on most turns. The bar is high. Empty is correct and expected.
9. Confidence 0.5 default. Raise to 0.6 only for unambiguous committed claims.
10. Output a raw JSON array only. No prose, no markdown fences.

Each array element:
{"type": "project|knowledge|preference|adaptation", "content": "...", "project": "...", "domain": "", "confidence": 0.5}

Use ``project`` for project-scoped candidates. Use ``domain`` for cross-project knowledge. Never set both."""

# Lazily-created scratch directory used as the cwd for `claude` subprocesses,
# so the CLI never runs inside a real project tree.
_sandbox_cwd = None


def get_sandbox_cwd():
    """Return (creating on first call) the sandbox working directory path."""
    global _sandbox_cwd
    if _sandbox_cwd is None:
        _sandbox_cwd = tempfile.mkdtemp(prefix="ato-llm-extract-")
    return _sandbox_cwd
def api_get(base_url, path, timeout=10):
    """GET ``base_url + path`` and return the response body parsed as JSON.

    Raises on HTTP/network errors (urllib.error.*) or invalid JSON.
    """
    req = urllib.request.Request(f"{base_url}{path}")
    with urllib.request.urlopen(req, timeout=timeout) as resp:
        return json.loads(resp.read().decode("utf-8"))

def api_post(base_url, path, body, timeout=10):
    """POST ``body`` as JSON to ``base_url + path``; return the parsed JSON reply.

    Raises on HTTP/network errors (urllib.error.*) or invalid JSON.
    """
    data = json.dumps(body).encode("utf-8")
    req = urllib.request.Request(
        f"{base_url}{path}",
        method="POST",
        headers={"Content-Type": "application/json"},
        data=data,
    )
    with urllib.request.urlopen(req, timeout=timeout) as resp:
        return json.loads(resp.read().decode("utf-8"))

def get_last_run(base_url):
    """Best-effort fetch of the last batch-run timestamp from project state.

    Returns the stored value string, or None on any failure (network error,
    missing key, malformed response) — first run is then assumed.
    """
    try:
        state = api_get(base_url, "/project/state/atocore?category=status")
        for entry in state.get("entries", []):
            if entry.get("key") == "last_extract_batch_run":
                return entry["value"]
    except Exception:
        pass  # deliberately best-effort: any failure falls through to None
    return None

def set_last_run(base_url, timestamp):
    """Best-effort persist of the batch-run timestamp into project state.

    Failures are ignored: a missed checkpoint only means the next run
    re-scans some interactions.
    """
    try:
        api_post(base_url, "/project/state", {
            "project": "atocore",
            "category": "status",
            "key": "last_extract_batch_run",
            "value": timestamp,
            "source": "batch_llm_extract_live.py",
        })
    except Exception:
        pass  # deliberately best-effort
_known_projects : set [ str ] = set ( )
def _load_known_projects ( base_url ) :
""" Fetch registered project IDs from the API for R9 validation. """
global _known_projects
try :
data = api_get ( base_url , " /projects " )
_known_projects = { p [ " id " ] for p in data . get ( " projects " , [ ] ) }
for p in data . get ( " projects " , [ ] ) :
for alias in p . get ( " aliases " , [ ] ) :
_known_projects . add ( alias )
except Exception :
pass
2026-04-12 10:57:18 -04:00
def extract_one(prompt, response, project, model, timeout_s):
    """Run ``claude -p`` on one interaction; return ``(candidates, error)``.

    ``candidates`` is the parsed list from parse_candidates(); ``error`` is a
    short machine-readable string ("" on success, else e.g. "timeout",
    "claude_cli_missing", "exit_<code>").
    """
    if not shutil.which("claude"):
        return [], "claude_cli_missing"
    # Truncate both sides so the subprocess argv stays bounded.
    prompt_excerpt = prompt[:MAX_PROMPT_CHARS]
    response_excerpt = response[:MAX_RESPONSE_CHARS]
    user_message = (
        f"PROJECT HINT (may be empty): {project}\n\n"
        f"USER PROMPT:\n{prompt_excerpt}\n\n"
        f"ASSISTANT RESPONSE:\n{response_excerpt}\n\n"
        "Return the JSON array now."
    )
    args = [
        "claude", "-p",
        "--model", model,
        "--append-system-prompt", SYSTEM_PROMPT,
        "--disable-slash-commands",
        user_message,
    ]
    try:
        # cwd is a throwaway sandbox dir so the CLI can't pick up a project context.
        completed = subprocess.run(
            args, capture_output=True, text=True,
            timeout=timeout_s, cwd=get_sandbox_cwd(),
            encoding="utf-8", errors="replace",
        )
    except subprocess.TimeoutExpired:
        return [], "timeout"
    except Exception as exc:
        return [], f"subprocess_error: {exc}"
    if completed.returncode != 0:
        return [], f"exit_{completed.returncode}"
    raw = (completed.stdout or "").strip()
    return parse_candidates(raw, project), ""
def parse_candidates(raw, interaction_project):
    """Parse model JSON output into candidate dicts ready for POST /memory.

    Tolerates markdown code fences and surrounding prose despite the prompt's
    "raw JSON array only" rule. Returns [] for anything unparseable. Each
    result dict has keys: memory_type, content (capped at 1000 chars),
    project, confidence (clamped to [0, 1]).
    """
    text = raw.strip()
    # Strip a markdown fence the model sometimes emits anyway.
    if text.startswith("```"):
        text = text.strip("`")
        nl = text.find("\n")
        if nl >= 0:
            text = text[nl + 1:]  # drop the language tag line
        if text.endswith("```"):
            text = text[:-3]
    text = text.strip()
    if not text or text == "[]":
        return []
    # Salvage a JSON array embedded in surrounding prose.
    if not text.lstrip().startswith("["):
        start = text.find("[")
        end = text.rfind("]")
        if start >= 0 and end > start:
            text = text[start:end + 1]
    try:
        parsed = json.loads(text)
    except json.JSONDecodeError:
        return []
    if not isinstance(parsed, list):
        return []
    results = []
    for item in parsed:
        if not isinstance(item, dict):
            continue
        mem_type = str(item.get("type") or "").strip().lower()
        content = str(item.get("content") or "").strip()
        model_project = str(item.get("project") or "").strip()
        domain = str(item.get("domain") or "").strip().lower()
        # R9 trust hierarchy: interaction scope always wins when set.
        # For unscoped interactions, keep the model's project tag even if
        # unregistered — the system will detect new projects/leads.
        if interaction_project:
            project = interaction_project
        elif model_project:
            project = model_project
        else:
            project = ""
        # Domain knowledge: embed the tag in content so it survives without
        # a schema migration and stays retrievable across projects.
        if domain and not project:
            content = f"[{domain}] {content}"
        conf = item.get("confidence", 0.5)
        if mem_type not in MEMORY_TYPES or not content:
            continue
        try:
            conf = max(0.0, min(1.0, float(conf)))
        except (TypeError, ValueError):
            conf = 0.5
        results.append({
            "memory_type": mem_type,
            "content": content[:1000],
            "project": project,
            "confidence": conf,
        })
    return results

def main():
    """List recent interactions, extract candidates via ``claude``, POST them back.

    Persists a run timestamp so the next invocation resumes incrementally.
    Prints a one-line summary of processed/candidate/persisted/error counts.
    """
    parser = argparse.ArgumentParser(description="Host-side LLM batch extraction")
    parser.add_argument("--base-url", default=DEFAULT_BASE_URL)
    parser.add_argument("--limit", type=int, default=50)
    parser.add_argument("--since", default=None)
    parser.add_argument("--model", default=DEFAULT_MODEL)
    args = parser.parse_args()

    _load_known_projects(args.base_url)

    since = args.since or get_last_run(args.base_url)
    print(f"since={since or '(first run)'} limit={args.limit} model={args.model} known_projects={len(_known_projects)}")

    params = [f"limit={args.limit}"]
    if since:
        params.append(f"since={urllib.parse.quote(since)}")
    listing = api_get(args.base_url, f"/interactions?{'&'.join(params)}")
    interaction_summaries = listing.get("interactions", [])
    print(f"listed {len(interaction_summaries)} interactions")

    processed = 0
    total_candidates = 0
    total_persisted = 0
    errors = 0
    for summary in interaction_summaries:
        # Skip trivially short responses using the summary, avoiding a full fetch.
        resp_chars = summary.get("response_chars", 0) or 0
        if resp_chars < 50:
            continue
        iid = summary["id"]
        try:
            raw = api_get(
                args.base_url,
                f"/interactions/{urllib.parse.quote(iid, safe='')}",
            )
        except Exception as exc:
            print(f"! {iid[:8]}: fetch failed: {exc}", file=sys.stderr)
            errors += 1
            continue

        response_text = raw.get("response", "") or ""
        if not response_text.strip() or len(response_text) < 50:
            continue

        candidates, error = extract_one(
            prompt=raw.get("prompt", "") or "",
            response=response_text,
            project=raw.get("project", "") or "",
            model=args.model,
            timeout_s=DEFAULT_TIMEOUT_S,
        )
        if error:
            print(f"! {raw['id'][:8]}: {error}", file=sys.stderr)
            errors += 1
            continue
        processed += 1
        total_candidates += len(candidates)
        for c in candidates:
            try:
                api_post(args.base_url, "/memory", {
                    "memory_type": c["memory_type"],
                    "content": c["content"],
                    "project": c["project"],
                    "confidence": c["confidence"],
                    "status": "candidate",
                })
                total_persisted += 1
            except urllib.error.HTTPError as exc:
                # HTTP 400 presumably means duplicate/rejected candidate and is
                # not counted as an error — TODO confirm against the API.
                if exc.code != 400:
                    errors += 1
            except Exception:
                errors += 1
    now = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S")
    set_last_run(args.base_url, now)
    print(f"processed={processed} candidates={total_candidates} persisted={total_persisted} errors={errors}")


if __name__ == "__main__":
    main()