ring/default/lib/artifact-index/artifact_query.py
Fred Amaral 5da56a6c0c
fix: resolve artifact indexing and compound learnings pipeline issues
- Fix learning extraction not creating files (Stop event stdin issue)
- Fix continuity ledgers not being indexed (missing path patterns)
- Enable query auto-save by default for compound learning
- Use basename for CONTINUITY pattern matching (more precise)
- Format long bash conditional for readability
- Remove redundant --save flag (now default behavior)

X-Lerian-Ref: 0x1
2026-01-08 16:16:51 -03:00

588 lines
20 KiB
Python
Executable file

#!/usr/bin/env python3
"""
USAGE: artifact_query.py <query> [--type TYPE] [--outcome OUTCOME] [--limit N] [--db PATH] [--json]
Search the Artifact Index for relevant precedent using FTS5 full-text search.
Examples:
# Search for authentication-related work
python3 artifact_query.py "authentication OAuth JWT"
# Search only successful handoffs
python3 artifact_query.py "implement agent" --outcome SUCCEEDED
# Search plans only
python3 artifact_query.py "API design" --type plans
# Output as JSON for programmatic use
python3 artifact_query.py "context management" --json
# Limit results
python3 artifact_query.py "testing" --limit 3
"""
import argparse
import hashlib
import json
import sqlite3
import sys
import time
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, List, Optional
from utils import get_project_root, get_db_path, validate_limit
def escape_fts5_query(query: str) -> str:
    """Sanitize a raw user query for safe use in an FTS5 MATCH expression.

    Each whitespace-separated token is double-quoted (with embedded quotes
    doubled, per FTS5 escaping rules) and the tokens are OR-joined so a row
    matching any single word is returned.
    """
    tokens: List[str] = []
    for token in query.split():
        if not token:
            continue
        # Double any embedded quote, then wrap the whole token in quotes.
        escaped = token.replace('"', '""')
        tokens.append(f'"{escaped}"')
    if not tokens:
        # An empty MATCH expression is a syntax error; '""' matches nothing.
        return '""'
    return " OR ".join(tokens)
def search_handoffs(
    conn: sqlite3.Connection,
    query: str,
    outcome: Optional[str] = None,
    limit: int = 5
) -> List[Dict[str, Any]]:
    """Full-text search over handoffs, ranked by FTS5 BM25.

    Args:
        conn: Open SQLite connection with the artifact-index schema.
        query: Free-text search terms (escaped before matching).
        outcome: Optional exact filter on the handoff outcome column.
        limit: Maximum number of rows to return.

    Returns:
        Row dicts, best matches first; empty list when the FTS table is
        missing or unpopulated.
    """
    clauses = ["""
    SELECT h.id, h.session_name, h.task_number, h.task_summary,
           h.what_worked, h.what_failed, h.key_decisions,
           h.outcome, h.file_path, h.created_at,
           handoffs_fts.rank as score
    FROM handoffs_fts
    JOIN handoffs h ON handoffs_fts.rowid = h.rowid
    WHERE handoffs_fts MATCH ?
    """]
    bind_values: List[Any] = [escape_fts5_query(query)]
    if outcome:
        clauses.append(" AND h.outcome = ?")
        bind_values.append(outcome)
    clauses.append(" ORDER BY rank LIMIT ?")
    bind_values.append(limit)
    try:
        cursor = conn.execute("".join(clauses), bind_values)
    except sqlite3.OperationalError:
        # FTS table may be missing or empty -- treat as "no matches".
        return []
    names = [col[0] for col in cursor.description]
    return [dict(zip(names, record)) for record in cursor.fetchall()]
def query_for_planning(conn: sqlite3.Connection, topic: str, limit: int = 5) -> Dict[str, Any]:
    """Query the artifact index for planning context.

    Returns structured data optimized for plan generation:
    - successful implementations (what worked)
    - failed implementations (what to avoid)
    - relevant past plans

    Performance target: <200ms total query time.
    """
    started = time.time()

    def elapsed_ms() -> int:
        # Wall-clock duration since entry, in whole milliseconds.
        return int((time.time() - started) * 1000)

    results: Dict[str, Any] = {
        "topic": topic,
        "successful_handoffs": [],
        "failed_handoffs": [],
        "relevant_plans": [],
        "query_time_ms": 0,
        "is_empty_index": False,
    }
    try:
        total = conn.execute("SELECT COUNT(*) FROM handoffs").fetchone()[0]
    except sqlite3.OperationalError:
        # Corrupted schema / missing table behaves like an empty index.
        total = 0
    if total == 0:
        results["is_empty_index"] = True
        results["query_time_ms"] = elapsed_ms()
        return results
    # Query success and failure buckets separately so successes never
    # crowd failures out of the result set.
    wins = search_handoffs(conn, topic, outcome="SUCCEEDED", limit=limit)
    wins += search_handoffs(conn, topic, outcome="PARTIAL_PLUS", limit=limit)
    results["successful_handoffs"] = wins[:limit]
    losses = search_handoffs(conn, topic, outcome="FAILED", limit=limit)
    losses += search_handoffs(conn, topic, outcome="PARTIAL_MINUS", limit=limit)
    results["failed_handoffs"] = losses[:limit]
    # Plans are capped at 3 to keep the planning context small;
    # `limit` controls handoffs only.
    results["relevant_plans"] = search_plans(conn, topic, limit=min(limit, 3))
    results["query_time_ms"] = elapsed_ms()
    return results
def format_planning_results(results: Dict[str, Any]) -> str:
    """Render planning-mode query results as Markdown for agent consumption."""
    lines: List[str] = ["## Historical Precedent", ""]

    if results.get("is_empty_index"):
        lines.append("**Note:** No historical data available (new project or empty index).")
        lines.append("Proceed with standard planning approach.")
        lines.append("")
        return "\n".join(lines)

    # Successful implementations: positive precedent to reuse.
    successful = results.get("successful_handoffs", [])
    if successful:
        lines.append("### Successful Implementations (Reference These)")
        lines.append("")
        for entry in successful:
            header = "**[{}/task-{}]** ({})".format(
                entry.get('session_name', 'unknown'),
                entry.get('task_number', '?'),
                entry.get('outcome', 'UNKNOWN'),
            )
            lines.append(header)
            summary = entry.get('task_summary', '')
            if summary:
                lines.append(f"- Summary: {summary[:200]}")
            worked = entry.get('what_worked', '')
            if worked:
                lines.append(f"- What worked: {worked[:300]}")
            lines.append(f"- File: `{entry.get('file_path', 'unknown')}`")
            lines.append("")
    else:
        lines.append("### Successful Implementations")
        lines.append("No relevant successful implementations found.")
        lines.append("")

    # Failed implementations: anti-patterns to steer away from.
    failures = results.get("failed_handoffs", [])
    if failures:
        lines.append("### Failed Implementations (AVOID These Patterns)")
        lines.append("")
        for entry in failures:
            header = "**[{}/task-{}]** ({})".format(
                entry.get('session_name', 'unknown'),
                entry.get('task_number', '?'),
                entry.get('outcome', 'UNKNOWN'),
            )
            lines.append(header)
            summary = entry.get('task_summary', '')
            if summary:
                lines.append(f"- Summary: {summary[:200]}")
            failed_text = entry.get('what_failed', '')
            if failed_text:
                lines.append(f"- What failed: {failed_text[:300]}")
            lines.append(f"- File: `{entry.get('file_path', 'unknown')}`")
            lines.append("")
    else:
        lines.append("### Failed Implementations")
        lines.append("No relevant failures found (good sign!).")
        lines.append("")

    # Past plans are shown only when present (no "none found" stub).
    for plan in results.get("relevant_plans", []) or []:
        if lines[-1] != "":
            lines.append("")
        break
    plans = results.get("relevant_plans", [])
    if plans:
        lines.append("### Relevant Past Plans")
        lines.append("")
        for plan in plans:
            lines.append(f"**{plan.get('title', 'Untitled')}**")
            overview = plan.get('overview', '')
            if overview:
                lines.append(f"- Overview: {overview[:200]}")
            lines.append(f"- File: `{plan.get('file_path', 'unknown')}`")
            lines.append("")

    lines.append("---")
    lines.append(f"*Query completed in {results.get('query_time_ms', 0)}ms*")
    return "\n".join(lines)
def search_plans(
    conn: sqlite3.Connection,
    query: str,
    limit: int = 3
) -> List[Dict[str, Any]]:
    """Full-text search over plans, best BM25 matches first.

    Returns row dicts; empty list when the FTS table is missing or has
    no matching rows.
    """
    sql = (
        "SELECT p.id, p.title, p.overview, p.approach, p.file_path, p.created_at, "
        "plans_fts.rank as score "
        "FROM plans_fts "
        "JOIN plans p ON plans_fts.rowid = p.rowid "
        "WHERE plans_fts MATCH ? "
        "ORDER BY rank "
        "LIMIT ?"
    )
    try:
        cursor = conn.execute(sql, [escape_fts5_query(query), limit])
    except sqlite3.OperationalError:
        # Unpopulated or absent FTS table -> no results.
        return []
    names = [col[0] for col in cursor.description]
    return [dict(zip(names, values)) for values in cursor.fetchall()]
def search_continuity(
    conn: sqlite3.Connection,
    query: str,
    limit: int = 3
) -> List[Dict[str, Any]]:
    """Full-text search over continuity ledgers, best BM25 matches first."""
    sql = """
    SELECT c.id, c.session_name, c.goal, c.key_learnings, c.key_decisions,
           c.state_now, c.created_at, c.file_path,
           continuity_fts.rank as score
    FROM continuity_fts
    JOIN continuity c ON continuity_fts.rowid = c.rowid
    WHERE continuity_fts MATCH ?
    ORDER BY rank
    LIMIT ?
    """
    try:
        cursor = conn.execute(sql, [escape_fts5_query(query), limit])
    except sqlite3.OperationalError:
        # Missing/empty FTS table is treated as "no matches".
        return []
    column_names = [description[0] for description in cursor.description]
    rows: List[Dict[str, Any]] = []
    for record in cursor.fetchall():
        rows.append(dict(zip(column_names, record)))
    return rows
def search_past_queries(
    conn: sqlite3.Connection,
    query: str,
    limit: int = 2
) -> List[Dict[str, Any]]:
    """Check whether similar questions were asked before (compound learning).

    Searches the saved-queries FTS index so prior answers can be surfaced
    alongside fresh results.
    """
    sql = (
        "SELECT q.id, q.question, q.answer, q.was_helpful, q.created_at, "
        "queries_fts.rank as score "
        "FROM queries_fts "
        "JOIN queries q ON queries_fts.rowid = q.rowid "
        "WHERE queries_fts MATCH ? "
        "ORDER BY rank "
        "LIMIT ?"
    )
    try:
        cursor = conn.execute(sql, [escape_fts5_query(query), limit])
    except sqlite3.OperationalError:
        # No queries indexed yet -> nothing to surface.
        return []
    names = [col[0] for col in cursor.description]
    return [dict(zip(names, record)) for record in cursor.fetchall()]
def get_handoff_by_id(conn: sqlite3.Connection, handoff_id: str) -> Optional[Dict[str, Any]]:
    """Look up a single handoff row by its primary-key id.

    Args:
        conn: Open SQLite connection with the artifact-index schema.
        handoff_id: Value of the handoff's ``id`` column.

    Returns:
        A dict of the row's columns, or None when the id is unknown or the
        handoffs table does not exist. Catching the missing-table case makes
        this helper degrade gracefully like the search_* functions instead
        of crashing the CLI's --id mode.
    """
    sql = """
    SELECT id, session_name, task_number, task_summary,
           what_worked, what_failed, key_decisions,
           outcome, file_path, created_at
    FROM handoffs
    WHERE id = ?
    LIMIT 1
    """
    try:
        cursor = conn.execute(sql, [handoff_id])
    except sqlite3.OperationalError:
        # Missing table: treat like "not found", consistent with search helpers.
        return None
    row = cursor.fetchone()
    if row is None:
        return None
    columns = [desc[0] for desc in cursor.description]
    return dict(zip(columns, row))
def save_query(conn: sqlite3.Connection, question: str, answer: str, matches: Dict[str, List]) -> None:
    """Persist a question/answer pair plus the ids of matched artifacts.

    Saved queries feed the compound-learning loop: future searches can
    surface previously asked questions via search_past_queries().
    """
    # Non-cryptographic id: md5 of question + wall-clock time, truncated to 12 hex chars.
    digest_input = f"{question}{datetime.now().isoformat()}"
    query_id = hashlib.md5(digest_input.encode()).hexdigest()[:12]

    def matched_ids(category: str) -> str:
        # JSON-encoded list of artifact ids for one result category.
        return json.dumps([item["id"] for item in matches.get(category, [])])

    conn.execute(
        """
        INSERT INTO queries (id, question, answer, handoffs_matched, plans_matched, continuity_matched)
        VALUES (?, ?, ?, ?, ?, ?)
        """,
        (
            query_id,
            question,
            answer,
            matched_ids("handoffs"),
            matched_ids("plans"),
            matched_ids("continuity"),
        ),
    )
    conn.commit()
def format_results(results: Dict[str, List], verbose: bool = False) -> str:
    """Render combined search results as human-readable Markdown.

    The ``verbose`` flag is accepted for interface compatibility but does
    not currently alter the output.
    """
    outcome_icons = {
        "SUCCEEDED": "[OK]",
        "PARTIAL_PLUS": "[~+]",
        "PARTIAL_MINUS": "[~-]",
        "FAILED": "[X]",
        "UNKNOWN": "[?]",
    }
    lines: List[str] = []

    # Previously asked questions (compound learning) come first.
    past = results.get("past_queries")
    if past:
        lines.append("## Previously Asked")
        for entry in past:
            lines.append(f"- **Q:** {entry.get('question', '')[:100]}...")
            lines.append(f" **A:** {entry.get('answer', '')[:200]}...")
            lines.append("")

    handoffs = results.get("handoffs")
    if handoffs:
        lines.append("## Relevant Handoffs")
        for h in handoffs:
            icon = outcome_icons.get(h.get("outcome", "UNKNOWN"), "[?]")
            lines.append(
                f"### {icon} {h.get('session_name', 'unknown')}/task-{h.get('task_number', '?')}"
            )
            summary = h.get('task_summary', '')[:200]
            if summary:
                lines.append(f"**Summary:** {summary}")
            worked = h.get("what_worked", "")
            if worked:
                lines.append(f"**What worked:** {worked[:200]}")
            failed = h.get("what_failed", "")
            if failed:
                lines.append(f"**What failed:** {failed[:200]}")
            lines.append(f"**File:** `{h.get('file_path', '')}`")
            lines.append("")

    plans = results.get("plans")
    if plans:
        lines.append("## Relevant Plans")
        for plan in plans:
            lines.append(f"### {plan.get('title', 'Untitled')}")
            overview = plan.get('overview', '')[:200]
            if overview:
                lines.append(f"**Overview:** {overview}")
            lines.append(f"**File:** `{plan.get('file_path', '')}`")
            lines.append("")

    continuity = results.get("continuity")
    if continuity:
        lines.append("## Related Sessions")
        for ledger in continuity:
            lines.append(f"### Session: {ledger.get('session_name', 'unknown')}")
            goal = ledger.get('goal', '')[:200]
            if goal:
                lines.append(f"**Goal:** {goal}")
            learnings = ledger.get("key_learnings", "")
            if learnings:
                lines.append(f"**Key learnings:** {learnings[:200]}")
            lines.append(f"**File:** `{ledger.get('file_path', '')}`")
            lines.append("")

    # Nothing matched in any category.
    if not any(results.values()):
        lines.append("No relevant precedent found.")
    return "\n".join(lines)
def main() -> int:
    """CLI entry point: parse arguments, dispatch to the requested mode.

    Modes (checked in order): --stats, --id lookup, --mode planning,
    then the default free-text search. Returns a process exit code
    (0 on success, 1 on missing database / missing query / unknown id).
    """
    parser = argparse.ArgumentParser(
        description="Search the Artifact Index for relevant precedent",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=__doc__
    )
    # nargs="*" so the query may be omitted (help / stats / --id modes).
    parser.add_argument("query", nargs="*", help="Search query")
    parser.add_argument(
        "--mode", "-m",
        choices=["search", "planning"],
        default="search",
        help="Query mode: 'search' for general queries, 'planning' for structured planning context"
    )
    parser.add_argument(
        "--type", "-t",
        choices=["handoffs", "plans", "continuity", "all"],
        default="all",
        help="Type of artifacts to search (default: all)"
    )
    parser.add_argument(
        "--outcome", "-o",
        choices=["SUCCEEDED", "PARTIAL_PLUS", "PARTIAL_MINUS", "FAILED"],
        help="Filter handoffs by outcome"
    )
    parser.add_argument(
        "--limit", "-l",
        type=validate_limit,
        default=5,
        help="Maximum results per category (1-100, default: 5)"
    )
    parser.add_argument(
        "--db",
        type=str,
        metavar="PATH",
        help="Custom database path"
    )
    parser.add_argument(
        "--no-save",
        action="store_true",
        help="Disable automatic query saving (saving enabled by default)"
    )
    parser.add_argument(
        "--json", "-j",
        action="store_true",
        help="Output as JSON"
    )
    parser.add_argument(
        "--id",
        type=str,
        metavar="ID",
        help="Get specific handoff by ID"
    )
    parser.add_argument(
        "--stats",
        action="store_true",
        help="Show database statistics"
    )
    args = parser.parse_args()
    db_path = get_db_path(args.db)
    # Graceful handling for missing database in planning mode
    if not db_path.exists():
        if args.mode == "planning":
            # Planning mode: return empty index result (normal for new projects)
            query = " ".join(args.query) if args.query else ""
            results = {
                "topic": query,
                "successful_handoffs": [],
                "failed_handoffs": [],
                "relevant_plans": [],
                "query_time_ms": 0,
                "is_empty_index": True,
                "message": "No artifact index found. This is normal for new projects."
            }
            if args.json:
                print(json.dumps(results, indent=2, default=str))
            else:
                print(format_planning_results(results))
            return 0
        else:
            # Search mode requires an existing index; point the user at the builder.
            print(f"Database not found: {db_path}", file=sys.stderr)
            print("Run: python3 artifact_index.py --all", file=sys.stderr)
            return 1
    # 30s timeout so concurrent indexer writes don't immediately fail reads.
    conn = sqlite3.connect(str(db_path), timeout=30.0)
    # Stats mode
    if args.stats:
        stats = {
            "handoffs": conn.execute("SELECT COUNT(*) FROM handoffs").fetchone()[0],
            "plans": conn.execute("SELECT COUNT(*) FROM plans").fetchone()[0],
            "continuity": conn.execute("SELECT COUNT(*) FROM continuity").fetchone()[0],
            "queries": conn.execute("SELECT COUNT(*) FROM queries").fetchone()[0],
        }
        if args.json:
            print(json.dumps(stats, indent=2))
        else:
            print("## Artifact Index Statistics")
            print(f"- Handoffs: {stats['handoffs']}")
            print(f"- Plans: {stats['plans']}")
            print(f"- Continuity ledgers: {stats['continuity']}")
            print(f"- Saved queries: {stats['queries']}")
        conn.close()
        return 0
    # ID lookup mode
    if args.id:
        handoff = get_handoff_by_id(conn, args.id)
        if args.json:
            # JSON mode prints even a null result for scriptability.
            print(json.dumps(handoff, indent=2, default=str))
        elif handoff:
            print(f"## Handoff: {handoff.get('session_name')}/task-{handoff.get('task_number')}")
            print(f"**Outcome:** {handoff.get('outcome', 'UNKNOWN')}")
            print(f"**Summary:** {handoff.get('task_summary', '')}")
            print(f"**File:** {handoff.get('file_path')}")
        else:
            print(f"Handoff not found: {args.id}", file=sys.stderr)
        conn.close()
        # Exit 1 when the id was not found (in either output mode).
        return 0 if handoff else 1
    # Planning mode - structured output for plan generation
    if args.mode == "planning":
        if not args.query:
            print("Error: Query required for planning mode", file=sys.stderr)
            print("Usage: python3 artifact_query.py --mode planning 'topic keywords'", file=sys.stderr)
            conn.close()
            return 1
        query = " ".join(args.query)
        results = query_for_planning(conn, query, args.limit)
        conn.close()
        if args.json:
            print(json.dumps(results, indent=2, default=str))
        else:
            print(format_planning_results(results))
        return 0
    # Regular search mode
    if not args.query:
        # No query given: show usage rather than erroring out.
        parser.print_help()
        conn.close()
        return 0
    query = " ".join(args.query)
    results: Dict[str, List] = {}
    # Always check past queries first
    results["past_queries"] = search_past_queries(conn, query)
    if args.type in ["handoffs", "all"]:
        results["handoffs"] = search_handoffs(conn, query, args.outcome, args.limit)
    if args.type in ["plans", "all"]:
        results["plans"] = search_plans(conn, query, args.limit)
    if args.type in ["continuity", "all"]:
        results["continuity"] = search_continuity(conn, query, args.limit)
    if args.json:
        formatted = json.dumps(results, indent=2, default=str)
        print(formatted)
    else:
        formatted = format_results(results)
        print(formatted)
    # Auto-save queries for compound learning (unless --no-save)
    if not args.no_save:
        save_query(conn, query, formatted, results)
        if not args.json:
            # Keep JSON output clean for programmatic consumers.
            print("\n[Query saved for compound learning]")
    conn.close()
    return 0
if __name__ == "__main__":
    # Script entry point: propagate the CLI result as the process exit code.
    sys.exit(main())