ring/default/lib/compound_learnings/learning_parser.py
Fred Amaral 9350f9f028
feat(compound-learnings): implement self-improving learning loop system
Extracts session learnings automatically via SessionEnd hook, detects recurring patterns across sessions (3+ occurrences), and generates proposals for new rules/skills with user approval. Includes pattern detection with fuzzy matching, rule/skill generation, and secure file handling with path traversal prevention.

X-Lerian-Ref: 0x1
2025-12-27 23:07:10 -03:00

152 lines
4.8 KiB
Python

"""Parse learning markdown files into structured data.
This module extracts structured information from session learning files
stored in .ring/cache/learnings/.
"""
import re
from pathlib import Path
from typing import Dict, List, Optional
def _extract_session_id(filename: str) -> str:
"""Extract session ID from filename, expecting YYYY-MM-DD-<session>.md format.
Args:
filename: File stem without extension (e.g., "2025-12-27-abc123")
Returns:
Session ID portion, or empty string if no valid session ID found
"""
parts = filename.split("-")
# Validate date prefix: must have at least 4 parts (YYYY-MM-DD-session)
if len(parts) >= 4:
year, month, day = parts[0], parts[1], parts[2]
# Verify first 3 parts look like a date
if len(year) == 4 and len(month) == 2 and len(day) == 2:
if year.isdigit() and month.isdigit() and day.isdigit():
# Join remaining parts as session ID
return "-".join(parts[3:])
# No hyphen or no valid date prefix - return filename as-is
if "-" not in filename:
return filename
# Has hyphens but not valid date format - return empty
return ""
def parse_learning_file(file_path: Path) -> Dict:
    """Parse a learning markdown file into structured data.

    Args:
        file_path: Path to the learning markdown file

    Returns:
        Dictionary with session_id, date, file_path, what_worked, what_failed,
        key_decisions, and patterns. If the file cannot be read or decoded as
        UTF-8, the same keys are returned with empty values plus an "error"
        entry describing the failure.
    """
    try:
        content = file_path.read_text(encoding='utf-8')
    except (UnicodeDecodeError, OSError):
        # OSError already covers IOError (its alias) and PermissionError
        # (a subclass), so the set of handled failures is unchanged.
        return {
            "session_id": "",
            "date": "",
            "file_path": str(file_path),
            "what_worked": [],
            "what_failed": [],
            "key_decisions": [],
            "patterns": [],
            "error": "Failed to read file",
        }

    # Extract session ID from filename: YYYY-MM-DD-<session-id>.md
    session_id = _extract_session_id(file_path.stem)

    # Extract date from content. Only horizontal whitespace may separate the
    # label from its value: allowing \s here would skip past the line break
    # and report the next non-blank line (e.g. a heading) as the date when
    # the Date field is empty.
    date_match = re.search(r'\*\*Date:\*\*[^\S\n]*(.*)', content)
    date = date_match.group(1).strip() if date_match else ""

    return {
        "session_id": session_id,
        "date": date,
        "file_path": str(file_path),
        "what_worked": extract_what_worked(content),
        "what_failed": extract_what_failed(content),
        "key_decisions": extract_key_decisions(content),
        "patterns": extract_patterns(content),
    }
def _extract_section_items(content: str, section_name: str) -> List[str]:
"""Extract bullet items from a markdown section.
Args:
content: Full markdown content
section_name: Name of section (e.g., "What Worked")
Returns:
List of bullet point items (without the leading "- ")
"""
# Match section header and capture until next ## or end
pattern = rf'##\s*{re.escape(section_name)}\s*\n(.*?)(?=\n##|\Z)'
match = re.search(pattern, content, re.DOTALL | re.IGNORECASE)
if not match:
return []
section_content = match.group(1)
# Extract bullet points (lines starting with "- ")
items = []
for line in section_content.split('\n'):
line = line.strip()
if line.startswith('- '):
items.append(line[2:].strip())
return items
def extract_what_worked(content: str) -> List[str]:
    """Return the bullet items listed under the 'What Worked' heading."""
    section_title = "What Worked"
    return _extract_section_items(content, section_title)
def extract_what_failed(content: str) -> List[str]:
    """Return the bullet items listed under the 'What Failed' heading."""
    section_title = "What Failed"
    return _extract_section_items(content, section_title)
def extract_key_decisions(content: str) -> List[str]:
    """Return the bullet items listed under the 'Key Decisions' heading."""
    section_title = "Key Decisions"
    return _extract_section_items(content, section_title)
def extract_patterns(content: str) -> List[str]:
    """Return the bullet items listed under the 'Patterns' heading."""
    section_title = "Patterns"
    return _extract_section_items(content, section_title)
def load_all_learnings(learnings_dir: Path) -> List[Dict]:
    """Load all learning files from a directory.

    Files that cannot be read (their parse result carries an "error" entry)
    are skipped; a file whose parsing raises is reported with a warning on
    stdout and skipped as well.

    Args:
        learnings_dir: Path to .ring/cache/learnings/ directory

    Returns:
        List of parsed learning dictionaries, sorted by their "date" string
        (newest first). Entries with equal dates are ordered by file name,
        descending. Empty list when the directory does not exist.
    """
    if not learnings_dir.is_dir():
        return []

    learnings = []
    # glob() yields entries in filesystem-dependent order. Walking the paths
    # in descending name order, combined with the stable sort below, makes
    # the order of same-date (or date-less) entries deterministic.
    for file_path in sorted(learnings_dir.glob("*.md"), reverse=True):
        try:
            learning = parse_learning_file(file_path)
        except Exception as e:
            # Log but continue processing other files
            print(f"Warning: Failed to parse {file_path}: {e}")
            continue
        if not learning.get("error"):
            learnings.append(learning)

    # Sort by date (newest first); reverse=True keeps ties in input order.
    learnings.sort(key=lambda x: x.get("date", ""), reverse=True)
    return learnings