Musicseerr/backend/infrastructure/cache/disk_cache.py

476 lines
18 KiB
Python
Raw Normal View History

2026-04-03 14:53:00 +00:00
import asyncio
import hashlib
import json
import logging
import shutil
import time
from pathlib import Path
from typing import Any
from infrastructure.serialization import to_jsonable
logger = logging.getLogger(__name__)
def _encode_json(value: Any) -> str:
    """Serialize *value* to compact, ASCII-only JSON text.

    Non-ASCII characters are escaped and no whitespace is emitted between
    items or after key separators.
    """
    compact_separators = (",", ":")
    return json.dumps(value, separators=compact_separators, ensure_ascii=True)
def _decode_json(text: str) -> Any:
    """Parse JSON *text* and return the resulting Python value.

    Raises:
        json.JSONDecodeError: if *text* is not valid JSON.
    """
    decoded = json.loads(text)
    return decoded
class DiskMetadataCache:
def __init__(
self,
base_path: Path,
recent_metadata_max_size_mb: int = 128,
recent_covers_max_size_mb: int = 0,
persistent_metadata_ttl_hours: int = 24,
):
self.base_path = Path(base_path)
self.recent_metadata_max_size_bytes = max(recent_metadata_max_size_mb, 0) * 1024 * 1024
self.recent_covers_max_size_bytes = max(recent_covers_max_size_mb, 0) * 1024 * 1024
self.default_ttl_seconds = max(persistent_metadata_ttl_hours, 1) * 3600
self._recent_albums_dir = self.base_path / "recent" / "albums"
self._recent_artists_dir = self.base_path / "recent" / "artists"
self._recent_covers_dir = self.base_path / "recent" / "covers"
self._persistent_albums_dir = self.base_path / "persistent" / "albums"
self._persistent_artists_dir = self.base_path / "persistent" / "artists"
self._recent_audiodb_artists_dir = self.base_path / "recent" / "audiodb_artists"
self._recent_audiodb_albums_dir = self.base_path / "recent" / "audiodb_albums"
self._persistent_audiodb_artists_dir = self.base_path / "persistent" / "audiodb_artists"
self._persistent_audiodb_albums_dir = self.base_path / "persistent" / "audiodb_albums"
self._ensure_dirs()
def _ensure_dirs(self) -> None:
for path in (
self._recent_albums_dir,
self._recent_artists_dir,
self._recent_covers_dir,
self._persistent_albums_dir,
self._persistent_artists_dir,
self._recent_audiodb_artists_dir,
self._recent_audiodb_albums_dir,
self._persistent_audiodb_artists_dir,
self._persistent_audiodb_albums_dir,
):
path.mkdir(parents=True, exist_ok=True)
@staticmethod
def _cache_hash(identifier: str) -> str:
return hashlib.sha1(identifier.encode()).hexdigest()
@staticmethod
def _meta_path(file_path: Path) -> Path:
return file_path.with_suffix(".meta.json")
def _entity_paths(self, entity_type: str, identifier: str) -> tuple[Path, Path]:
cache_hash = self._cache_hash(identifier)
if entity_type == "album":
return (
self._recent_albums_dir / f"{cache_hash}.json",
self._persistent_albums_dir / f"{cache_hash}.json",
)
if entity_type == "artist":
return (
self._recent_artists_dir / f"{cache_hash}.json",
self._persistent_artists_dir / f"{cache_hash}.json",
)
if entity_type == "audiodb_artist":
return (
self._recent_audiodb_artists_dir / f"{cache_hash}.json",
self._persistent_audiodb_artists_dir / f"{cache_hash}.json",
)
if entity_type == "audiodb_album":
return (
self._recent_audiodb_albums_dir / f"{cache_hash}.json",
self._persistent_audiodb_albums_dir / f"{cache_hash}.json",
)
raise ValueError(f"Unsupported entity type: {entity_type}")
def _delete_file_pair(self, file_path: Path) -> None:
file_path.unlink(missing_ok=True)
self._meta_path(file_path).unlink(missing_ok=True)
def _load_meta(self, meta_path: Path) -> dict[str, Any]:
if not meta_path.exists():
return {}
try:
payload = _decode_json(meta_path.read_text())
except (json.JSONDecodeError, OSError, TypeError):
return {}
return payload if isinstance(payload, dict) else {}
@staticmethod
def _is_expired(meta: dict[str, Any]) -> bool:
expires_at = meta.get("expires_at")
return isinstance(expires_at, (int, float)) and time.time() > float(expires_at)
def _cleanup_expired_directory(self, directory: Path) -> int:
removed = 0
handled_meta_paths: set[Path] = set()
for data_path in directory.iterdir():
if not data_path.is_file() or data_path.name.endswith(".meta.json"):
continue
meta_path = self._meta_path(data_path)
handled_meta_paths.add(meta_path)
if self._is_expired(self._load_meta(meta_path)):
self._delete_file_pair(data_path)
removed += 1
for meta_path in directory.glob("*.meta.json"):
if meta_path in handled_meta_paths:
continue
if self._is_expired(self._load_meta(meta_path)):
meta_path.unlink(missing_ok=True)
removed += 1
return removed
def _enforce_size_limit_for_directory(self, directory: Path, max_size_bytes: int) -> int:
if max_size_bytes <= 0:
return 0
candidates: list[tuple[float, Path, int]] = []
total_size = 0
for data_path in directory.iterdir():
if not data_path.is_file() or data_path.name.endswith(".meta.json"):
continue
try:
size_bytes = data_path.stat().st_size
except FileNotFoundError:
continue
meta = self._load_meta(self._meta_path(data_path))
last_accessed = float(meta.get("last_accessed", meta.get("created_at", 0.0)) or 0.0)
total_size += size_bytes
candidates.append((last_accessed, data_path, size_bytes))
if total_size <= max_size_bytes:
return 0
bytes_to_free = total_size - max_size_bytes
freed = 0
for _, data_path, size_bytes in sorted(candidates, key=lambda item: item[0]):
self._delete_file_pair(data_path)
freed += size_bytes
if freed >= bytes_to_free:
break
return freed
def _write_json_entry(self, file_path: Path, payload: dict[str, Any], expires_at: float | None) -> None:
file_path.parent.mkdir(parents=True, exist_ok=True)
now = time.time()
file_path.write_text(_encode_json(payload))
meta = {
"created_at": now,
"last_accessed": now,
}
if expires_at is not None:
meta["expires_at"] = expires_at
self._meta_path(file_path).write_text(_encode_json(meta))
def _read_json_entry(self, file_path: Path, honor_expiry: bool) -> dict[str, Any] | None:
if not file_path.exists():
return None
meta_path = self._meta_path(file_path)
meta: dict[str, Any] = {}
if meta_path.exists():
try:
meta = _decode_json(meta_path.read_text())
except (json.JSONDecodeError, OSError, TypeError):
meta = {}
if honor_expiry:
expires_at = meta.get("expires_at")
if isinstance(expires_at, (int, float)) and time.time() > float(expires_at):
self._delete_file_pair(file_path)
return None
try:
payload = _decode_json(file_path.read_text())
except (json.JSONDecodeError, OSError, TypeError):
self._delete_file_pair(file_path)
return None
if not isinstance(payload, dict):
self._delete_file_pair(file_path)
return None
if meta_path.exists():
meta["last_accessed"] = time.time()
try:
meta_path.write_text(_encode_json(meta))
except OSError as exc:
logger.debug("Failed to update disk cache access time for %s: %s", meta_path, exc)
return payload
async def _set_entity(
self,
entity_type: str,
identifier: str,
payload: Any,
is_monitored: bool,
ttl_seconds: int | None,
) -> None:
builtins = to_jsonable(payload)
if not isinstance(builtins, dict):
raise TypeError(f"Expected mapping payload for {entity_type} cache, got {type(builtins)!r}")
recent_path, persistent_path = self._entity_paths(entity_type, identifier)
def operation() -> None:
target_path = persistent_path if is_monitored else recent_path
other_path = recent_path if is_monitored else persistent_path
self._delete_file_pair(other_path)
expires_at = None
if ttl_seconds is not None:
expires_at = time.time() + max(ttl_seconds, 1)
elif not is_monitored:
expires_at = time.time() + max(self.default_ttl_seconds, 1)
self._write_json_entry(target_path, builtins, expires_at)
await asyncio.to_thread(operation)
async def _get_entity(self, entity_type: str, identifier: str) -> dict[str, Any] | None:
recent_path, persistent_path = self._entity_paths(entity_type, identifier)
def operation() -> dict[str, Any] | None:
persistent_payload = self._read_json_entry(persistent_path, honor_expiry=True)
if persistent_payload is not None:
return persistent_payload
return self._read_json_entry(recent_path, honor_expiry=True)
return await asyncio.to_thread(operation)
async def set_album(
self,
musicbrainz_id: str,
album_info: Any,
is_monitored: bool = False,
ttl_seconds: int | None = None,
) -> None:
await self._set_entity("album", musicbrainz_id, album_info, is_monitored, ttl_seconds)
async def get_album(self, musicbrainz_id: str) -> dict[str, Any] | None:
return await self._get_entity("album", musicbrainz_id)
async def set_artist(
self,
musicbrainz_id: str,
artist_info: Any,
is_monitored: bool = False,
ttl_seconds: int | None = None,
) -> None:
await self._set_entity("artist", musicbrainz_id, artist_info, is_monitored, ttl_seconds)
async def get_artist(self, musicbrainz_id: str) -> dict[str, Any] | None:
return await self._get_entity("artist", musicbrainz_id)
async def set_audiodb_artist(
self,
identifier: str,
payload: Any,
is_monitored: bool = False,
ttl_seconds: int | None = None,
) -> None:
await self._set_entity("audiodb_artist", identifier, payload, is_monitored, ttl_seconds)
async def get_audiodb_artist(self, identifier: str) -> dict[str, Any] | None:
return await self._get_entity("audiodb_artist", identifier)
async def set_audiodb_album(
self,
identifier: str,
payload: Any,
is_monitored: bool = False,
ttl_seconds: int | None = None,
) -> None:
await self._set_entity("audiodb_album", identifier, payload, is_monitored, ttl_seconds)
async def get_audiodb_album(self, identifier: str) -> dict[str, Any] | None:
return await self._get_entity("audiodb_album", identifier)
async def delete_album(self, musicbrainz_id: str) -> None:
recent_path, persistent_path = self._entity_paths("album", musicbrainz_id)
await asyncio.to_thread(self._delete_file_pair, recent_path)
await asyncio.to_thread(self._delete_file_pair, persistent_path)
async def delete_artist(self, musicbrainz_id: str) -> None:
recent_path, persistent_path = self._entity_paths("artist", musicbrainz_id)
await asyncio.to_thread(self._delete_file_pair, recent_path)
await asyncio.to_thread(self._delete_file_pair, persistent_path)
async def delete_entity(self, entity_type: str, identifier: str) -> None:
recent_path, persistent_path = self._entity_paths(entity_type, identifier)
await asyncio.to_thread(self._delete_file_pair, recent_path)
await asyncio.to_thread(self._delete_file_pair, persistent_path)
async def promote_to_persistent(self, identifier: str, identifier_type: str) -> bool:
entity_type = "artist" if identifier_type == "artist" else "album"
recent_path, persistent_path = self._entity_paths(entity_type, identifier)
def operation() -> bool:
if persistent_path.exists():
return True
if not recent_path.exists():
return False
persistent_path.parent.mkdir(parents=True, exist_ok=True)
shutil.move(str(recent_path), str(persistent_path))
recent_meta = self._meta_path(recent_path)
persistent_meta = self._meta_path(persistent_path)
if recent_meta.exists():
meta = _decode_json(recent_meta.read_text())
meta.pop("expires_at", None)
meta["last_accessed"] = time.time()
persistent_meta.write_text(_encode_json(meta))
recent_meta.unlink(missing_ok=True)
return True
return await asyncio.to_thread(operation)
async def promote_album_to_persistent(self, musicbrainz_id: str) -> bool:
return await self.promote_to_persistent(musicbrainz_id, "album")
async def promote_artist_to_persistent(self, musicbrainz_id: str) -> bool:
return await self.promote_to_persistent(musicbrainz_id, "artist")
async def cleanup_expired_recent(self) -> int:
def operation() -> int:
removed = 0
for base_dir in (
self._recent_albums_dir,
self._recent_artists_dir,
self._recent_audiodb_artists_dir,
self._recent_audiodb_albums_dir,
):
removed += self._cleanup_expired_directory(base_dir)
return removed
return await asyncio.to_thread(operation)
async def enforce_recent_size_limits(self) -> int:
if self.recent_metadata_max_size_bytes <= 0:
return 0
def operation() -> int:
candidates: list[tuple[float, Path, int]] = []
total_size = 0
for base_dir in (
self._recent_albums_dir,
self._recent_artists_dir,
self._recent_audiodb_artists_dir,
self._recent_audiodb_albums_dir,
):
for data_path in base_dir.glob("*.json"):
if data_path.name.endswith(".meta.json"):
continue
try:
size_bytes = data_path.stat().st_size
except FileNotFoundError:
continue
meta_path = self._meta_path(data_path)
meta: dict[str, Any] = {}
if meta_path.exists():
try:
meta = _decode_json(meta_path.read_text())
except Exception: # noqa: BLE001
meta = {}
last_accessed = float(meta.get("last_accessed", meta.get("created_at", 0.0)) or 0.0)
total_size += size_bytes
candidates.append((last_accessed, data_path, size_bytes))
if total_size <= self.recent_metadata_max_size_bytes:
return 0
bytes_to_free = total_size - self.recent_metadata_max_size_bytes
freed = 0
for _, data_path, size_bytes in sorted(candidates, key=lambda item: item[0]):
self._delete_file_pair(data_path)
freed += size_bytes
if freed >= bytes_to_free:
break
return freed
return await asyncio.to_thread(operation)
async def cleanup_expired_covers(self) -> int:
return await asyncio.to_thread(self._cleanup_expired_directory, self._recent_covers_dir)
async def enforce_cover_size_limits(self) -> int:
return await asyncio.to_thread(
self._enforce_size_limit_for_directory,
self._recent_covers_dir,
self.recent_covers_max_size_bytes,
)
def get_stats(self) -> dict[str, Any]:
total_count = 0
album_count = 0
artist_count = 0
audiodb_artist_count = 0
audiodb_album_count = 0
total_size_bytes = 0
for base_dir, counter_name in (
(self._recent_albums_dir, "album"),
(self._persistent_albums_dir, "album"),
(self._recent_artists_dir, "artist"),
(self._persistent_artists_dir, "artist"),
(self._recent_audiodb_artists_dir, "audiodb_artist"),
(self._persistent_audiodb_artists_dir, "audiodb_artist"),
(self._recent_audiodb_albums_dir, "audiodb_album"),
(self._persistent_audiodb_albums_dir, "audiodb_album"),
):
for data_path in base_dir.glob("*.json"):
if data_path.name.endswith(".meta.json"):
continue
total_count += 1
if counter_name == "album":
album_count += 1
elif counter_name == "artist":
artist_count += 1
elif counter_name == "audiodb_artist":
audiodb_artist_count += 1
elif counter_name == "audiodb_album":
audiodb_album_count += 1
try:
total_size_bytes += data_path.stat().st_size
except FileNotFoundError:
pass
return {
"total_count": total_count,
"album_count": album_count,
"artist_count": artist_count,
"audiodb_artist_count": audiodb_artist_count,
"audiodb_album_count": audiodb_album_count,
"total_size_bytes": total_size_bytes,
}
async def clear_all(self) -> None:
def operation() -> None:
if self.base_path.exists():
shutil.rmtree(self.base_path)
self._ensure_dirs()
await asyncio.to_thread(operation)
async def clear_audiodb(self) -> None:
def operation() -> None:
for d in (
self._recent_audiodb_artists_dir,
self._recent_audiodb_albums_dir,
self._persistent_audiodb_artists_dir,
self._persistent_audiodb_albums_dir,
):
if d.exists():
shutil.rmtree(d)
self._ensure_dirs()
await asyncio.to_thread(operation)