hotfix: prevent duplicate artist discovery precache execution (#23)

This commit is contained in:
Harvey 2026-04-05 17:21:30 +01:00 committed by GitHub
parent 87d56c2171
commit 6855adc0af
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 235 additions and 11 deletions

View file

@ -80,7 +80,7 @@ backend-test-infra-hardening: $(BACKEND_VENV_STAMP) ## Run infrastructure harden
cd "$(BACKEND_DIR)" && .venv/bin/python -m pytest tests/infrastructure/test_circuit_breaker_sync.py tests/infrastructure/test_disk_cache_periodic.py tests/infrastructure/test_retry_non_breaking.py
backend-test-discovery-precache: $(BACKEND_VENV_STAMP) ## Run artist discovery precache tests
cd "$(BACKEND_DIR)" && .venv/bin/python -m pytest tests/services/test_discovery_precache_progress.py tests/infrastructure/test_retry_non_breaking.py -v
cd "$(BACKEND_DIR)" && .venv/bin/python -m pytest tests/services/test_discovery_precache_progress.py tests/services/test_discovery_precache_lock.py tests/infrastructure/test_retry_non_breaking.py -v
backend-test-library-pagination: $(BACKEND_VENV_STAMP) ## Run library pagination tests
cd "$(BACKEND_DIR)" && .venv/bin/python -m pytest tests/infrastructure/test_library_pagination.py -v

View file

@ -376,7 +376,7 @@ async def warm_artist_discovery_cache_periodically(
interval: int = 14400,
delay: float = 0.5,
) -> None:
await asyncio.sleep(60)
await asyncio.sleep(300) # Wait for Phase 1.5 sync to finish first
while True:
try:

View file

@ -288,7 +288,7 @@ class MusicBrainzAlbumMixin:
cache_key = f"{MB_RELEASE_TO_RG_PREFIX}{release_id}"
cached = await self._cache.get(cache_key)
if cached is not None:
logger.info(f"[MB] Cache hit for release {release_id[:8]}: '{cached[:8] if cached else 'empty'}'")
logger.debug(f"[MB] Cache hit for release {release_id[:8]}: '{cached[:8] if cached else 'empty'}'")
return cached if cached != "" else None
dedupe_key = f"{MB_RELEASE_TO_RG_PREFIX}{release_id}"
@ -303,7 +303,7 @@ class MusicBrainzAlbumMixin:
cache_key: str,
) -> str | None:
try:
logger.info(f"[MB] Fetching release group for release {release_id[:8]}")
logger.debug(f"[MB] Fetching release group for release {release_id[:8]}")
result = await mb_api_get(
f"/release/{release_id}",
params={"inc": "release-groups+recordings"},
@ -312,7 +312,7 @@ class MusicBrainzAlbumMixin:
)
rg = result.release_group
rg_id = rg.get("id")
logger.info(f"[MB] Resolved release {release_id[:8]} -> release_group {rg_id}")
logger.debug(f"[MB] Resolved release {release_id[:8]} -> release_group {rg_id}")
await self._cache.set(cache_key, rg_id or "", ttl_seconds=86400)
positions: dict[str, list[int]] = {}

View file

@ -26,6 +26,9 @@ DEFAULT_SIMILAR_COUNT = 15
DEFAULT_TOP_SONGS_COUNT = 10
DEFAULT_TOP_ALBUMS_COUNT = 10
# Module-level flag survives singleton cache invalidation / instance recreation
_discovery_precache_running = False
class ArtistDiscoveryService:
def __init__(
@ -406,6 +409,27 @@ class ArtistDiscoveryService:
delay: float = 0.5,
status_service: Any = None,
mbid_to_name: dict[str, str] | None = None,
) -> int:
global _discovery_precache_running
if _discovery_precache_running:
logger.info("Discovery precache already running, skipping duplicate invocation")
return 0
_discovery_precache_running = True
try:
return await self._do_precache_artist_discovery(
artist_mbids, delay=delay,
status_service=status_service, mbid_to_name=mbid_to_name,
)
finally:
_discovery_precache_running = False
async def _do_precache_artist_discovery(
self,
artist_mbids: list[str],
delay: float = 0.5,
status_service: Any = None,
mbid_to_name: dict[str, str] | None = None,
) -> int:
sources: list[Literal["listenbrainz", "lastfm"]] = []
if self._lb_repo.is_configured():
@ -469,8 +493,9 @@ class ArtistDiscoveryService:
async with counter_lock:
source_fetches += 1
if delay > 0:
await asyncio.sleep(delay)
# Sleep inside semaphore to hold slot and throttle API calls
if delay > 0:
await asyncio.sleep(delay)
async with counter_lock:
cached_count += 1

View file

@ -0,0 +1,192 @@
"""Tests for discovery precache double-execution prevention and throttling."""
import asyncio
from unittest.mock import AsyncMock, MagicMock, patch
import pytest
from services.artist_discovery_service import ArtistDiscoveryService
import services.artist_discovery_service as _ads_module
@pytest.fixture(autouse=True)
def _reset_precache_flag():
    """Clear the module-level precache guard before and after every test.

    The guard is process-global state; without this fixture a failing test
    could leave it set and poison every subsequent test in the session.
    """
    setattr(_ads_module, "_discovery_precache_running", False)
    yield
    setattr(_ads_module, "_discovery_precache_running", False)
def _make_service(*, lb_configured: bool = True, lastfm_enabled: bool = False):
    """Build an ArtistDiscoveryService wired entirely to mocks.

    ListenBrainz configuration and Last.fm enablement are controllable so
    tests can choose which discovery sources the service believes exist.
    Precache concurrency is pinned at 2 for deterministic throttling tests.
    """
    lb_repo = MagicMock()
    lb_repo.is_configured.return_value = lb_configured

    prefs = MagicMock()
    prefs.is_lastfm_enabled.return_value = lastfm_enabled
    advanced = MagicMock()
    advanced.artist_discovery_precache_concurrency = 2
    prefs.get_advanced_settings.return_value = advanced

    # Cache misses by default so the precache path actually hits the mocks.
    cache = AsyncMock()
    cache.get = AsyncMock(return_value=None)
    cache.set = AsyncMock()

    library_db = AsyncMock()
    library_db.get_all_artist_mbids = AsyncMock(return_value=set())

    return ArtistDiscoveryService(
        listenbrainz_repo=lb_repo,
        musicbrainz_repo=MagicMock(),
        library_db=library_db,
        lidarr_repo=MagicMock(),
        memory_cache=cache,
        lastfm_repo=MagicMock() if lastfm_enabled else None,
        preferences_service=prefs,
    )
@pytest.mark.asyncio
async def test_duplicate_invocation_skipped():
    """Second call returns 0 immediately when precache is already running."""
    svc = _make_service()
    gate = asyncio.Event()

    async def slow_similar(*args, **kwargs):
        # Hold the first precache run in flight until the test opens the gate.
        await gate.wait()
        return MagicMock()

    with (
        patch.object(svc, "get_similar_artists", new_callable=AsyncMock, side_effect=slow_similar),
        patch.object(svc, "get_top_songs", new_callable=AsyncMock, return_value=MagicMock()),
        patch.object(svc, "get_top_albums", new_callable=AsyncMock, return_value=MagicMock()),
    ):
        task1 = asyncio.create_task(
            svc.precache_artist_discovery(["mbid-a"], delay=0)
        )
        # Yield to the loop so task1 starts and sets the module-level guard.
        await asyncio.sleep(0.01)
        assert _ads_module._discovery_precache_running is True
        # The overlapping call must be rejected with a 0 count, not queued.
        result2 = await svc.precache_artist_discovery(["mbid-b"], delay=0)
        assert result2 == 0
        gate.set()
        result1 = await task1
        assert result1 >= 0
        # Guard is released once the first run completes.
        assert _ads_module._discovery_precache_running is False
@pytest.mark.asyncio
async def test_lock_released_after_exception():
    """Flag is cleared even when precache raises an unexpected error."""
    svc = _make_service()

    # Phase 1: the inner worker blows up; the error must propagate.
    failing = patch.object(
        svc,
        "_do_precache_artist_discovery",
        new_callable=AsyncMock,
        side_effect=RuntimeError("boom"),
    )
    with failing:
        with pytest.raises(RuntimeError, match="boom"):
            await svc.precache_artist_discovery(["mbid-a"], delay=0)
    # The guard must have been released despite the exception.
    assert _ads_module._discovery_precache_running is False

    # Phase 2: a subsequent healthy invocation is not blocked by stale state.
    ok_similar = patch.object(svc, "get_similar_artists", new_callable=AsyncMock, return_value=MagicMock())
    ok_songs = patch.object(svc, "get_top_songs", new_callable=AsyncMock, return_value=MagicMock())
    ok_albums = patch.object(svc, "get_top_albums", new_callable=AsyncMock, return_value=MagicMock())
    with ok_similar, ok_songs, ok_albums:
        result = await svc.precache_artist_discovery(["mbid-a"], delay=0)
    assert result >= 0
@pytest.mark.asyncio
async def test_delay_holds_semaphore_slot():
    """Delay is applied inside the semaphore, blocking other artists from starting.

    With concurrency=2 (pinned by _make_service) and delay=0.15s held while
    the slot is occupied, the 3rd artist cannot start until one of the first
    two finishes its delay, so its start time trails the first pair by ~delay.
    """
    svc = _make_service()
    timestamps: list[float] = []
    # get_running_loop() is the non-deprecated way to reach the loop from
    # inside a coroutine (get_event_loop() is deprecated here since 3.10).
    loop = asyncio.get_running_loop()

    async def track_similar(*args, **kwargs):
        # Record when each artist's first fetch actually begins.
        timestamps.append(loop.time())
        return MagicMock()

    with (
        patch.object(svc, "get_similar_artists", new_callable=AsyncMock, side_effect=track_similar),
        patch.object(svc, "get_top_songs", new_callable=AsyncMock, return_value=MagicMock()),
        patch.object(svc, "get_top_albums", new_callable=AsyncMock, return_value=MagicMock()),
    ):
        await svc.precache_artist_discovery(
            ["mbid-a", "mbid-b", "mbid-c", "mbid-d"],
            delay=0.15,
        )
    assert len(timestamps) == 4
    # The first two artists start together; the 3rd waits for a freed slot,
    # so the spread from the earliest start to the 3rd start reflects the
    # in-semaphore delay. Threshold 0.1 (< delay) absorbs scheduling jitter.
    sorted_ts = sorted(timestamps)
    gap = sorted_ts[2] - sorted_ts[0]
    assert gap >= 0.1, f"Expected >=0.1s gap due to semaphore-held delay, got {gap:.3f}s"
@pytest.mark.asyncio
async def test_cached_artists_skip_api_calls():
    """Artists with all cache keys populated skip API fetches entirely."""
    svc = _make_service()
    # Make every cache lookup report a hit so no fetch should be needed.
    svc._cache.get = AsyncMock(return_value=MagicMock())

    call_count = 0

    async def counting_similar(*args, **kwargs):
        nonlocal call_count
        call_count += 1
        return MagicMock()

    with (
        patch.object(svc, "get_similar_artists", new_callable=AsyncMock, side_effect=counting_similar),
        patch.object(svc, "get_top_songs", new_callable=AsyncMock, return_value=MagicMock()),
        patch.object(svc, "get_top_albums", new_callable=AsyncMock, return_value=MagicMock()),
    ):
        result = await svc.precache_artist_discovery(["mbid-a", "mbid-b"], delay=0)

    assert call_count == 0, "Expected no API calls when all cache keys are populated"
    # Both artists still count as successfully precached.
    assert result == 2
@pytest.mark.asyncio
async def test_guard_survives_instance_recreation():
    """Module-level flag prevents overlap even when a new service instance is created."""
    svc1 = _make_service()
    svc2 = _make_service()
    gate = asyncio.Event()

    async def slow_similar(*args, **kwargs):
        # Keep the first instance's run in flight until the gate opens.
        await gate.wait()
        return MagicMock()

    with (
        patch.object(svc1, "get_similar_artists", new_callable=AsyncMock, side_effect=slow_similar),
        patch.object(svc1, "get_top_songs", new_callable=AsyncMock, return_value=MagicMock()),
        patch.object(svc1, "get_top_albums", new_callable=AsyncMock, return_value=MagicMock()),
    ):
        task1 = asyncio.create_task(
            svc1.precache_artist_discovery(["mbid-a"], delay=0)
        )
        # Yield to the loop so task1 starts and sets the module-level guard.
        await asyncio.sleep(0.01)
        # svc2 shares no instance state with svc1, so only the module-level
        # flag can reject this overlapping run.
        result2 = await svc2.precache_artist_discovery(["mbid-b"], delay=0)
        assert result2 == 0, "Second instance should be blocked by module-level flag"
        gate.set()
        await task1
        # Guard is released once the first run completes.
        assert _ads_module._discovery_precache_running is False

View file

@ -15,6 +15,9 @@ def _make_service(*, lb_configured: bool = True, lastfm_enabled: bool = False):
lastfm_repo = MagicMock() if lastfm_enabled else None
prefs = MagicMock()
prefs.is_lastfm_enabled.return_value = lastfm_enabled
advanced = MagicMock()
advanced.artist_discovery_precache_concurrency = 3
prefs.get_advanced_settings.return_value = advanced
cache = AsyncMock()
cache.get = AsyncMock(return_value=None)
@ -38,8 +41,9 @@ def _make_service(*, lb_configured: bool = True, lastfm_enabled: bool = False):
@pytest.mark.asyncio
async def test_progress_updates_on_success():
svc = _make_service()
status = AsyncMock()
status = MagicMock()
status.update_progress = AsyncMock()
status.is_cancelled = MagicMock(return_value=False)
with (
patch.object(svc, "get_similar_artists", new_callable=AsyncMock, return_value=MagicMock()),
@ -61,8 +65,9 @@ async def test_progress_updates_on_success():
@pytest.mark.asyncio
async def test_progress_updates_even_on_failure():
svc = _make_service()
status = AsyncMock()
status = MagicMock()
status.update_progress = AsyncMock()
status.is_cancelled = MagicMock(return_value=False)
with (
patch.object(svc, "get_similar_artists", new_callable=AsyncMock, side_effect=RuntimeError("boom")),
@ -85,8 +90,9 @@ async def test_progress_updates_even_on_failure():
@pytest.mark.asyncio
async def test_progress_updates_on_mixed_success_and_failure():
svc = _make_service()
status = AsyncMock()
status = MagicMock()
status.update_progress = AsyncMock()
status.is_cancelled = MagicMock(return_value=False)
call_count = 0
@ -114,8 +120,9 @@ async def test_progress_updates_on_mixed_success_and_failure():
@pytest.mark.asyncio
async def test_cached_artists_still_update_progress():
svc = _make_service()
status = AsyncMock()
status = MagicMock()
status.update_progress = AsyncMock()
status.is_cancelled = MagicMock(return_value=False)
svc._cache.get = AsyncMock(return_value="cached-value")