Studio: support GGUF variant selection for non-suffixed repos (#5023)

* fix: support GGUF variant selection for non-suffixed repos

* [pre-commit.ci] auto fixes from pre-commit.com hooks

  for more information, see https://pre-commit.ci

* fix: harden GGUF detection across cached models and picker flows

* [pre-commit.ci] auto fixes from pre-commit.com hooks

  for more information, see https://pre-commit.ci

* chore: use shared GGUF picker helper for search rows

* fix: avoid mixed cache duplication and preserve GGUF fallback detection

* fix: unify GGUF cache matching and merge picker hints

* fix: normalize local GGUF matching across picker and model config

* [pre-commit.ci] auto fixes from pre-commit.com hooks

  for more information, see https://pre-commit.ci

* fix: robust cached-gguf classification + hint-aware click routing

  - _repo_gguf_size_bytes: treat size_on_disk=None as 0 and dedupe fallback by commit_hash so partial/interrupted downloads don't TypeError out of sum() and wipe the entire cached list.
  - list_cached_gguf / list_cached_models: narrow per-repo try/except so one malformed repo no longer poisons the whole response.
  - handleModelClick: route through isKnownGgufRepo instead of the suffix-only isGgufRepo, so non-suffixed GGUF repos still open the variant expander from every call site.
  - Replace the modelIsGgufById/resultIsGgufById Maps with Sets of known GGUF ids to stop conflating "no hint" with "known not-GGUF".
  - Make HfModelResult.isGguf required (it is always set in makeMapModel).
  - Add regression tests for the None size case, mixed-repo inclusion in cached-gguf, and per-repo error isolation.

* fix: exclude mmproj from GGUF classification and case-normalize hint lookups

  - _repo_gguf_size_bytes now filters mmproj vision-adapter files so safetensors+mmproj.gguf repos stay on the cached-models path and non-GGUF rows no longer show zero pickable variants. A vision-capable GGUF repo (main weight + mmproj adapter) still classifies as GGUF and reports the main weight size.
  - modelGgufIds / resultGgufIds now key on lowercased ids and isKnownGgufRepo lowercases its lookup, so store and HF-search ids that differ only by casing still match the same GGUF hint.
  - New regression tests: mmproj-only repo excluded from cached-gguf, same repo included in cached-models, vision-capable repo still classified as GGUF with correct size.

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Roland Tannous <rolandtannous@gravityq.ai>
Co-authored-by: Roland Tannous <115670425+rolandtannous@users.noreply.github.com>
2026-04-21 13:37:39 +00:00 · 2026-04-15 12:32:01 +01:00 · 2026-04-15 12:32:01 +01:00 · f9ef639dde
commit f9ef639dde
parent 13928b5f0e
7 changed files with 621 additions and 90 deletions
--- a/studio/backend/routes/models.py
+++ b/studio/backend/routes/models.py
@ -992,7 +992,7 @@ async def get_gguf_variants(
                    snapshots = entry / "snapshots"
                    if snapshots.is_dir():
                        for snap in snapshots.iterdir():
-                            for f in snap.rglob("*.gguf"):
+                            for f in _iter_gguf_paths(snap):
                                q = _extract_quant_label(f.name)
                                cached_bytes_by_quant[q] = (
                                    cached_bytes_by_quant.get(q, 0) + f.stat().st_size
@ -1061,7 +1061,7 @@ async def get_gguf_download_progress(
        for entry in cache_dir.iterdir():
            if entry.name.lower() == target:
                # Count completed .gguf files matching this variant in snapshots
-                for f in entry.rglob("*.gguf"):
+                for f in _iter_gguf_paths(entry):
                    fname = f.name.lower().replace("-", "").replace("_", "")
                    if not variant_lower or variant_lower in fname:
                        downloaded_bytes += f.stat().st_size
@ -1237,6 +1237,62 @@ def _all_hf_cache_scans():
    return scans


+def _is_gguf_filename(name: str) -> bool:
+    """Return True when ``name`` ends with ``.gguf``, compared
+    case-insensitively."""
+    return name.lower().endswith(".gguf")
+
+
+def _is_mmproj_filename(name: str) -> bool:
+    """Match GGUF vision-adapter (mmproj) files. Kept consistent with
+    ``utils.models.model_config._is_mmproj``.
+
+    The check is a case-insensitive substring test, so ``mmproj`` may
+    appear anywhere in ``name``."""
+    return "mmproj" in name.lower()
+
+
+def _is_main_gguf_filename(name: str) -> bool:
+    """A GGUF file that is a primary weight artifact, not an mmproj
+    vision adapter.
+
+    True only when ``name`` has a ``.gguf`` extension and does not
+    contain ``mmproj`` (both checks ignore letter case)."""
+    return _is_gguf_filename(name) and not _is_mmproj_filename(name)
+
+
+def _iter_gguf_paths(root: Path):
+    """Yield every file under ``root`` (searched recursively) whose
+    name ends with ``.gguf`` in any letter case."""
+    # Walk everything and filter on the lowercased name rather than
+    # globbing "*.gguf", so upper/mixed-case extensions are also matched.
+    for path in root.rglob("*"):
+        if path.is_file() and _is_gguf_filename(path.name):
+            yield path
+
+
+def _repo_gguf_size_bytes(repo_info) -> int:
+    """Return the total on-disk size of primary GGUF weight files across
+    all revisions, excluding mmproj vision-adapter files.
+
+    Hugging Face hardlinks blobs shared between revisions, so this
+    deduplicates by blob path (or, as a fallback, by revision commit
+    hash + filename) to avoid double-counting the same bytes. Files
+    with an unknown size (``size_on_disk is None``, e.g. a partial or
+    interrupted download) are treated as zero bytes. mmproj files are
+    excluded so that repos whose only ``.gguf`` artifact is a vision
+    adapter are not classified as GGUF repos: the variant selector
+    filters mmproj out and would otherwise show zero pickable variants.
+
+    ``repo_info`` must expose ``revisions``; each revision must expose
+    ``files`` whose items have ``file_name`` and ``size_on_disk``.
+    ``commit_hash`` (on a revision) and ``blob_path`` (on a file) are
+    read with ``getattr`` and may be absent.
+    """
+    # Maps a dedup key to a byte count. A key seen again overwrites its
+    # earlier entry, so each key contributes to the total only once.
+    unique_blobs: dict[str, int] = {}
+    for revision in repo_info.revisions:
+        # Revisions without a commit hash fall back to the object's id()
+        # so their files still get a per-revision fallback key.
+        rev_id = getattr(revision, "commit_hash", None) or str(id(revision))
+        for f in revision.files:
+            if _is_main_gguf_filename(f.file_name):
+                blob_path = getattr(f, "blob_path", None)
+                # None (unknown size) is counted as zero bytes.
+                size = f.size_on_disk or 0
+                if blob_path:
+                    unique_blobs[str(blob_path)] = size
+                else:
+                    unique_blobs[f"{rev_id}:{f.file_name}"] = size
+    return sum(unique_blobs.values())
+
+
+def _repo_has_gguf_files(repo_info) -> bool:
+    """Return True when any revision in a cached repo contains a
+    primary GGUF weight file. Repos whose only ``.gguf`` artifact is
+    an mmproj vision adapter are not treated as GGUF here.
+
+    Note: the check is ``_repo_gguf_size_bytes(repo_info) > 0``, so a
+    repo whose primary GGUF files all report a zero or unknown size
+    also returns False."""
+    return _repo_gguf_size_bytes(repo_info) > 0
+
+
@router.get("/cached-gguf")
 async def list_cached_gguf(
    current_subject: str = Depends(get_current_subject),
@ -1248,28 +1304,25 @@ async def list_cached_gguf(
        seen_lower: dict[str, dict] = {}
        for hf_cache in cache_scans:
            for repo_info in hf_cache.repos:
-                if repo_info.repo_type != "model":
+                try:
+                    if repo_info.repo_type != "model":
+                        continue
+                    repo_id = repo_info.repo_id
+                    total_size = _repo_gguf_size_bytes(repo_info)
+                    if total_size == 0:
+                        continue
+                    key = repo_id.lower()
+                    existing = seen_lower.get(key)
+                    if existing is None or total_size > existing["size_bytes"]:
+                        seen_lower[key] = {
+                            "repo_id": repo_id,
+                            "size_bytes": total_size,
+                            "cache_path": str(repo_info.repo_path),
+                        }
+                except Exception as e:
+                    repo_label = getattr(repo_info, "repo_id", "<unknown>")
+                    logger.warning(f"Skipping cached GGUF repo {repo_label}: {e}")
                    continue
-                repo_id = repo_info.repo_id
-                if not repo_id.upper().endswith("-GGUF"):
-                    continue
-                total_size = 0
-                has_gguf = False
-                for revision in repo_info.revisions:
-                    for f in revision.files:
-                        if f.file_name.endswith(".gguf"):
-                            has_gguf = True
-                            total_size += f.size_on_disk
-                if not has_gguf:
-                    continue
-                key = repo_id.lower()
-                existing = seen_lower.get(key)
-                if existing is None or total_size > existing["size_bytes"]:
-                    seen_lower[key] = {
-                        "repo_id": repo_id,
-                        "size_bytes": total_size,
-                        "cache_path": str(repo_info.repo_path),
-                    }
        cached = sorted(seen_lower.values(), key = lambda c: c["repo_id"])
        return {"cached": cached}
    except Exception as e:
@ -1290,30 +1343,37 @@ async def list_cached_models(
        seen_lower: dict[str, dict] = {}
        for hf_cache in cache_scans:
            for repo_info in hf_cache.repos:
-                if repo_info.repo_type != "model":
+                try:
+                    if repo_info.repo_type != "model":
+                        continue
+                    repo_id = repo_info.repo_id
+                    if _repo_has_gguf_files(repo_info):
+                        continue
+                    total_size = sum(
+                        (f.size_on_disk or 0)
+                        for rev in repo_info.revisions
+                        for f in rev.files
+                    )
+                    if total_size == 0:
+                        continue
+                    has_weights = any(
+                        f.file_name.endswith(_WEIGHT_EXTENSIONS)
+                        for rev in repo_info.revisions
+                        for f in rev.files
+                    )
+                    if not has_weights:
+                        continue
+                    key = repo_id.lower()
+                    existing = seen_lower.get(key)
+                    if existing is None or total_size > existing["size_bytes"]:
+                        seen_lower[key] = {
+                            "repo_id": repo_id,
+                            "size_bytes": total_size,
+                        }
+                except Exception as e:
+                    repo_label = getattr(repo_info, "repo_id", "<unknown>")
+                    logger.warning(f"Skipping cached model repo {repo_label}: {e}")
                    continue
-                repo_id = repo_info.repo_id
-                if repo_id.upper().endswith("-GGUF"):
-                    continue
-                total_size = sum(
-                    f.size_on_disk for rev in repo_info.revisions for f in rev.files
-                )
-                if total_size == 0:
-                    continue
-                has_weights = any(
-                    f.file_name.endswith(_WEIGHT_EXTENSIONS)
-                    for rev in repo_info.revisions
-                    for f in rev.files
-                )
-                if not has_weights:
-                    continue
-                key = repo_id.lower()
-                existing = seen_lower.get(key)
-                if existing is None or total_size > existing["size_bytes"]:
-                    seen_lower[key] = {
-                        "repo_id": repo_id,
-                        "size_bytes": total_size,
-                    }
        cached = sorted(seen_lower.values(), key = lambda c: c["repo_id"])
        return {"cached": cached}
    except Exception as e:
@ -1390,7 +1450,7 @@ async def delete_cached_model(
            deleted_count = 0
            for rev in target_repo.revisions:
                for f in rev.files:
-                    if not f.file_name.endswith(".gguf"):
+                    if not _is_gguf_filename(f.file_name):
                        continue
                    quant = _extract_quant_label(f.file_name)
                    if quant.lower() != variant.lower():
--- a/studio/backend/tests/test_cached_gguf_routes.py
+++ b/studio/backend/tests/test_cached_gguf_routes.py
@ -0,0 +1,398 @@
+# SPDX-License-Identifier: AGPL-3.0-only
+# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0
+
+import asyncio
+import sys
+import types
+from pathlib import Path
+from types import SimpleNamespace
+
+# Keep this test runnable in lightweight environments where optional logging
+# deps are not installed.
+if "structlog" not in sys.modules:
+
+    # Stand-in logger: any attribute lookup yields a callable that accepts
+    # arbitrary arguments and returns None, so log calls are no-ops.
+    class _DummyLogger:
+        def __getattr__(self, _name):
+            return lambda *args, **kwargs: None
+
+    # Register a minimal fake ``structlog`` module exposing only
+    # ``BoundLogger`` and ``get_logger``.
+    sys.modules["structlog"] = types.SimpleNamespace(
+        BoundLogger = _DummyLogger,
+        get_logger = lambda *args, **kwargs: _DummyLogger(),
+    )
+
+import routes.models as models_route
+
+
+def _repo(
+    repo_id: str,
+    files: list[SimpleNamespace],
+    repo_path: Path,
+    *,
+    revisions: list[SimpleNamespace] | None = None,
+) -> SimpleNamespace:
+    """Build a fake cached-repo record with ``repo_type = "model"``.
+
+    When ``revisions`` is None or empty, a single revision wrapping
+    ``files`` is used; otherwise ``files`` is ignored and ``revisions``
+    is used as given."""
+    return SimpleNamespace(
+        repo_id = repo_id,
+        repo_type = "model",
+        repo_path = repo_path,
+        revisions = revisions or [SimpleNamespace(files = files)],
+    )
+
+
+def _file(
+    name: str,
+    size_on_disk: int | None,
+    *,
+    blob_path: str | None = None,
+) -> SimpleNamespace:
+    """Build a fake cached-file record with ``file_name``,
+    ``size_on_disk`` and ``blob_path`` attributes.
+
+    ``size_on_disk`` may be None: one test in this module passes None
+    to model a partial or interrupted download."""
+    return SimpleNamespace(
+        file_name = name,
+        size_on_disk = size_on_disk,
+        blob_path = blob_path,
+    )
+
+
+def test_iter_gguf_paths_matches_extension_case_insensitively(tmp_path):
+    """``_iter_gguf_paths`` walks nested directories and yields both
+    ``.gguf`` and ``.GGUF`` files while ignoring other files."""
+    nested = tmp_path / "snapshots" / "rev"
+    nested.mkdir(parents = True)
+    lower = nested / "Q4_K_M.gguf"
+    upper = nested / "Q8_0.GGUF"
+    other = nested / "README.md"
+    lower.write_text("a")
+    upper.write_text("b")
+    other.write_text("c")
+
+    # Start from the top-level directory to exercise the recursive walk.
+    result = sorted(path.name for path in models_route._iter_gguf_paths(tmp_path))
+
+    assert result == ["Q4_K_M.gguf", "Q8_0.GGUF"]
+
+
+def test_list_cached_gguf_includes_non_suffix_repo_when_cache_contains_gguf(
+    monkeypatch, tmp_path
+):
+    """A repo id without a ``-GGUF`` suffix is listed by cached-gguf
+    when its cache holds a GGUF file; only the GGUF bytes are counted
+    in ``size_bytes`` (the README is not)."""
+    repo = _repo(
+        "HauhauCS/Gemma-4-E4B-Uncensored-HauhauCS-Aggressive",
+        [_file("Q4_K_M.gguf", 5_000), _file("README.md", 10)],
+        tmp_path / "models--HauhauCS--Gemma",
+    )
+    scan = SimpleNamespace(repos = [repo])
+
+    # Serve the fake scan in place of the real cache scan helper.
+    monkeypatch.setattr(models_route, "_all_hf_cache_scans", lambda: [scan])
+
+    result = asyncio.run(models_route.list_cached_gguf(current_subject = "test-user"))
+
+    assert result["cached"] == [
+        {
+            "repo_id": "HauhauCS/Gemma-4-E4B-Uncensored-HauhauCS-Aggressive",
+            "size_bytes": 5_000,
+            "cache_path": str(repo.repo_path),
+        }
+    ]
+
+
+def test_list_cached_gguf_matches_extension_case_insensitively(monkeypatch, tmp_path):
+    """A file with an upper-case ``.GGUF`` extension is recognised by
+    cached-gguf."""
+    repo = _repo(
+        "Org/Model-Without-Suffix",
+        [_file("Q8_0.GGUF", 7_000)],
+        tmp_path / "models--Org--Model-Without-Suffix",
+    )
+    scan = SimpleNamespace(repos = [repo])
+
+    # Serve the fake scan in place of the real cache scan helper.
+    monkeypatch.setattr(models_route, "_all_hf_cache_scans", lambda: [scan])
+
+    result = asyncio.run(models_route.list_cached_gguf(current_subject = "test-user"))
+
+    assert result["cached"] == [
+        {
+            "repo_id": "Org/Model-Without-Suffix",
+            "size_bytes": 7_000,
+            "cache_path": str(repo.repo_path),
+        }
+    ]
+
+
+def test_list_cached_gguf_skips_repos_without_positive_gguf_size(monkeypatch, tmp_path):
+    """Repos with no GGUF file, or whose GGUF files total zero bytes,
+    are omitted from cached-gguf."""
+    # No GGUF file at all.
+    missing = _repo(
+        "Org/ReadmeOnly",
+        [_file("README.md", 10)],
+        tmp_path / "models--Org--ReadmeOnly",
+    )
+    # A GGUF file is present but reports zero bytes on disk.
+    zero = _repo(
+        "Org/ZeroSize",
+        [_file("Q4_K_M.gguf", 0)],
+        tmp_path / "models--Org--ZeroSize",
+    )
+    scan = SimpleNamespace(repos = [missing, zero])
+
+    monkeypatch.setattr(models_route, "_all_hf_cache_scans", lambda: [scan])
+
+    result = asyncio.run(models_route.list_cached_gguf(current_subject = "test-user"))
+
+    assert result["cached"] == []
+
+
+def test_list_cached_gguf_keeps_largest_duplicate_repo_across_scans(
+    monkeypatch, tmp_path
+):
+    """When two scans report repo ids that differ only by letter case,
+    cached-gguf keeps just the entry with the larger GGUF total."""
+    smaller = _repo(
+        "Org/Dupe",
+        [_file("Q4_K_M.gguf", 2_000)],
+        tmp_path / "models--Org--Dupe-a",
+    )
+    larger = _repo(
+        "org/dupe",
+        [_file("Q4_K_M.gguf", 5_000), _file("Q6_K.gguf", 1_000)],
+        tmp_path / "models--Org--Dupe-b",
+    )
+
+    # Each repo comes from a separate cache scan.
+    monkeypatch.setattr(
+        models_route,
+        "_all_hf_cache_scans",
+        lambda: [
+            SimpleNamespace(repos = [smaller]),
+            SimpleNamespace(repos = [larger]),
+        ],
+    )
+
+    result = asyncio.run(models_route.list_cached_gguf(current_subject = "test-user"))
+
+    # The surviving entry carries the id, size and path of the larger repo.
+    assert result["cached"] == [
+        {
+            "repo_id": "org/dupe",
+            "size_bytes": 6_000,
+            "cache_path": str(larger.repo_path),
+        }
+    ]
+
+
+def test_list_cached_gguf_dedupes_shared_blobs_across_revisions(monkeypatch, tmp_path):
+    """A GGUF file that appears in two revisions with the same
+    ``blob_path`` is counted once in ``size_bytes``."""
+    shared = "blobs/shared-q4"
+    repo = _repo(
+        "Org/SharedBlobRepo",
+        [],
+        tmp_path / "models--Org--SharedBlobRepo",
+        revisions = [
+            SimpleNamespace(files = [_file("Q4_K_M.gguf", 5_000, blob_path = shared)]),
+            SimpleNamespace(files = [_file("Q4_K_M.gguf", 5_000, blob_path = shared)]),
+        ],
+    )
+
+    monkeypatch.setattr(
+        models_route,
+        "_all_hf_cache_scans",
+        lambda: [SimpleNamespace(repos = [repo])],
+    )
+
+    result = asyncio.run(models_route.list_cached_gguf(current_subject = "test-user"))
+
+    # 5_000, not 10_000: the shared blob is not double-counted.
+    assert result["cached"] == [
+        {
+            "repo_id": "Org/SharedBlobRepo",
+            "size_bytes": 5_000,
+            "cache_path": str(repo.repo_path),
+        }
+    ]
+
+
+def test_list_cached_models_skips_non_suffix_repo_when_gguf_files_exist(
+    monkeypatch, tmp_path
+):
+    """A repo holding both a GGUF weight and safetensors weights is
+    excluded from cached-models, even without a ``-GGUF`` suffix."""
+    mixed = _repo(
+        "Org/MixedRepo",
+        [
+            _file("Q4_K_M.gguf", 5_000),
+            _file("model.safetensors", 10_000),
+        ],
+        tmp_path / "models--Org--MixedRepo",
+    )
+
+    monkeypatch.setattr(
+        models_route,
+        "_all_hf_cache_scans",
+        lambda: [SimpleNamespace(repos = [mixed])],
+    )
+
+    result = asyncio.run(models_route.list_cached_models(current_subject = "test-user"))
+
+    assert result["cached"] == []
+
+
+def test_list_cached_gguf_includes_mixed_repo_with_gguf_and_safetensors(
+    monkeypatch, tmp_path
+):
+    """Mirror of the _skips_ test: the mixed repo should still surface in
+    cached-gguf so the picker can show it as a GGUF download."""
+    mixed = _repo(
+        "Org/MixedRepo",
+        [
+            _file("Q4_K_M.gguf", 5_000),
+            _file("model.safetensors", 10_000),
+        ],
+        tmp_path / "models--Org--MixedRepo",
+    )
+
+    # Serve the fake repo in place of the real cache scan helper.
+    monkeypatch.setattr(
+        models_route,
+        "_all_hf_cache_scans",
+        lambda: [SimpleNamespace(repos = [mixed])],
+    )
+
+    result = asyncio.run(models_route.list_cached_gguf(current_subject = "test-user"))
+
+    # Only the GGUF bytes are reported; the safetensors file is not counted.
+    assert result["cached"] == [
+        {
+            "repo_id": "Org/MixedRepo",
+            "size_bytes": 5_000,
+            "cache_path": str(mixed.repo_path),
+        }
+    ]
+
+
+def test_list_cached_gguf_handles_none_size_on_disk(monkeypatch, tmp_path):
+    """A partial/interrupted GGUF download has ``size_on_disk = None``. The
+    route must treat the unknown bytes as zero instead of raising TypeError
+    out of ``sum()`` and wiping the entire response."""
+    # One file with an unknown size next to one with a known size.
+    partial = _repo(
+        "Org/PartialDownload",
+        [_file("Q4_K_M.gguf", None), _file("Q6_K.gguf", 5_000)],
+        tmp_path / "models--Org--PartialDownload",
+    )
+
+    monkeypatch.setattr(
+        models_route,
+        "_all_hf_cache_scans",
+        lambda: [SimpleNamespace(repos = [partial])],
+    )
+
+    result = asyncio.run(models_route.list_cached_gguf(current_subject = "test-user"))
+
+    # The None-sized file contributes nothing; the total is the known 5_000.
+    assert result["cached"] == [
+        {
+            "repo_id": "Org/PartialDownload",
+            "size_bytes": 5_000,
+            "cache_path": str(partial.repo_path),
+        }
+    ]
+
+
+def test_list_cached_gguf_skips_malformed_repo_without_wiping_response(
+    monkeypatch, tmp_path
+):
+    """One repo raising during classification must not poison the response
+    for every other repo in the scan."""
+
+    # Fake repo whose ``revisions`` attribute raises on access.
+    class _ExplodingRepo:
+        repo_id = "Org/Broken"
+        repo_type = "model"
+        repo_path = tmp_path / "models--Org--Broken"
+
+        @property
+        def revisions(self):
+            raise RuntimeError("boom")
+
+    healthy = _repo(
+        "Org/Healthy",
+        [_file("Q4_K_M.gguf", 5_000)],
+        tmp_path / "models--Org--Healthy",
+    )
+
+    # The broken repo is listed first, so it is processed before the healthy one.
+    monkeypatch.setattr(
+        models_route,
+        "_all_hf_cache_scans",
+        lambda: [SimpleNamespace(repos = [_ExplodingRepo(), healthy])],
+    )
+
+    result = asyncio.run(models_route.list_cached_gguf(current_subject = "test-user"))
+
+    assert result["cached"] == [
+        {
+            "repo_id": "Org/Healthy",
+            "size_bytes": 5_000,
+            "cache_path": str(healthy.repo_path),
+        }
+    ]
+
+
+def test_list_cached_gguf_skips_repo_with_only_mmproj_gguf(monkeypatch, tmp_path):
+    """A repo whose only ``.gguf`` artifact is an mmproj vision adapter
+    must not be classified as a GGUF repo: the variant selector filters
+    mmproj out and the picker would otherwise show zero variants."""
+    # The only ``.gguf`` file here is the mmproj adapter.
+    mmproj_only = _repo(
+        "Org/MmprojOnly",
+        [
+            _file("mmproj-Q8_0.gguf", 5_000),
+            _file("model.safetensors", 10_000),
+        ],
+        tmp_path / "models--Org--MmprojOnly",
+    )
+
+    monkeypatch.setattr(
+        models_route,
+        "_all_hf_cache_scans",
+        lambda: [SimpleNamespace(repos = [mmproj_only])],
+    )
+
+    result = asyncio.run(models_route.list_cached_gguf(current_subject = "test-user"))
+
+    assert result["cached"] == []
+
+
+def test_list_cached_models_includes_repo_with_only_mmproj_gguf(monkeypatch, tmp_path):
+    """Mirror of the cached-gguf skip: a safetensors repo with an
+    auxiliary mmproj vision adapter must still surface in cached-models
+    so the user can load it as a normal model."""
+    mmproj_aux = _repo(
+        "Org/MmprojAux",
+        [
+            _file("mmproj-Q8_0.gguf", 5_000),
+            _file("model.safetensors", 10_000),
+        ],
+        tmp_path / "models--Org--MmprojAux",
+    )
+
+    monkeypatch.setattr(
+        models_route,
+        "_all_hf_cache_scans",
+        lambda: [SimpleNamespace(repos = [mmproj_aux])],
+    )
+
+    result = asyncio.run(models_route.list_cached_models(current_subject = "test-user"))
+
+    # cached-models sums every file, so the mmproj bytes are included here.
+    assert result["cached"] == [
+        {
+            "repo_id": "Org/MmprojAux",
+            "size_bytes": 15_000,
+        }
+    ]
+
+
+def test_list_cached_gguf_includes_vision_repo_with_main_gguf_and_mmproj(
+    monkeypatch, tmp_path
+):
+    """A vision-capable GGUF repo (main weight + mmproj adapter) is still
+    a GGUF repo. The reported size is the main weight size; mmproj is
+    excluded from the GGUF-size accounting because it is filtered out at
+    classification time."""
+    vision_repo = _repo(
+        "Org/VisionGguf",
+        [
+            _file("Q4_K_M.gguf", 5_000),
+            _file("mmproj-Q8_0.gguf", 1_000),
+        ],
+        tmp_path / "models--Org--VisionGguf",
+    )
+
+    monkeypatch.setattr(
+        models_route,
+        "_all_hf_cache_scans",
+        lambda: [SimpleNamespace(repos = [vision_repo])],
+    )
+
+    result = asyncio.run(models_route.list_cached_gguf(current_subject = "test-user"))
+
+    # 5_000, not 6_000: the mmproj adapter's bytes are left out.
+    assert result["cached"] == [
+        {
+            "repo_id": "Org/VisionGguf",
+            "size_bytes": 5_000,
+            "cache_path": str(vision_repo.repo_path),
+        }
+    ]
--- a/studio/backend/utils/models/model_config.py
+++ b/studio/backend/utils/models/model_config.py
@ -904,6 +904,18 @@ def _is_mmproj(filename: str) -> bool:
    return "mmproj" in filename.lower()


+def _is_gguf_filename(filename: str) -> bool:
+    """Return True when ``filename`` ends with ``.gguf``, compared
+    case-insensitively."""
+    return filename.lower().endswith(".gguf")
+
+
+def _iter_gguf_files(directory: Path):
+    """Yield the files directly inside ``directory`` (no recursion)
+    whose name ends with ``.gguf`` in any letter case.
+
+    Yields nothing when ``directory`` is not a directory."""
+    if not directory.is_dir():
+        return
+    for f in directory.iterdir():
+        if f.is_file() and _is_gguf_filename(f.name):
+            yield f
+
+
 def detect_mmproj_file(path: str) -> Optional[str]:
    """
    Find the mmproj (vision projection) GGUF file in a directory.
@ -919,7 +931,7 @@ def detect_mmproj_file(path: str) -> Optional[str]:
    if not search_dir.is_dir():
        return None

-    for f in search_dir.glob("*.gguf"):
+    for f in _iter_gguf_files(search_dir):
        if _is_mmproj(f.name):
            return str(f.resolve())
    return None
@ -942,7 +954,7 @@ def detect_gguf_model(path: str) -> Optional[str]:
    p = Path(path)

    # Case 1: direct .gguf file
-    if p.suffix == ".gguf" and p.is_file():
+    if p.suffix.lower() == ".gguf" and p.is_file():
        if _is_mmproj(p.name):
            return None
        return str(p.resolve())
@ -950,7 +962,7 @@ def detect_gguf_model(path: str) -> Optional[str]:
    # Case 2: directory containing .gguf files (skip mmproj)
    if p.is_dir():
        gguf_files = sorted(
-            (f for f in p.glob("*.gguf") if not _is_mmproj(f.name)),
+            (f for f in _iter_gguf_files(p) if not _is_mmproj(f.name)),
            key = lambda f: f.stat().st_size,
            reverse = True,
        )
@ -1015,7 +1027,7 @@ def _pick_best_gguf(filenames: list[str]) -> Optional[str]:
    Prefers quantization levels in _GGUF_QUANT_PREFERENCE order.
    Falls back to the first .gguf file found.
    """
-    gguf_files = [f for f in filenames if f.endswith(".gguf")]
+    gguf_files = [f for f in filenames if f.lower().endswith(".gguf")]
    if not gguf_files:
        return None

@ -1100,7 +1112,7 @@ def list_gguf_variants(

    for sibling in info.siblings:
        fname = sibling.rfilename
-        if not fname.endswith(".gguf"):
+        if not fname.lower().endswith(".gguf"):
            continue
        size = sibling.size or 0

@ -1171,7 +1183,7 @@ def list_local_gguf_variants(
    quant_first_file: dict[str, str] = {}
    has_vision = False

-    for f in sorted(p.glob("*.gguf")):
+    for f in sorted(_iter_gguf_files(p)):
        if _is_mmproj(f.name):
            has_vision = True
            continue
@ -1210,7 +1222,7 @@ def _find_local_gguf_by_variant(directory: str, variant: str) -> Optional[str]:

    matches = sorted(
        f
-        for f in p.glob("*.gguf")
+        for f in _iter_gguf_files(p)
        if not _is_mmproj(f.name) and _extract_quant_label(f.name) == variant
    )
    if matches:
@ -1438,7 +1450,9 @@ def scan_exported_models(

            # Check for flat GGUF export (e.g. exports/gemma-3-4b-it-finetune-gguf/)
            # Filter out mmproj (vision projection) files — they aren't loadable as main models
-            gguf_files = [f for f in run_dir.glob("*.gguf") if not _is_mmproj(f.name)]
+            gguf_files = [
+                f for f in _iter_gguf_files(run_dir) if not _is_mmproj(f.name)
+            ]
            if gguf_files:
                base_model = None
                export_meta = run_dir / "export_metadata.json"
@ -1465,7 +1479,7 @@ def scan_exported_models(
                has_weights = any(checkpoint_dir.glob("*.safetensors")) or any(
                    checkpoint_dir.glob("*.bin")
                )
-                has_gguf = any(checkpoint_dir.glob("*.gguf"))
+                has_gguf = any(_iter_gguf_files(checkpoint_dir))

                base_model = None
                export_type = None
@ -1488,7 +1502,7 @@ def scan_exported_models(
                        pass
                elif has_gguf:
                    export_type = "gguf"
-                    gguf_list = list(checkpoint_dir.glob("*.gguf"))
+                    gguf_list = list(_iter_gguf_files(checkpoint_dir))
                    # Check checkpoint_dir first, then fall back to parent run_dir
                    # (export.py writes metadata to the top-level export directory)
                    for meta_dir in (checkpoint_dir, run_dir):
--- a/studio/frontend/src/components/assistant-ui/model-selector/pickers.tsx
+++ b/studio/frontend/src/components/assistant-ui/model-selector/pickers.tsx
@ -399,10 +399,14 @@ function GgufVariantExpander({
  );
 }

-// ── Detect GGUF repos by naming convention ────────────────────
+// ── Detect GGUF repos by naming convention or hub tag ────────────────────

-function isGgufRepo(id: string): boolean {
-  return id.toUpperCase().includes("-GGUF");
+/**
+ * True when `id` contains `-GGUF` (any letter case) either at the very end
+ * of the string or immediately followed by another `-`.
+ */
+function hasGgufSuffix(id: string): boolean {
+  return /-GGUF(?:$|-)/i.test(id);
+}
+
+/**
+ * True when the caller-supplied hint is truthy; otherwise falls back to the
+ * `-GGUF` naming check in `hasGgufSuffix`. An omitted or `false` hint never
+ * rules a repo out: it only defers the decision to the name.
+ */
+function isGgufRepo(id: string, hintedIsGguf?: boolean): boolean {
+  return Boolean(hintedIsGguf) || hasGgufSuffix(id);
 }

 /** Extract param count label from model name (e.g. "Qwen3-0.6B" -> "0.6B"). */
@ -451,6 +455,33 @@ export function HubModelPicker({
  const { results, isLoading, isLoadingMore, fetchMore } =
    useHfModelSearch(debouncedQuery);

+  // Sets of lowercased repo ids that the store or HF search have
+  // confirmed are GGUF. Absence means "no hint" and lets hasGgufSuffix
+  // take over as fallback, rather than conflating unknown with known-
+  // not-GGUF. Keys are lowercased so that store IDs and HF search IDs
+  // that differ only by casing still match the same hint.
+  const modelGgufIds = useMemo(() => {
+    const ids = new Set<string>();
+    for (const model of models) {
+      if (model.isGguf) ids.add(model.id.toLowerCase());
+    }
+    return ids;
+  }, [models]);
+  const resultGgufIds = useMemo(() => {
+    const ids = new Set<string>();
+    for (const result of results) {
+      if (result.isGguf) ids.add(result.id.toLowerCase());
+    }
+    return ids;
+  }, [results]);
+  const isKnownGgufRepo = useCallback(
+    (id: string): boolean => {
+      const key = id.toLowerCase();
+      return isGgufRepo(id, resultGgufIds.has(key) || modelGgufIds.has(key));
+    },
+    [modelGgufIds, resultGgufIds],
+  );
+
  // Track which GGUF repo is expanded for variant selection
  const [expandedGguf, setExpandedGguf] = useState<string | null>(null);

@ -625,17 +656,17 @@ export function HubModelPicker({
  const recommendedIds = useMemo(() => {
    const all = dedupe([...models.map((model) => model.id), value ?? ""])
      .filter((id) => !downloadedSet.has(id.toLowerCase()))
-      .filter((id) => !chatOnly || isGgufRepo(id))
+      .filter((id) => !chatOnly || isKnownGgufRepo(id))
      .filter((id) => !/-FP8[-.]|FP8-Dynamic/i.test(id));
    // Sort: GGUFs first, then hub models
    const gguf: string[] = [];
    const hub: string[] = [];
    for (const id of all) {
-      if (isGgufRepo(id)) gguf.push(id);
+      if (isKnownGgufRepo(id)) gguf.push(id);
      else hub.push(id);
    }
    return [...gguf, ...hub];
-  }, [models, value, downloadedSet, chatOnly]);
+  }, [models, value, downloadedSet, chatOnly, isKnownGgufRepo]);

  // Infinite scroll paging for the recommended section
  const [recommendedPage, setRecommendedPage] = useState(1);
@ -645,7 +676,7 @@ export function HubModelPicker({
  }, [models, chatOnly]);

  const visibleRecommendedIds = useMemo(() => {
-    const hubStartIndex = recommendedIds.findIndex((id) => !isGgufRepo(id));
+    const hubStartIndex = recommendedIds.findIndex((id) => !isKnownGgufRepo(id));
    const allGguf =
      hubStartIndex === -1
        ? recommendedIds
@ -659,7 +690,7 @@ export function HubModelPicker({
      result.push(...allHub.slice(p * 4, (p + 1) * 4));
    }
    return result;
-  }, [recommendedIds, recommendedPage]);
+  }, [recommendedIds, recommendedPage, isKnownGgufRepo]);

  const hasMoreRecommended =
    visibleRecommendedIds.length < recommendedIds.length;
@ -681,8 +712,8 @@ export function HubModelPicker({
    const ids = showHfSection
      ? [...new Set([...visibleRecommendedIds, ...filteredRecommendedIds])]
      : visibleRecommendedIds;
-    return ids.filter((id) => !isGgufRepo(id));
-  }, [visibleRecommendedIds, showHfSection, filteredRecommendedIds]);
+    return ids.filter((id) => !isKnownGgufRepo(id));
+  }, [visibleRecommendedIds, showHfSection, filteredRecommendedIds, isKnownGgufRepo]);
  const { paramCountById: recommendedParamCountById } =
    useRecommendedModelVram(idsForVram);

@ -697,9 +728,9 @@ export function HubModelPicker({
    return results
      .map((result) => result.id)
      .filter((id) => !recommendedSet.has(id))
-      .filter((id) => !chatOnly || isGgufRepo(id))
+      .filter((id) => !chatOnly || isKnownGgufRepo(id))
      .filter((id) => !/-FP8[-.]|FP8-Dynamic/i.test(id));
-  }, [recommendedSet, results, showHfSection, chatOnly]);
+  }, [recommendedSet, results, showHfSection, chatOnly, isKnownGgufRepo]);

  const metricsById = useMemo(
    () =>
@ -800,14 +831,14 @@ export function HubModelPicker({
  /** Handle clicking a model row — GGUF repos expand, others load directly. */
  const handleModelClick = useCallback(
    (id: string) => {
-      if (isGgufRepo(id)) {
+      if (isKnownGgufRepo(id)) {
        // Toggle GGUF variant expander
        setExpandedGguf((prev) => (prev === id ? null : id));
      } else {
        onSelect(id, { source: "hub", isLora: false });
      }
    },
-    [onSelect],
+    [onSelect, isKnownGgufRepo],
  );

  return (
@ -848,7 +879,11 @@ export function HubModelPicker({
                    label={c.repo_id}
                    meta={`GGUF · ${formatBytes(c.size_bytes)}`}
                    selected={value === c.repo_id}
-                    onClick={() => handleModelClick(c.repo_id)}
+                    onClick={() =>
+                      setExpandedGguf((prev) =>
+                        prev === c.repo_id ? null : c.repo_id,
+                      )
+                    }
                    vramStatus={null}
                  />
                  {expandedGguf === c.repo_id && (
@ -909,7 +944,7 @@ export function HubModelPicker({
                    <ModelRow
                      label={m.model_id ?? m.display_name}
                      meta={
-                        isGguf || m.path.endsWith(".gguf") ? "GGUF" : "Local"
+                        isGguf || m.path.toLowerCase().endsWith(".gguf") ? "GGUF" : "Local"
                      }
                      selected={value === m.id}
                      onClick={() => {
@ -1036,7 +1071,7 @@ export function HubModelPicker({
                const isGguf =
                  isGgufRepo(m.id) ||
                  isGgufRepo(m.display_name) ||
-                  m.path.endsWith(".gguf");
+                  m.path.toLowerCase().endsWith(".gguf");
                return (
                  <div key={m.id}>
                    <ModelRow
@ -1089,16 +1124,22 @@ export function HubModelPicker({
                      <ModelRow
                        label={id}
                        meta={
-                          isGgufRepo(id)
+                          isKnownGgufRepo(id)
                            ? "GGUF"
                            : (vram?.detail ?? extractParamLabel(id))
                        }
                        selected={value === id}
-                        onClick={() => handleModelClick(id)}
+                        onClick={() => {
+                          if (isKnownGgufRepo(id)) {
+                            setExpandedGguf((prev) => (prev === id ? null : id));
+                          } else {
+                            handleModelClick(id);
+                          }
+                        }}
                        vramStatus={
-                          isGgufRepo(id) ? null : (vram?.status ?? null)
+                          isKnownGgufRepo(id) ? null : (vram?.status ?? null)
                        }
-                        vramEst={isGgufRepo(id) ? undefined : vram?.est}
+                        vramEst={isKnownGgufRepo(id) ? undefined : vram?.est}
                        gpuGb={gpu.available ? gpu.memoryTotalGb : undefined}
                      />
                      {expandedGguf === id && (
@ -1136,16 +1177,22 @@ export function HubModelPicker({
                    <ModelRow
                      label={id}
                      meta={
-                        isGgufRepo(id)
+                        isKnownGgufRepo(id)
                          ? "GGUF"
                          : (vram?.detail ?? extractParamLabel(id))
                      }
                      selected={value === id}
-                      onClick={() => handleModelClick(id)}
+                      onClick={() => {
+                        if (isKnownGgufRepo(id)) {
+                          setExpandedGguf((prev) => (prev === id ? null : id));
+                        } else {
+                          handleModelClick(id);
+                        }
+                      }}
                      vramStatus={
-                        isGgufRepo(id) ? null : (vram?.status ?? null)
+                        isKnownGgufRepo(id) ? null : (vram?.status ?? null)
                      }
-                      vramEst={isGgufRepo(id) ? undefined : vram?.est}
+                      vramEst={isKnownGgufRepo(id) ? undefined : vram?.est}
                      gpuGb={gpu.available ? gpu.memoryTotalGb : undefined}
                    />
                    {expandedGguf === id && (
@ -1175,24 +1222,31 @@ export function HubModelPicker({
                    No matching models.
                  </div>
                ) : null
-              ) : (
+                ) : (
                hfIds.map((id) => {
                  const vram = vramMap.get(id);
+                  const isSearchGguf = isKnownGgufRepo(id);
                  return (
                    <div key={id}>
                      <ModelRow
                        label={id}
                        meta={
-                          isGgufRepo(id)
+                          isSearchGguf
                            ? "GGUF"
                            : (metricsById.get(id) ?? extractParamLabel(id))
                        }
                        selected={value === id}
-                        onClick={() => handleModelClick(id)}
+                        onClick={() => {
+                          if (isSearchGguf) {
+                            setExpandedGguf((prev) => (prev === id ? null : id));
+                          } else {
+                            handleModelClick(id);
+                          }
+                        }}
                        vramStatus={
-                          isGgufRepo(id) ? null : (vram?.status ?? null)
+                          isSearchGguf ? null : (vram?.status ?? null)
                        }
-                        vramEst={isGgufRepo(id) ? undefined : vram?.est}
+                        vramEst={isSearchGguf ? undefined : vram?.est}
                        gpuGb={gpu.available ? gpu.memoryTotalGb : undefined}
                      />
                      {expandedGguf === id && (
--- a/studio/frontend/src/components/assistant-ui/model-selector/types.ts
+++ b/studio/frontend/src/components/assistant-ui/model-selector/types.ts
@ -8,6 +8,7 @@ export interface ModelOption {
  name: string;
  description?: string;
  icon?: ReactNode;
+  isGguf?: boolean;
 }

 export interface LoraModelOption extends ModelOption {
@ -24,4 +25,3 @@ export interface ModelSelectorChangeMeta {
  isDownloaded?: boolean;
  expectedBytes?: number;
 }
-
--- a/studio/frontend/src/features/chat/chat-page.tsx
+++ b/studio/frontend/src/features/chat/chat-page.tsx
@ -691,6 +691,7 @@ export function ChatPage(): ReactElement {
        id: model.id,
        name: model.name,
        description: model.description,
+        isGguf: model.isGguf,
      })),
    [modelsFromStore],
  );
--- a/studio/frontend/src/hooks/use-hf-model-search.ts
+++ b/studio/frontend/src/hooks/use-hf-model-search.ts
@ -13,6 +13,7 @@ export interface HfModelResult {
  likes: number;
  totalParams?: number;
  estimatedSizeBytes?: number;
+  isGguf: boolean;
 }

 const EXCLUDED_TAGS = new Set([
@ -89,7 +90,10 @@ function makeMapModel(excludeGguf: boolean) {
    if (!isEmbedding && m.tags?.some((t) => EXCLUDED_TAGS.has(t))) {
      return null;
    }
-    if (excludeGguf && m.tags?.includes("gguf")) {
+    const isGguf =
+      Boolean(m.tags?.some((tag) => tag.toLowerCase() === "gguf")) ||
+      /-GGUF(?:$|-)/i.test(m.name);
+    if (excludeGguf && isGguf) {
      return null;
    }
    return {
@ -98,6 +102,7 @@ function makeMapModel(excludeGguf: boolean) {
      likes: m.likes,
      totalParams: m.safetensors?.total,
      estimatedSizeBytes: estimateSizeFromDtypes(m.safetensors?.parameters),
+      isGguf,
    };
  };
 }
@ -330,4 +335,3 @@ export function useHfModelSearch(

  return { ...search, results };
 }
-