Studio: support GGUF variant selection for non-suffixed repos (#5023)

* fix: support GGUF variant selection for non-suffixed repos

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* fix: harden GGUF detection across cached models and picker flows

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* chore: use shared GGUF picker helper for search rows

* fix: avoid mixed cache duplication and preserve GGUF fallback detection

* fix: unify GGUF cache matching and merge picker hints

* fix: normalize local GGUF matching across picker and model config

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* fix: robust cached-gguf classification + hint-aware click routing

- _repo_gguf_size_bytes: treat size_on_disk=None as 0 and dedupe fallback
  by commit_hash so partial/interrupted downloads don't TypeError out of
  sum() and wipe the entire cached list.
- list_cached_gguf / list_cached_models: narrow per-repo try/except so
  one malformed repo no longer poisons the whole response.
- handleModelClick: route through isKnownGgufRepo instead of the
  suffix-only isGgufRepo, so non-suffixed GGUF repos still open the
  variant expander from every call site.
- Replace the modelIsGgufById/resultIsGgufById Maps with Sets of known
  GGUF ids to stop conflating "no hint" with "known not-GGUF".
- Make HfModelResult.isGguf required (it is always set in makeMapModel).
- Add regression tests for the None size case, mixed-repo inclusion in
  cached-gguf, and per-repo error isolation.

* fix: exclude mmproj from GGUF classification and case-normalize hint lookups

- _repo_gguf_size_bytes now filters mmproj vision-adapter files so
  safetensors+mmproj.gguf repos stay on the cached-models path and
  non-GGUF rows no longer show zero pickable variants. A vision-capable
  GGUF repo (main weight + mmproj adapter) still classifies as GGUF and
  reports the main weight size.
- modelGgufIds / resultGgufIds now key on lowercased ids and
  isKnownGgufRepo lowercases its lookup, so store and HF-search ids
  that differ only by casing still match the same GGUF hint.
- New regression tests: mmproj-only repo excluded from cached-gguf,
  same repo included in cached-models, vision-capable repo still
  classified as GGUF with correct size.

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Roland Tannous <rolandtannous@gravityq.ai>
Co-authored-by: Roland Tannous <115670425+rolandtannous@users.noreply.github.com>
This commit is contained in:
Lee Jackson 2026-04-15 12:32:01 +01:00 committed by GitHub
parent 13928b5f0e
commit f9ef639dde
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 621 additions and 90 deletions

View file

@ -992,7 +992,7 @@ async def get_gguf_variants(
snapshots = entry / "snapshots"
if snapshots.is_dir():
for snap in snapshots.iterdir():
for f in snap.rglob("*.gguf"):
for f in _iter_gguf_paths(snap):
q = _extract_quant_label(f.name)
cached_bytes_by_quant[q] = (
cached_bytes_by_quant.get(q, 0) + f.stat().st_size
@ -1061,7 +1061,7 @@ async def get_gguf_download_progress(
for entry in cache_dir.iterdir():
if entry.name.lower() == target:
# Count completed .gguf files matching this variant in snapshots
for f in entry.rglob("*.gguf"):
for f in _iter_gguf_paths(entry):
fname = f.name.lower().replace("-", "").replace("_", "")
if not variant_lower or variant_lower in fname:
downloaded_bytes += f.stat().st_size
@ -1237,6 +1237,62 @@ def _all_hf_cache_scans():
return scans
def _is_gguf_filename(name: str) -> bool:
return name.lower().endswith(".gguf")
def _is_mmproj_filename(name: str) -> bool:
"""Match GGUF vision-adapter (mmproj) files. Kept consistent with
``utils.models.model_config._is_mmproj``."""
return "mmproj" in name.lower()
def _is_main_gguf_filename(name: str) -> bool:
    """True for a GGUF file that is a primary weight artifact rather than
    an mmproj vision adapter."""
    if not _is_gguf_filename(name):
        return False
    return not _is_mmproj_filename(name)
def _iter_gguf_paths(root: Path):
    """Yield every GGUF file anywhere under *root*.

    Walks the tree with ``rglob("*")`` and filters via
    ``_is_gguf_filename`` so the extension match is case-insensitive
    (a plain ``rglob("*.gguf")`` would miss ``.GGUF``).
    """
    for candidate in root.rglob("*"):
        if not candidate.is_file():
            continue
        if _is_gguf_filename(candidate.name):
            yield candidate
def _repo_gguf_size_bytes(repo_info) -> int:
    """Total on-disk bytes of primary GGUF weight files across all
    revisions of a cached repo, excluding mmproj vision adapters.

    Hugging Face hardlinks blobs shared between revisions, so sizes are
    keyed by blob path (or, when no blob path is known, by revision
    commit hash + filename) and each unique blob counts once. Files
    whose ``size_on_disk`` is ``None`` — e.g. a partial or interrupted
    download — contribute zero bytes instead of raising. mmproj files
    are excluded so a repo whose only ``.gguf`` artifact is a vision
    adapter is not classified as a GGUF repo: the variant selector
    filters mmproj out and would otherwise offer zero variants.
    """
    sizes_by_key: dict[str, int] = {}
    for revision in repo_info.revisions:
        # Fall back to the object's identity when commit_hash is absent.
        rev_key = getattr(revision, "commit_hash", None) or str(id(revision))
        for entry in revision.files:
            if not _is_main_gguf_filename(entry.file_name):
                continue
            blob = getattr(entry, "blob_path", None)
            key = str(blob) if blob else f"{rev_key}:{entry.file_name}"
            sizes_by_key[key] = entry.size_on_disk or 0
    return sum(sizes_by_key.values())
def _repo_has_gguf_files(repo_info) -> bool:
    """Whether any revision of *repo_info* contains a primary GGUF
    weight file.

    Repos whose only ``.gguf`` artifact is an mmproj vision adapter
    report ``False`` here, since their GGUF size contribution is zero.
    """
    return bool(_repo_gguf_size_bytes(repo_info))
@router.get("/cached-gguf")
async def list_cached_gguf(
current_subject: str = Depends(get_current_subject),
@ -1248,28 +1304,25 @@ async def list_cached_gguf(
seen_lower: dict[str, dict] = {}
for hf_cache in cache_scans:
for repo_info in hf_cache.repos:
if repo_info.repo_type != "model":
try:
if repo_info.repo_type != "model":
continue
repo_id = repo_info.repo_id
total_size = _repo_gguf_size_bytes(repo_info)
if total_size == 0:
continue
key = repo_id.lower()
existing = seen_lower.get(key)
if existing is None or total_size > existing["size_bytes"]:
seen_lower[key] = {
"repo_id": repo_id,
"size_bytes": total_size,
"cache_path": str(repo_info.repo_path),
}
except Exception as e:
repo_label = getattr(repo_info, "repo_id", "<unknown>")
logger.warning(f"Skipping cached GGUF repo {repo_label}: {e}")
continue
repo_id = repo_info.repo_id
if not repo_id.upper().endswith("-GGUF"):
continue
total_size = 0
has_gguf = False
for revision in repo_info.revisions:
for f in revision.files:
if f.file_name.endswith(".gguf"):
has_gguf = True
total_size += f.size_on_disk
if not has_gguf:
continue
key = repo_id.lower()
existing = seen_lower.get(key)
if existing is None or total_size > existing["size_bytes"]:
seen_lower[key] = {
"repo_id": repo_id,
"size_bytes": total_size,
"cache_path": str(repo_info.repo_path),
}
cached = sorted(seen_lower.values(), key = lambda c: c["repo_id"])
return {"cached": cached}
except Exception as e:
@ -1290,30 +1343,37 @@ async def list_cached_models(
seen_lower: dict[str, dict] = {}
for hf_cache in cache_scans:
for repo_info in hf_cache.repos:
if repo_info.repo_type != "model":
try:
if repo_info.repo_type != "model":
continue
repo_id = repo_info.repo_id
if _repo_has_gguf_files(repo_info):
continue
total_size = sum(
(f.size_on_disk or 0)
for rev in repo_info.revisions
for f in rev.files
)
if total_size == 0:
continue
has_weights = any(
f.file_name.endswith(_WEIGHT_EXTENSIONS)
for rev in repo_info.revisions
for f in rev.files
)
if not has_weights:
continue
key = repo_id.lower()
existing = seen_lower.get(key)
if existing is None or total_size > existing["size_bytes"]:
seen_lower[key] = {
"repo_id": repo_id,
"size_bytes": total_size,
}
except Exception as e:
repo_label = getattr(repo_info, "repo_id", "<unknown>")
logger.warning(f"Skipping cached model repo {repo_label}: {e}")
continue
repo_id = repo_info.repo_id
if repo_id.upper().endswith("-GGUF"):
continue
total_size = sum(
f.size_on_disk for rev in repo_info.revisions for f in rev.files
)
if total_size == 0:
continue
has_weights = any(
f.file_name.endswith(_WEIGHT_EXTENSIONS)
for rev in repo_info.revisions
for f in rev.files
)
if not has_weights:
continue
key = repo_id.lower()
existing = seen_lower.get(key)
if existing is None or total_size > existing["size_bytes"]:
seen_lower[key] = {
"repo_id": repo_id,
"size_bytes": total_size,
}
cached = sorted(seen_lower.values(), key = lambda c: c["repo_id"])
return {"cached": cached}
except Exception as e:
@ -1390,7 +1450,7 @@ async def delete_cached_model(
deleted_count = 0
for rev in target_repo.revisions:
for f in rev.files:
if not f.file_name.endswith(".gguf"):
if not _is_gguf_filename(f.file_name):
continue
quant = _extract_quant_label(f.file_name)
if quant.lower() != variant.lower():

View file

@ -0,0 +1,398 @@
# SPDX-License-Identifier: AGPL-3.0-only
# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0
import asyncio
import sys
import types
from pathlib import Path
from types import SimpleNamespace
# Keep this test runnable in lightweight environments where optional logging
# deps are not installed.
if "structlog" not in sys.modules:
class _DummyLogger:
def __getattr__(self, _name):
return lambda *args, **kwargs: None
sys.modules["structlog"] = types.SimpleNamespace(
BoundLogger = _DummyLogger,
get_logger = lambda *args, **kwargs: _DummyLogger(),
)
import routes.models as models_route
def _repo(
repo_id: str,
files: list[SimpleNamespace],
repo_path: Path,
*,
revisions: list[SimpleNamespace] | None = None,
) -> SimpleNamespace:
return SimpleNamespace(
repo_id = repo_id,
repo_type = "model",
repo_path = repo_path,
revisions = revisions or [SimpleNamespace(files = files)],
)
def _file(
name: str,
size_on_disk: int,
*,
blob_path: str | None = None,
) -> SimpleNamespace:
return SimpleNamespace(
file_name = name,
size_on_disk = size_on_disk,
blob_path = blob_path,
)
def test_iter_gguf_paths_matches_extension_case_insensitively(tmp_path):
    """Both ``.gguf`` and ``.GGUF`` files are yielded; non-GGUF files are not."""
    nested = tmp_path / "snapshots" / "rev"
    nested.mkdir(parents = True)
    lower = nested / "Q4_K_M.gguf"
    upper = nested / "Q8_0.GGUF"
    other = nested / "README.md"
    lower.write_text("a")
    upper.write_text("b")
    other.write_text("c")
    result = sorted(path.name for path in models_route._iter_gguf_paths(tmp_path))
    assert result == ["Q4_K_M.gguf", "Q8_0.GGUF"]
def test_list_cached_gguf_includes_non_suffix_repo_when_cache_contains_gguf(
    monkeypatch, tmp_path
):
    """A repo whose id lacks the ``-GGUF`` suffix but actually contains a
    ``.gguf`` weight must still surface in the cached-gguf listing."""
    repo = _repo(
        "HauhauCS/Gemma-4-E4B-Uncensored-HauhauCS-Aggressive",
        [_file("Q4_K_M.gguf", 5_000), _file("README.md", 10)],
        tmp_path / "models--HauhauCS--Gemma",
    )
    scan = SimpleNamespace(repos = [repo])
    monkeypatch.setattr(models_route, "_all_hf_cache_scans", lambda: [scan])
    result = asyncio.run(models_route.list_cached_gguf(current_subject = "test-user"))
    # Only the GGUF weight counts toward the size; README bytes are ignored.
    assert result["cached"] == [
        {
            "repo_id": "HauhauCS/Gemma-4-E4B-Uncensored-HauhauCS-Aggressive",
            "size_bytes": 5_000,
            "cache_path": str(repo.repo_path),
        }
    ]
def test_list_cached_gguf_matches_extension_case_insensitively(monkeypatch, tmp_path):
    """An uppercase ``.GGUF`` file still classifies the repo as GGUF."""
    repo = _repo(
        "Org/Model-Without-Suffix",
        [_file("Q8_0.GGUF", 7_000)],
        tmp_path / "models--Org--Model-Without-Suffix",
    )
    scan = SimpleNamespace(repos = [repo])
    monkeypatch.setattr(models_route, "_all_hf_cache_scans", lambda: [scan])
    result = asyncio.run(models_route.list_cached_gguf(current_subject = "test-user"))
    assert result["cached"] == [
        {
            "repo_id": "Org/Model-Without-Suffix",
            "size_bytes": 7_000,
            "cache_path": str(repo.repo_path),
        }
    ]
def test_list_cached_gguf_skips_repos_without_positive_gguf_size(monkeypatch, tmp_path):
    """Repos with no GGUF files, or only zero-byte ones, are excluded."""
    missing = _repo(
        "Org/ReadmeOnly",
        [_file("README.md", 10)],
        tmp_path / "models--Org--ReadmeOnly",
    )
    zero = _repo(
        "Org/ZeroSize",
        [_file("Q4_K_M.gguf", 0)],
        tmp_path / "models--Org--ZeroSize",
    )
    scan = SimpleNamespace(repos = [missing, zero])
    monkeypatch.setattr(models_route, "_all_hf_cache_scans", lambda: [scan])
    result = asyncio.run(models_route.list_cached_gguf(current_subject = "test-user"))
    assert result["cached"] == []
def test_list_cached_gguf_keeps_largest_duplicate_repo_across_scans(
    monkeypatch, tmp_path
):
    """Repo ids differing only by casing dedupe to one entry, keeping the
    duplicate whose total GGUF size is larger."""
    smaller = _repo(
        "Org/Dupe",
        [_file("Q4_K_M.gguf", 2_000)],
        tmp_path / "models--Org--Dupe-a",
    )
    larger = _repo(
        "org/dupe",
        [_file("Q4_K_M.gguf", 5_000), _file("Q6_K.gguf", 1_000)],
        tmp_path / "models--Org--Dupe-b",
    )
    monkeypatch.setattr(
        models_route,
        "_all_hf_cache_scans",
        lambda: [
            SimpleNamespace(repos = [smaller]),
            SimpleNamespace(repos = [larger]),
        ],
    )
    result = asyncio.run(models_route.list_cached_gguf(current_subject = "test-user"))
    # The larger duplicate wins and its GGUF sizes are summed (5k + 1k).
    assert result["cached"] == [
        {
            "repo_id": "org/dupe",
            "size_bytes": 6_000,
            "cache_path": str(larger.repo_path),
        }
    ]
def test_list_cached_gguf_dedupes_shared_blobs_across_revisions(monkeypatch, tmp_path):
    """HF hardlinks blobs shared between revisions; the same blob path must
    count once toward the repo size, not once per revision."""
    shared = "blobs/shared-q4"
    repo = _repo(
        "Org/SharedBlobRepo",
        [],
        tmp_path / "models--Org--SharedBlobRepo",
        revisions = [
            SimpleNamespace(files = [_file("Q4_K_M.gguf", 5_000, blob_path = shared)]),
            SimpleNamespace(files = [_file("Q4_K_M.gguf", 5_000, blob_path = shared)]),
        ],
    )
    monkeypatch.setattr(
        models_route,
        "_all_hf_cache_scans",
        lambda: [SimpleNamespace(repos = [repo])],
    )
    result = asyncio.run(models_route.list_cached_gguf(current_subject = "test-user"))
    # 5_000, not 10_000: both revisions point at the same blob.
    assert result["cached"] == [
        {
            "repo_id": "Org/SharedBlobRepo",
            "size_bytes": 5_000,
            "cache_path": str(repo.repo_path),
        }
    ]
def test_list_cached_models_skips_non_suffix_repo_when_gguf_files_exist(
    monkeypatch, tmp_path
):
    """A repo containing real GGUF weights belongs on the cached-gguf path,
    so cached-models must exclude it even though it also has safetensors."""
    mixed = _repo(
        "Org/MixedRepo",
        [
            _file("Q4_K_M.gguf", 5_000),
            _file("model.safetensors", 10_000),
        ],
        tmp_path / "models--Org--MixedRepo",
    )
    monkeypatch.setattr(
        models_route,
        "_all_hf_cache_scans",
        lambda: [SimpleNamespace(repos = [mixed])],
    )
    result = asyncio.run(models_route.list_cached_models(current_subject = "test-user"))
    assert result["cached"] == []
def test_list_cached_gguf_includes_mixed_repo_with_gguf_and_safetensors(
    monkeypatch, tmp_path
):
    """Mirror of the _skips_ test: the mixed repo should still surface in
    cached-gguf so the picker can show it as a GGUF download."""
    mixed = _repo(
        "Org/MixedRepo",
        [
            _file("Q4_K_M.gguf", 5_000),
            _file("model.safetensors", 10_000),
        ],
        tmp_path / "models--Org--MixedRepo",
    )
    monkeypatch.setattr(
        models_route,
        "_all_hf_cache_scans",
        lambda: [SimpleNamespace(repos = [mixed])],
    )
    result = asyncio.run(models_route.list_cached_gguf(current_subject = "test-user"))
    # Only GGUF bytes are reported; the safetensors file does not count.
    assert result["cached"] == [
        {
            "repo_id": "Org/MixedRepo",
            "size_bytes": 5_000,
            "cache_path": str(mixed.repo_path),
        }
    ]
def test_list_cached_gguf_handles_none_size_on_disk(monkeypatch, tmp_path):
    """A partial/interrupted GGUF download has ``size_on_disk = None``. The
    route must treat the unknown bytes as zero instead of raising TypeError
    out of ``sum()`` and wiping the entire response."""
    partial = _repo(
        "Org/PartialDownload",
        [_file("Q4_K_M.gguf", None), _file("Q6_K.gguf", 5_000)],
        tmp_path / "models--Org--PartialDownload",
    )
    monkeypatch.setattr(
        models_route,
        "_all_hf_cache_scans",
        lambda: [SimpleNamespace(repos = [partial])],
    )
    result = asyncio.run(models_route.list_cached_gguf(current_subject = "test-user"))
    # The None-sized file contributes 0; the complete file's 5_000 remains.
    assert result["cached"] == [
        {
            "repo_id": "Org/PartialDownload",
            "size_bytes": 5_000,
            "cache_path": str(partial.repo_path),
        }
    ]
def test_list_cached_gguf_skips_malformed_repo_without_wiping_response(
    monkeypatch, tmp_path
):
    """One repo raising during classification must not poison the response
    for every other repo in the scan."""

    # Accessing .revisions explodes, simulating a corrupt cache entry.
    class _ExplodingRepo:
        repo_id = "Org/Broken"
        repo_type = "model"
        repo_path = tmp_path / "models--Org--Broken"

        @property
        def revisions(self):
            raise RuntimeError("boom")

    healthy = _repo(
        "Org/Healthy",
        [_file("Q4_K_M.gguf", 5_000)],
        tmp_path / "models--Org--Healthy",
    )
    monkeypatch.setattr(
        models_route,
        "_all_hf_cache_scans",
        lambda: [SimpleNamespace(repos = [_ExplodingRepo(), healthy])],
    )
    result = asyncio.run(models_route.list_cached_gguf(current_subject = "test-user"))
    assert result["cached"] == [
        {
            "repo_id": "Org/Healthy",
            "size_bytes": 5_000,
            "cache_path": str(healthy.repo_path),
        }
    ]
def test_list_cached_gguf_skips_repo_with_only_mmproj_gguf(monkeypatch, tmp_path):
    """A repo whose only ``.gguf`` artifact is an mmproj vision adapter
    must not be classified as a GGUF repo: the variant selector filters
    mmproj out and the picker would otherwise show zero variants."""
    mmproj_only = _repo(
        "Org/MmprojOnly",
        [
            _file("mmproj-Q8_0.gguf", 5_000),
            _file("model.safetensors", 10_000),
        ],
        tmp_path / "models--Org--MmprojOnly",
    )
    monkeypatch.setattr(
        models_route,
        "_all_hf_cache_scans",
        lambda: [SimpleNamespace(repos = [mmproj_only])],
    )
    result = asyncio.run(models_route.list_cached_gguf(current_subject = "test-user"))
    assert result["cached"] == []
def test_list_cached_models_includes_repo_with_only_mmproj_gguf(monkeypatch, tmp_path):
    """Mirror of the cached-gguf skip: a safetensors repo with an
    auxiliary mmproj vision adapter must still surface in cached-models
    so the user can load it as a normal model."""
    mmproj_aux = _repo(
        "Org/MmprojAux",
        [
            _file("mmproj-Q8_0.gguf", 5_000),
            _file("model.safetensors", 10_000),
        ],
        tmp_path / "models--Org--MmprojAux",
    )
    monkeypatch.setattr(
        models_route,
        "_all_hf_cache_scans",
        lambda: [SimpleNamespace(repos = [mmproj_aux])],
    )
    result = asyncio.run(models_route.list_cached_models(current_subject = "test-user"))
    # cached-models reports the whole repo size, mmproj included.
    assert result["cached"] == [
        {
            "repo_id": "Org/MmprojAux",
            "size_bytes": 15_000,
        }
    ]
def test_list_cached_gguf_includes_vision_repo_with_main_gguf_and_mmproj(
    monkeypatch, tmp_path
):
    """A vision-capable GGUF repo (main weight + mmproj adapter) is still
    a GGUF repo. The reported size is the main weight size; mmproj is
    excluded from the GGUF-size accounting because it is filtered out at
    classification time."""
    vision_repo = _repo(
        "Org/VisionGguf",
        [
            _file("Q4_K_M.gguf", 5_000),
            _file("mmproj-Q8_0.gguf", 1_000),
        ],
        tmp_path / "models--Org--VisionGguf",
    )
    monkeypatch.setattr(
        models_route,
        "_all_hf_cache_scans",
        lambda: [SimpleNamespace(repos = [vision_repo])],
    )
    result = asyncio.run(models_route.list_cached_gguf(current_subject = "test-user"))
    # 5_000 only: the 1_000-byte mmproj adapter is excluded from the size.
    assert result["cached"] == [
        {
            "repo_id": "Org/VisionGguf",
            "size_bytes": 5_000,
            "cache_path": str(vision_repo.repo_path),
        }
    ]

View file

@ -904,6 +904,18 @@ def _is_mmproj(filename: str) -> bool:
return "mmproj" in filename.lower()
def _is_gguf_filename(filename: str) -> bool:
return filename.lower().endswith(".gguf")
def _iter_gguf_files(directory: Path):
    """Yield GGUF files directly inside *directory* (non-recursive).

    Matches the extension case-insensitively via ``_is_gguf_filename``
    and yields nothing when *directory* is not a directory.
    """
    if not directory.is_dir():
        return
    yield from (
        entry
        for entry in directory.iterdir()
        if entry.is_file() and _is_gguf_filename(entry.name)
    )
def detect_mmproj_file(path: str) -> Optional[str]:
"""
Find the mmproj (vision projection) GGUF file in a directory.
@ -919,7 +931,7 @@ def detect_mmproj_file(path: str) -> Optional[str]:
if not search_dir.is_dir():
return None
for f in search_dir.glob("*.gguf"):
for f in _iter_gguf_files(search_dir):
if _is_mmproj(f.name):
return str(f.resolve())
return None
@ -942,7 +954,7 @@ def detect_gguf_model(path: str) -> Optional[str]:
p = Path(path)
# Case 1: direct .gguf file
if p.suffix == ".gguf" and p.is_file():
if p.suffix.lower() == ".gguf" and p.is_file():
if _is_mmproj(p.name):
return None
return str(p.resolve())
@ -950,7 +962,7 @@ def detect_gguf_model(path: str) -> Optional[str]:
# Case 2: directory containing .gguf files (skip mmproj)
if p.is_dir():
gguf_files = sorted(
(f for f in p.glob("*.gguf") if not _is_mmproj(f.name)),
(f for f in _iter_gguf_files(p) if not _is_mmproj(f.name)),
key = lambda f: f.stat().st_size,
reverse = True,
)
@ -1015,7 +1027,7 @@ def _pick_best_gguf(filenames: list[str]) -> Optional[str]:
Prefers quantization levels in _GGUF_QUANT_PREFERENCE order.
Falls back to the first .gguf file found.
"""
gguf_files = [f for f in filenames if f.endswith(".gguf")]
gguf_files = [f for f in filenames if f.lower().endswith(".gguf")]
if not gguf_files:
return None
@ -1100,7 +1112,7 @@ def list_gguf_variants(
for sibling in info.siblings:
fname = sibling.rfilename
if not fname.endswith(".gguf"):
if not fname.lower().endswith(".gguf"):
continue
size = sibling.size or 0
@ -1171,7 +1183,7 @@ def list_local_gguf_variants(
quant_first_file: dict[str, str] = {}
has_vision = False
for f in sorted(p.glob("*.gguf")):
for f in sorted(_iter_gguf_files(p)):
if _is_mmproj(f.name):
has_vision = True
continue
@ -1210,7 +1222,7 @@ def _find_local_gguf_by_variant(directory: str, variant: str) -> Optional[str]:
matches = sorted(
f
for f in p.glob("*.gguf")
for f in _iter_gguf_files(p)
if not _is_mmproj(f.name) and _extract_quant_label(f.name) == variant
)
if matches:
@ -1438,7 +1450,9 @@ def scan_exported_models(
# Check for flat GGUF export (e.g. exports/gemma-3-4b-it-finetune-gguf/)
# Filter out mmproj (vision projection) files — they aren't loadable as main models
gguf_files = [f for f in run_dir.glob("*.gguf") if not _is_mmproj(f.name)]
gguf_files = [
f for f in _iter_gguf_files(run_dir) if not _is_mmproj(f.name)
]
if gguf_files:
base_model = None
export_meta = run_dir / "export_metadata.json"
@ -1465,7 +1479,7 @@ def scan_exported_models(
has_weights = any(checkpoint_dir.glob("*.safetensors")) or any(
checkpoint_dir.glob("*.bin")
)
has_gguf = any(checkpoint_dir.glob("*.gguf"))
has_gguf = any(_iter_gguf_files(checkpoint_dir))
base_model = None
export_type = None
@ -1488,7 +1502,7 @@ def scan_exported_models(
pass
elif has_gguf:
export_type = "gguf"
gguf_list = list(checkpoint_dir.glob("*.gguf"))
gguf_list = list(_iter_gguf_files(checkpoint_dir))
# Check checkpoint_dir first, then fall back to parent run_dir
# (export.py writes metadata to the top-level export directory)
for meta_dir in (checkpoint_dir, run_dir):

View file

@ -399,10 +399,14 @@ function GgufVariantExpander({
);
}
// ── Detect GGUF repos by naming convention ────────────────────
// ── Detect GGUF repos by naming convention or hub tag ────────────────────
function isGgufRepo(id: string): boolean {
return id.toUpperCase().includes("-GGUF");
/** True when the repo id carries a "-GGUF" segment at the end or
 *  followed by another dash-delimited segment, case-insensitively. */
function hasGgufSuffix(id: string): boolean {
  return id.search(/-GGUF(?:$|-)/i) !== -1;
}
/** A repo is GGUF when an explicit hint says so, or — absent a hint —
 *  when its id matches the "-GGUF" naming convention. */
function isGgufRepo(id: string, hintedIsGguf?: boolean): boolean {
  if (hintedIsGguf) {
    return true;
  }
  return hasGgufSuffix(id);
}
/** Extract param count label from model name (e.g. "Qwen3-0.6B" -> "0.6B"). */
@ -451,6 +455,33 @@ export function HubModelPicker({
const { results, isLoading, isLoadingMore, fetchMore } =
useHfModelSearch(debouncedQuery);
// Sets of lowercased repo ids that the store or HF search have
// confirmed are GGUF. Absence means "no hint" and lets hasGgufSuffix
// take over as fallback, rather than conflating unknown with known-
// not-GGUF. Keys are lowercased so that store IDs and HF search IDs
// that differ only by casing still match the same hint.
const modelGgufIds = useMemo(() => {
const ids = new Set<string>();
for (const model of models) {
if (model.isGguf) ids.add(model.id.toLowerCase());
}
return ids;
}, [models]);
const resultGgufIds = useMemo(() => {
const ids = new Set<string>();
for (const result of results) {
if (result.isGguf) ids.add(result.id.toLowerCase());
}
return ids;
}, [results]);
const isKnownGgufRepo = useCallback(
(id: string): boolean => {
const key = id.toLowerCase();
return isGgufRepo(id, resultGgufIds.has(key) || modelGgufIds.has(key));
},
[modelGgufIds, resultGgufIds],
);
// Track which GGUF repo is expanded for variant selection
const [expandedGguf, setExpandedGguf] = useState<string | null>(null);
@ -625,17 +656,17 @@ export function HubModelPicker({
const recommendedIds = useMemo(() => {
const all = dedupe([...models.map((model) => model.id), value ?? ""])
.filter((id) => !downloadedSet.has(id.toLowerCase()))
.filter((id) => !chatOnly || isGgufRepo(id))
.filter((id) => !chatOnly || isKnownGgufRepo(id))
.filter((id) => !/-FP8[-.]|FP8-Dynamic/i.test(id));
// Sort: GGUFs first, then hub models
const gguf: string[] = [];
const hub: string[] = [];
for (const id of all) {
if (isGgufRepo(id)) gguf.push(id);
if (isKnownGgufRepo(id)) gguf.push(id);
else hub.push(id);
}
return [...gguf, ...hub];
}, [models, value, downloadedSet, chatOnly]);
}, [models, value, downloadedSet, chatOnly, isKnownGgufRepo]);
// Infinite scroll paging for the recommended section
const [recommendedPage, setRecommendedPage] = useState(1);
@ -645,7 +676,7 @@ export function HubModelPicker({
}, [models, chatOnly]);
const visibleRecommendedIds = useMemo(() => {
const hubStartIndex = recommendedIds.findIndex((id) => !isGgufRepo(id));
const hubStartIndex = recommendedIds.findIndex((id) => !isKnownGgufRepo(id));
const allGguf =
hubStartIndex === -1
? recommendedIds
@ -659,7 +690,7 @@ export function HubModelPicker({
result.push(...allHub.slice(p * 4, (p + 1) * 4));
}
return result;
}, [recommendedIds, recommendedPage]);
}, [recommendedIds, recommendedPage, isKnownGgufRepo]);
const hasMoreRecommended =
visibleRecommendedIds.length < recommendedIds.length;
@ -681,8 +712,8 @@ export function HubModelPicker({
const ids = showHfSection
? [...new Set([...visibleRecommendedIds, ...filteredRecommendedIds])]
: visibleRecommendedIds;
return ids.filter((id) => !isGgufRepo(id));
}, [visibleRecommendedIds, showHfSection, filteredRecommendedIds]);
return ids.filter((id) => !isKnownGgufRepo(id));
}, [visibleRecommendedIds, showHfSection, filteredRecommendedIds, isKnownGgufRepo]);
const { paramCountById: recommendedParamCountById } =
useRecommendedModelVram(idsForVram);
@ -697,9 +728,9 @@ export function HubModelPicker({
return results
.map((result) => result.id)
.filter((id) => !recommendedSet.has(id))
.filter((id) => !chatOnly || isGgufRepo(id))
.filter((id) => !chatOnly || isKnownGgufRepo(id))
.filter((id) => !/-FP8[-.]|FP8-Dynamic/i.test(id));
}, [recommendedSet, results, showHfSection, chatOnly]);
}, [recommendedSet, results, showHfSection, chatOnly, isKnownGgufRepo]);
const metricsById = useMemo(
() =>
@ -800,14 +831,14 @@ export function HubModelPicker({
/** Handle clicking a model row — GGUF repos expand, others load directly. */
const handleModelClick = useCallback(
(id: string) => {
if (isGgufRepo(id)) {
if (isKnownGgufRepo(id)) {
// Toggle GGUF variant expander
setExpandedGguf((prev) => (prev === id ? null : id));
} else {
onSelect(id, { source: "hub", isLora: false });
}
},
[onSelect],
[onSelect, isKnownGgufRepo],
);
return (
@ -848,7 +879,11 @@ export function HubModelPicker({
label={c.repo_id}
meta={`GGUF · ${formatBytes(c.size_bytes)}`}
selected={value === c.repo_id}
onClick={() => handleModelClick(c.repo_id)}
onClick={() =>
setExpandedGguf((prev) =>
prev === c.repo_id ? null : c.repo_id,
)
}
vramStatus={null}
/>
{expandedGguf === c.repo_id && (
@ -909,7 +944,7 @@ export function HubModelPicker({
<ModelRow
label={m.model_id ?? m.display_name}
meta={
isGguf || m.path.endsWith(".gguf") ? "GGUF" : "Local"
isGguf || m.path.toLowerCase().endsWith(".gguf") ? "GGUF" : "Local"
}
selected={value === m.id}
onClick={() => {
@ -1036,7 +1071,7 @@ export function HubModelPicker({
const isGguf =
isGgufRepo(m.id) ||
isGgufRepo(m.display_name) ||
m.path.endsWith(".gguf");
m.path.toLowerCase().endsWith(".gguf");
return (
<div key={m.id}>
<ModelRow
@ -1089,16 +1124,22 @@ export function HubModelPicker({
<ModelRow
label={id}
meta={
isGgufRepo(id)
isKnownGgufRepo(id)
? "GGUF"
: (vram?.detail ?? extractParamLabel(id))
}
selected={value === id}
onClick={() => handleModelClick(id)}
onClick={() => {
if (isKnownGgufRepo(id)) {
setExpandedGguf((prev) => (prev === id ? null : id));
} else {
handleModelClick(id);
}
}}
vramStatus={
isGgufRepo(id) ? null : (vram?.status ?? null)
isKnownGgufRepo(id) ? null : (vram?.status ?? null)
}
vramEst={isGgufRepo(id) ? undefined : vram?.est}
vramEst={isKnownGgufRepo(id) ? undefined : vram?.est}
gpuGb={gpu.available ? gpu.memoryTotalGb : undefined}
/>
{expandedGguf === id && (
@ -1136,16 +1177,22 @@ export function HubModelPicker({
<ModelRow
label={id}
meta={
isGgufRepo(id)
isKnownGgufRepo(id)
? "GGUF"
: (vram?.detail ?? extractParamLabel(id))
}
selected={value === id}
onClick={() => handleModelClick(id)}
onClick={() => {
if (isKnownGgufRepo(id)) {
setExpandedGguf((prev) => (prev === id ? null : id));
} else {
handleModelClick(id);
}
}}
vramStatus={
isGgufRepo(id) ? null : (vram?.status ?? null)
isKnownGgufRepo(id) ? null : (vram?.status ?? null)
}
vramEst={isGgufRepo(id) ? undefined : vram?.est}
vramEst={isKnownGgufRepo(id) ? undefined : vram?.est}
gpuGb={gpu.available ? gpu.memoryTotalGb : undefined}
/>
{expandedGguf === id && (
@ -1175,24 +1222,31 @@ export function HubModelPicker({
No matching models.
</div>
) : null
) : (
) : (
hfIds.map((id) => {
const vram = vramMap.get(id);
const isSearchGguf = isKnownGgufRepo(id);
return (
<div key={id}>
<ModelRow
label={id}
meta={
isGgufRepo(id)
isSearchGguf
? "GGUF"
: (metricsById.get(id) ?? extractParamLabel(id))
}
selected={value === id}
onClick={() => handleModelClick(id)}
onClick={() => {
if (isSearchGguf) {
setExpandedGguf((prev) => (prev === id ? null : id));
} else {
handleModelClick(id);
}
}}
vramStatus={
isGgufRepo(id) ? null : (vram?.status ?? null)
isSearchGguf ? null : (vram?.status ?? null)
}
vramEst={isGgufRepo(id) ? undefined : vram?.est}
vramEst={isSearchGguf ? undefined : vram?.est}
gpuGb={gpu.available ? gpu.memoryTotalGb : undefined}
/>
{expandedGguf === id && (

View file

@ -8,6 +8,7 @@ export interface ModelOption {
name: string;
description?: string;
icon?: ReactNode;
isGguf?: boolean;
}
export interface LoraModelOption extends ModelOption {
@ -24,4 +25,3 @@ export interface ModelSelectorChangeMeta {
isDownloaded?: boolean;
expectedBytes?: number;
}

View file

@ -691,6 +691,7 @@ export function ChatPage(): ReactElement {
id: model.id,
name: model.name,
description: model.description,
isGguf: model.isGguf,
})),
[modelsFromStore],
);

View file

@ -13,6 +13,7 @@ export interface HfModelResult {
likes: number;
totalParams?: number;
estimatedSizeBytes?: number;
isGguf: boolean;
}
const EXCLUDED_TAGS = new Set([
@ -89,7 +90,10 @@ function makeMapModel(excludeGguf: boolean) {
if (!isEmbedding && m.tags?.some((t) => EXCLUDED_TAGS.has(t))) {
return null;
}
if (excludeGguf && m.tags?.includes("gguf")) {
const isGguf =
Boolean(m.tags?.some((tag) => tag.toLowerCase() === "gguf")) ||
/-GGUF(?:$|-)/i.test(m.name);
if (excludeGguf && isGguf) {
return null;
}
return {
@ -98,6 +102,7 @@ function makeMapModel(excludeGguf: boolean) {
likes: m.likes,
totalParams: m.safetensors?.total,
estimatedSizeBytes: estimateSizeFromDtypes(m.safetensors?.parameters),
isGguf,
};
};
}
@ -330,4 +335,3 @@ export function useHfModelSearch(
return { ...search, results };
}