mirror of
https://github.com/unslothai/unsloth
synced 2026-04-21 13:37:39 +00:00
Studio: support GGUF variant selection for non-suffixed repos (#5023)
* fix: support GGUF variant selection for non-suffixed repos * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fix: harden GGUF detection across cached models and picker flows * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * chore: use shared GGUF picker helper for search rows * fix: avoid mixed cache duplication and preserve GGUF fallback detection * fix: unify GGUF cache matching and merge picker hints * fix: normalize local GGUF matching across picker and model config * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fix: robust cached-gguf classification + hint-aware click routing - _repo_gguf_size_bytes: treat size_on_disk=None as 0 and dedupe fallback by commit_hash so partial/interrupted downloads don't TypeError out of sum() and wipe the entire cached list. - list_cached_gguf / list_cached_models: narrow per-repo try/except so one malformed repo no longer poisons the whole response. - handleModelClick: route through isKnownGgufRepo instead of the suffix-only isGgufRepo, so non-suffixed GGUF repos still open the variant expander from every call site. - Replace the modelIsGgufById/resultIsGgufById Maps with Sets of known GGUF ids to stop conflating "no hint" with "known not-GGUF". - Make HfModelResult.isGguf required (it is always set in makeMapModel). - Add regression tests for the None size case, mixed-repo inclusion in cached-gguf, and per-repo error isolation. * fix: exclude mmproj from GGUF classification and case-normalize hint lookups - _repo_gguf_size_bytes now filters mmproj vision-adapter files so safetensors+mmproj.gguf repos stay on the cached-models path and non-GGUF rows no longer show zero pickable variants. A vision-capable GGUF repo (main weight + mmproj adapter) still classifies as GGUF and reports the main weight size. 
- modelGgufIds / resultGgufIds now key on lowercased ids and isKnownGgufRepo lowercases its lookup, so store and HF-search ids that differ only by casing still match the same GGUF hint. - New regression tests: mmproj-only repo excluded from cached-gguf, same repo included in cached-models, vision-capable repo still classified as GGUF with correct size. --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Roland Tannous <rolandtannous@gravityq.ai> Co-authored-by: Roland Tannous <115670425+rolandtannous@users.noreply.github.com>
This commit is contained in:
parent
13928b5f0e
commit
f9ef639dde
7 changed files with 621 additions and 90 deletions
|
|
@ -992,7 +992,7 @@ async def get_gguf_variants(
|
|||
snapshots = entry / "snapshots"
|
||||
if snapshots.is_dir():
|
||||
for snap in snapshots.iterdir():
|
||||
for f in snap.rglob("*.gguf"):
|
||||
for f in _iter_gguf_paths(snap):
|
||||
q = _extract_quant_label(f.name)
|
||||
cached_bytes_by_quant[q] = (
|
||||
cached_bytes_by_quant.get(q, 0) + f.stat().st_size
|
||||
|
|
@ -1061,7 +1061,7 @@ async def get_gguf_download_progress(
|
|||
for entry in cache_dir.iterdir():
|
||||
if entry.name.lower() == target:
|
||||
# Count completed .gguf files matching this variant in snapshots
|
||||
for f in entry.rglob("*.gguf"):
|
||||
for f in _iter_gguf_paths(entry):
|
||||
fname = f.name.lower().replace("-", "").replace("_", "")
|
||||
if not variant_lower or variant_lower in fname:
|
||||
downloaded_bytes += f.stat().st_size
|
||||
|
|
@ -1237,6 +1237,62 @@ def _all_hf_cache_scans():
|
|||
return scans
|
||||
|
||||
|
||||
def _is_gguf_filename(name: str) -> bool:
|
||||
return name.lower().endswith(".gguf")
|
||||
|
||||
|
||||
def _is_mmproj_filename(name: str) -> bool:
|
||||
"""Match GGUF vision-adapter (mmproj) files. Kept consistent with
|
||||
``utils.models.model_config._is_mmproj``."""
|
||||
return "mmproj" in name.lower()
|
||||
|
||||
|
||||
def _is_main_gguf_filename(name: str) -> bool:
    """Return True for a ``.gguf`` file that is a primary weight artifact,
    i.e. one whose name is not an mmproj vision adapter."""
    # Non-GGUF names are rejected first; the mmproj check only runs on GGUF names.
    if not _is_gguf_filename(name):
        return False
    return not _is_mmproj_filename(name)
|
||||
|
||||
|
||||
def _iter_gguf_paths(root: Path):
    """Yield every file found recursively beneath ``root`` whose name ends
    in ``.gguf`` (extension matched case-insensitively)."""
    for candidate in root.rglob("*"):
        # Directories and other non-file entries are never yielded.
        if not candidate.is_file():
            continue
        if _is_gguf_filename(candidate.name):
            yield candidate
|
||||
|
||||
|
||||
def _repo_gguf_size_bytes(repo_info) -> int:
    """Sum the on-disk bytes of primary GGUF weight files over every
    revision of a cached repo, leaving mmproj vision-adapter files out.

    Hugging Face hardlinks blobs that are shared between revisions, so
    each file is keyed by its blob path and counted once. When a file
    exposes no blob path, the key falls back to the revision commit hash
    plus the filename. A file whose ``size_on_disk`` is ``None`` (e.g. a
    partial or interrupted download) contributes zero bytes.

    mmproj files are skipped so that a repo whose only ``.gguf`` artifact
    is a vision adapter is not classified as a GGUF repo: the variant
    selector filters mmproj out and would otherwise show zero pickable
    variants.
    """
    size_by_key: dict[str, int] = {}
    for revision in repo_info.revisions:
        # Revisions without a commit hash still get a distinct fallback id.
        rev_id = getattr(revision, "commit_hash", None) or str(id(revision))
        for entry in revision.files:
            if not _is_main_gguf_filename(entry.file_name):
                continue
            blob_path = getattr(entry, "blob_path", None)
            size = entry.size_on_disk or 0
            key = str(blob_path) if blob_path else f"{rev_id}:{entry.file_name}"
            size_by_key[key] = size
    return sum(size_by_key.values())
|
||||
|
||||
|
||||
def _repo_has_gguf_files(repo_info) -> bool:
    """Tell whether a cached repo holds a primary GGUF weight file in any
    revision. A repo whose only ``.gguf`` artifact is an mmproj vision
    adapter is not counted as GGUF here."""
    # A positive primary-GGUF byte total is the signal; zero-size files do not count.
    gguf_bytes = _repo_gguf_size_bytes(repo_info)
    return gguf_bytes > 0
|
||||
|
||||
|
||||
@router.get("/cached-gguf")
|
||||
async def list_cached_gguf(
|
||||
current_subject: str = Depends(get_current_subject),
|
||||
|
|
@ -1248,28 +1304,25 @@ async def list_cached_gguf(
|
|||
seen_lower: dict[str, dict] = {}
|
||||
for hf_cache in cache_scans:
|
||||
for repo_info in hf_cache.repos:
|
||||
if repo_info.repo_type != "model":
|
||||
try:
|
||||
if repo_info.repo_type != "model":
|
||||
continue
|
||||
repo_id = repo_info.repo_id
|
||||
total_size = _repo_gguf_size_bytes(repo_info)
|
||||
if total_size == 0:
|
||||
continue
|
||||
key = repo_id.lower()
|
||||
existing = seen_lower.get(key)
|
||||
if existing is None or total_size > existing["size_bytes"]:
|
||||
seen_lower[key] = {
|
||||
"repo_id": repo_id,
|
||||
"size_bytes": total_size,
|
||||
"cache_path": str(repo_info.repo_path),
|
||||
}
|
||||
except Exception as e:
|
||||
repo_label = getattr(repo_info, "repo_id", "<unknown>")
|
||||
logger.warning(f"Skipping cached GGUF repo {repo_label}: {e}")
|
||||
continue
|
||||
repo_id = repo_info.repo_id
|
||||
if not repo_id.upper().endswith("-GGUF"):
|
||||
continue
|
||||
total_size = 0
|
||||
has_gguf = False
|
||||
for revision in repo_info.revisions:
|
||||
for f in revision.files:
|
||||
if f.file_name.endswith(".gguf"):
|
||||
has_gguf = True
|
||||
total_size += f.size_on_disk
|
||||
if not has_gguf:
|
||||
continue
|
||||
key = repo_id.lower()
|
||||
existing = seen_lower.get(key)
|
||||
if existing is None or total_size > existing["size_bytes"]:
|
||||
seen_lower[key] = {
|
||||
"repo_id": repo_id,
|
||||
"size_bytes": total_size,
|
||||
"cache_path": str(repo_info.repo_path),
|
||||
}
|
||||
cached = sorted(seen_lower.values(), key = lambda c: c["repo_id"])
|
||||
return {"cached": cached}
|
||||
except Exception as e:
|
||||
|
|
@ -1290,30 +1343,37 @@ async def list_cached_models(
|
|||
seen_lower: dict[str, dict] = {}
|
||||
for hf_cache in cache_scans:
|
||||
for repo_info in hf_cache.repos:
|
||||
if repo_info.repo_type != "model":
|
||||
try:
|
||||
if repo_info.repo_type != "model":
|
||||
continue
|
||||
repo_id = repo_info.repo_id
|
||||
if _repo_has_gguf_files(repo_info):
|
||||
continue
|
||||
total_size = sum(
|
||||
(f.size_on_disk or 0)
|
||||
for rev in repo_info.revisions
|
||||
for f in rev.files
|
||||
)
|
||||
if total_size == 0:
|
||||
continue
|
||||
has_weights = any(
|
||||
f.file_name.endswith(_WEIGHT_EXTENSIONS)
|
||||
for rev in repo_info.revisions
|
||||
for f in rev.files
|
||||
)
|
||||
if not has_weights:
|
||||
continue
|
||||
key = repo_id.lower()
|
||||
existing = seen_lower.get(key)
|
||||
if existing is None or total_size > existing["size_bytes"]:
|
||||
seen_lower[key] = {
|
||||
"repo_id": repo_id,
|
||||
"size_bytes": total_size,
|
||||
}
|
||||
except Exception as e:
|
||||
repo_label = getattr(repo_info, "repo_id", "<unknown>")
|
||||
logger.warning(f"Skipping cached model repo {repo_label}: {e}")
|
||||
continue
|
||||
repo_id = repo_info.repo_id
|
||||
if repo_id.upper().endswith("-GGUF"):
|
||||
continue
|
||||
total_size = sum(
|
||||
f.size_on_disk for rev in repo_info.revisions for f in rev.files
|
||||
)
|
||||
if total_size == 0:
|
||||
continue
|
||||
has_weights = any(
|
||||
f.file_name.endswith(_WEIGHT_EXTENSIONS)
|
||||
for rev in repo_info.revisions
|
||||
for f in rev.files
|
||||
)
|
||||
if not has_weights:
|
||||
continue
|
||||
key = repo_id.lower()
|
||||
existing = seen_lower.get(key)
|
||||
if existing is None or total_size > existing["size_bytes"]:
|
||||
seen_lower[key] = {
|
||||
"repo_id": repo_id,
|
||||
"size_bytes": total_size,
|
||||
}
|
||||
cached = sorted(seen_lower.values(), key = lambda c: c["repo_id"])
|
||||
return {"cached": cached}
|
||||
except Exception as e:
|
||||
|
|
@ -1390,7 +1450,7 @@ async def delete_cached_model(
|
|||
deleted_count = 0
|
||||
for rev in target_repo.revisions:
|
||||
for f in rev.files:
|
||||
if not f.file_name.endswith(".gguf"):
|
||||
if not _is_gguf_filename(f.file_name):
|
||||
continue
|
||||
quant = _extract_quant_label(f.file_name)
|
||||
if quant.lower() != variant.lower():
|
||||
|
|
|
|||
398
studio/backend/tests/test_cached_gguf_routes.py
Normal file
398
studio/backend/tests/test_cached_gguf_routes.py
Normal file
|
|
@ -0,0 +1,398 @@
|
|||
# SPDX-License-Identifier: AGPL-3.0-only
|
||||
# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0
|
||||
|
||||
import asyncio
|
||||
import sys
|
||||
import types
|
||||
from pathlib import Path
|
||||
from types import SimpleNamespace
|
||||
|
||||
# Keep this test runnable in lightweight environments where optional logging
|
||||
# deps are not installed.
|
||||
if "structlog" not in sys.modules:

    class _DummyLogger:
        """Logger stand-in: any attribute resolves to a callable that
        accepts anything and returns None."""

        def __getattr__(self, _name):
            def _ignore(*args, **kwargs):
                return None

            return _ignore

    # Register a minimal stub exposing only the two names set below.
    _structlog_stub = types.SimpleNamespace(
        BoundLogger = _DummyLogger,
        get_logger = lambda *args, **kwargs: _DummyLogger(),
    )
    sys.modules["structlog"] = _structlog_stub
|
||||
|
||||
import routes.models as models_route
|
||||
|
||||
|
||||
def _repo(
|
||||
repo_id: str,
|
||||
files: list[SimpleNamespace],
|
||||
repo_path: Path,
|
||||
*,
|
||||
revisions: list[SimpleNamespace] | None = None,
|
||||
) -> SimpleNamespace:
|
||||
return SimpleNamespace(
|
||||
repo_id = repo_id,
|
||||
repo_type = "model",
|
||||
repo_path = repo_path,
|
||||
revisions = revisions or [SimpleNamespace(files = files)],
|
||||
)
|
||||
|
||||
|
||||
def _file(
|
||||
name: str,
|
||||
size_on_disk: int,
|
||||
*,
|
||||
blob_path: str | None = None,
|
||||
) -> SimpleNamespace:
|
||||
return SimpleNamespace(
|
||||
file_name = name,
|
||||
size_on_disk = size_on_disk,
|
||||
blob_path = blob_path,
|
||||
)
|
||||
|
||||
|
||||
def test_iter_gguf_paths_matches_extension_case_insensitively(tmp_path):
    """``_iter_gguf_paths`` finds nested ``.gguf`` files whatever the case
    of the extension and leaves other files out."""
    snapshot_dir = tmp_path / "snapshots" / "rev"
    snapshot_dir.mkdir(parents = True)
    contents = (("Q4_K_M.gguf", "a"), ("Q8_0.GGUF", "b"), ("README.md", "c"))
    for filename, text in contents:
        (snapshot_dir / filename).write_text(text)

    found = [path.name for path in models_route._iter_gguf_paths(tmp_path)]
    found.sort()

    assert found == ["Q4_K_M.gguf", "Q8_0.GGUF"]
|
||||
|
||||
|
||||
def test_list_cached_gguf_includes_non_suffix_repo_when_cache_contains_gguf(
    monkeypatch, tmp_path
):
    """A repo whose id lacks the ``-GGUF`` suffix is still listed when its
    cached files include a ``.gguf`` weight; only GGUF bytes are counted."""
    repo = _repo(
        "HauhauCS/Gemma-4-E4B-Uncensored-HauhauCS-Aggressive",
        [_file("Q4_K_M.gguf", 5_000), _file("README.md", 10)],
        tmp_path / "models--HauhauCS--Gemma",
    )
    cache_scan = SimpleNamespace(repos = [repo])
    monkeypatch.setattr(models_route, "_all_hf_cache_scans", lambda: [cache_scan])

    response = asyncio.run(models_route.list_cached_gguf(current_subject = "test-user"))

    expected_entry = {
        "repo_id": "HauhauCS/Gemma-4-E4B-Uncensored-HauhauCS-Aggressive",
        "size_bytes": 5_000,
        "cache_path": str(repo.repo_path),
    }
    assert response["cached"] == [expected_entry]
|
||||
|
||||
|
||||
def test_list_cached_gguf_matches_extension_case_insensitively(monkeypatch, tmp_path):
    """An upper-case ``.GGUF`` extension is enough for a repo to be listed
    by the cached-gguf route."""
    repo = _repo(
        "Org/Model-Without-Suffix",
        [_file("Q8_0.GGUF", 7_000)],
        tmp_path / "models--Org--Model-Without-Suffix",
    )
    cache_scan = SimpleNamespace(repos = [repo])
    monkeypatch.setattr(models_route, "_all_hf_cache_scans", lambda: [cache_scan])

    response = asyncio.run(models_route.list_cached_gguf(current_subject = "test-user"))

    expected_entry = {
        "repo_id": "Org/Model-Without-Suffix",
        "size_bytes": 7_000,
        "cache_path": str(repo.repo_path),
    }
    assert response["cached"] == [expected_entry]
|
||||
|
||||
|
||||
def test_list_cached_gguf_skips_repos_without_positive_gguf_size(monkeypatch, tmp_path):
    """Repos with no ``.gguf`` file, or with only a zero-byte one, are left
    out of the cached-gguf listing."""
    readme_only = _repo(
        "Org/ReadmeOnly",
        [_file("README.md", 10)],
        tmp_path / "models--Org--ReadmeOnly",
    )
    zero_bytes = _repo(
        "Org/ZeroSize",
        [_file("Q4_K_M.gguf", 0)],
        tmp_path / "models--Org--ZeroSize",
    )
    cache_scan = SimpleNamespace(repos = [readme_only, zero_bytes])
    monkeypatch.setattr(models_route, "_all_hf_cache_scans", lambda: [cache_scan])

    response = asyncio.run(models_route.list_cached_gguf(current_subject = "test-user"))

    assert response["cached"] == []
|
||||
|
||||
|
||||
def test_list_cached_gguf_keeps_largest_duplicate_repo_across_scans(
    monkeypatch, tmp_path
):
    """When two scans hold repos whose ids differ only by case, the entry
    with the larger GGUF byte total is the one reported."""
    smaller = _repo(
        "Org/Dupe",
        [_file("Q4_K_M.gguf", 2_000)],
        tmp_path / "models--Org--Dupe-a",
    )
    larger = _repo(
        "org/dupe",
        [_file("Q4_K_M.gguf", 5_000), _file("Q6_K.gguf", 1_000)],
        tmp_path / "models--Org--Dupe-b",
    )
    cache_scans = [
        SimpleNamespace(repos = [smaller]),
        SimpleNamespace(repos = [larger]),
    ]
    monkeypatch.setattr(models_route, "_all_hf_cache_scans", lambda: cache_scans)

    response = asyncio.run(models_route.list_cached_gguf(current_subject = "test-user"))

    expected_entry = {
        "repo_id": "org/dupe",
        "size_bytes": 6_000,
        "cache_path": str(larger.repo_path),
    }
    assert response["cached"] == [expected_entry]
|
||||
|
||||
|
||||
def test_list_cached_gguf_dedupes_shared_blobs_across_revisions(monkeypatch, tmp_path):
    """A GGUF file that appears in two revisions with the same blob path
    is counted once in the reported size."""
    shared_blob = "blobs/shared-q4"
    two_revisions = [
        SimpleNamespace(files = [_file("Q4_K_M.gguf", 5_000, blob_path = shared_blob)])
        for _ in range(2)
    ]
    repo = _repo(
        "Org/SharedBlobRepo",
        [],
        tmp_path / "models--Org--SharedBlobRepo",
        revisions = two_revisions,
    )
    cache_scan = SimpleNamespace(repos = [repo])
    monkeypatch.setattr(models_route, "_all_hf_cache_scans", lambda: [cache_scan])

    response = asyncio.run(models_route.list_cached_gguf(current_subject = "test-user"))

    expected_entry = {
        "repo_id": "Org/SharedBlobRepo",
        "size_bytes": 5_000,
        "cache_path": str(repo.repo_path),
    }
    assert response["cached"] == [expected_entry]
|
||||
|
||||
|
||||
def test_list_cached_models_skips_non_suffix_repo_when_gguf_files_exist(
    monkeypatch, tmp_path
):
    """A repo holding both a ``.gguf`` weight and safetensors is kept out
    of the cached-models listing."""
    mixed = _repo(
        "Org/MixedRepo",
        [_file("Q4_K_M.gguf", 5_000), _file("model.safetensors", 10_000)],
        tmp_path / "models--Org--MixedRepo",
    )
    cache_scan = SimpleNamespace(repos = [mixed])
    monkeypatch.setattr(models_route, "_all_hf_cache_scans", lambda: [cache_scan])

    response = asyncio.run(models_route.list_cached_models(current_subject = "test-user"))

    assert response["cached"] == []
|
||||
|
||||
|
||||
def test_list_cached_gguf_includes_mixed_repo_with_gguf_and_safetensors(
    monkeypatch, tmp_path
):
    """Counterpart of the cached-models skip test: the same mixed repo must
    surface in cached-gguf, reporting only its GGUF bytes, so the picker can
    show it as a GGUF download."""
    mixed = _repo(
        "Org/MixedRepo",
        [_file("Q4_K_M.gguf", 5_000), _file("model.safetensors", 10_000)],
        tmp_path / "models--Org--MixedRepo",
    )
    cache_scan = SimpleNamespace(repos = [mixed])
    monkeypatch.setattr(models_route, "_all_hf_cache_scans", lambda: [cache_scan])

    response = asyncio.run(models_route.list_cached_gguf(current_subject = "test-user"))

    expected_entry = {
        "repo_id": "Org/MixedRepo",
        "size_bytes": 5_000,
        "cache_path": str(mixed.repo_path),
    }
    assert response["cached"] == [expected_entry]
|
||||
|
||||
|
||||
def test_list_cached_gguf_handles_none_size_on_disk(monkeypatch, tmp_path):
    """A partial/interrupted GGUF download reports ``size_on_disk = None``.
    The route has to count those unknown bytes as zero rather than let a
    TypeError escape from ``sum()`` and wipe the entire response."""
    partial = _repo(
        "Org/PartialDownload",
        [_file("Q4_K_M.gguf", None), _file("Q6_K.gguf", 5_000)],
        tmp_path / "models--Org--PartialDownload",
    )
    cache_scan = SimpleNamespace(repos = [partial])
    monkeypatch.setattr(models_route, "_all_hf_cache_scans", lambda: [cache_scan])

    response = asyncio.run(models_route.list_cached_gguf(current_subject = "test-user"))

    expected_entry = {
        "repo_id": "Org/PartialDownload",
        "size_bytes": 5_000,
        "cache_path": str(partial.repo_path),
    }
    assert response["cached"] == [expected_entry]
|
||||
|
||||
|
||||
def test_list_cached_gguf_skips_malformed_repo_without_wiping_response(
    monkeypatch, tmp_path
):
    """A single repo that raises while being classified must not poison
    the response for the other repos in the same scan."""

    class _ExplodingRepo:
        # Looks like a model repo, but reading ``revisions`` always raises.
        repo_id = "Org/Broken"
        repo_type = "model"
        repo_path = tmp_path / "models--Org--Broken"

        @property
        def revisions(self):
            raise RuntimeError("boom")

    healthy = _repo(
        "Org/Healthy",
        [_file("Q4_K_M.gguf", 5_000)],
        tmp_path / "models--Org--Healthy",
    )
    cache_scan = SimpleNamespace(repos = [_ExplodingRepo(), healthy])
    monkeypatch.setattr(models_route, "_all_hf_cache_scans", lambda: [cache_scan])

    response = asyncio.run(models_route.list_cached_gguf(current_subject = "test-user"))

    expected_entry = {
        "repo_id": "Org/Healthy",
        "size_bytes": 5_000,
        "cache_path": str(healthy.repo_path),
    }
    assert response["cached"] == [expected_entry]
|
||||
|
||||
|
||||
def test_list_cached_gguf_skips_repo_with_only_mmproj_gguf(monkeypatch, tmp_path):
    """If the only ``.gguf`` artifact in a repo is an mmproj vision adapter,
    the repo must not be classified as GGUF: the variant selector filters
    mmproj out and the picker would otherwise show zero variants."""
    mmproj_only = _repo(
        "Org/MmprojOnly",
        [_file("mmproj-Q8_0.gguf", 5_000), _file("model.safetensors", 10_000)],
        tmp_path / "models--Org--MmprojOnly",
    )
    cache_scan = SimpleNamespace(repos = [mmproj_only])
    monkeypatch.setattr(models_route, "_all_hf_cache_scans", lambda: [cache_scan])

    response = asyncio.run(models_route.list_cached_gguf(current_subject = "test-user"))

    assert response["cached"] == []
|
||||
|
||||
|
||||
def test_list_cached_models_includes_repo_with_only_mmproj_gguf(monkeypatch, tmp_path):
    """Counterpart of the cached-gguf skip test: a safetensors repo that
    carries an auxiliary mmproj vision adapter must still surface in
    cached-models so the user can load it as a normal model. The reported
    size covers every file in the repo."""
    mmproj_aux = _repo(
        "Org/MmprojAux",
        [_file("mmproj-Q8_0.gguf", 5_000), _file("model.safetensors", 10_000)],
        tmp_path / "models--Org--MmprojAux",
    )
    cache_scan = SimpleNamespace(repos = [mmproj_aux])
    monkeypatch.setattr(models_route, "_all_hf_cache_scans", lambda: [cache_scan])

    response = asyncio.run(models_route.list_cached_models(current_subject = "test-user"))

    expected_entry = {
        "repo_id": "Org/MmprojAux",
        "size_bytes": 15_000,
    }
    assert response["cached"] == [expected_entry]
|
||||
|
||||
|
||||
def test_list_cached_gguf_includes_vision_repo_with_main_gguf_and_mmproj(
    monkeypatch, tmp_path
):
    """A vision-capable GGUF repo (main weight plus mmproj adapter) still
    counts as a GGUF repo. The size it reports is that of the main weight;
    the mmproj file is excluded from the GGUF-size accounting because it is
    filtered out at classification time."""
    vision_repo = _repo(
        "Org/VisionGguf",
        [_file("Q4_K_M.gguf", 5_000), _file("mmproj-Q8_0.gguf", 1_000)],
        tmp_path / "models--Org--VisionGguf",
    )
    cache_scan = SimpleNamespace(repos = [vision_repo])
    monkeypatch.setattr(models_route, "_all_hf_cache_scans", lambda: [cache_scan])

    response = asyncio.run(models_route.list_cached_gguf(current_subject = "test-user"))

    expected_entry = {
        "repo_id": "Org/VisionGguf",
        "size_bytes": 5_000,
        "cache_path": str(vision_repo.repo_path),
    }
    assert response["cached"] == [expected_entry]
|
||||
|
|
@ -904,6 +904,18 @@ def _is_mmproj(filename: str) -> bool:
|
|||
return "mmproj" in filename.lower()
|
||||
|
||||
|
||||
def _is_gguf_filename(filename: str) -> bool:
|
||||
return filename.lower().endswith(".gguf")
|
||||
|
||||
|
||||
def _iter_gguf_files(directory: Path):
    """Yield the files sitting directly inside ``directory`` (no recursion)
    whose names end in ``.gguf``, matched case-insensitively. Yields
    nothing when ``directory`` is not a directory."""
    if directory.is_dir():
        for child in directory.iterdir():
            if child.is_file() and _is_gguf_filename(child.name):
                yield child
|
||||
|
||||
|
||||
def detect_mmproj_file(path: str) -> Optional[str]:
|
||||
"""
|
||||
Find the mmproj (vision projection) GGUF file in a directory.
|
||||
|
|
@ -919,7 +931,7 @@ def detect_mmproj_file(path: str) -> Optional[str]:
|
|||
if not search_dir.is_dir():
|
||||
return None
|
||||
|
||||
for f in search_dir.glob("*.gguf"):
|
||||
for f in _iter_gguf_files(search_dir):
|
||||
if _is_mmproj(f.name):
|
||||
return str(f.resolve())
|
||||
return None
|
||||
|
|
@ -942,7 +954,7 @@ def detect_gguf_model(path: str) -> Optional[str]:
|
|||
p = Path(path)
|
||||
|
||||
# Case 1: direct .gguf file
|
||||
if p.suffix == ".gguf" and p.is_file():
|
||||
if p.suffix.lower() == ".gguf" and p.is_file():
|
||||
if _is_mmproj(p.name):
|
||||
return None
|
||||
return str(p.resolve())
|
||||
|
|
@ -950,7 +962,7 @@ def detect_gguf_model(path: str) -> Optional[str]:
|
|||
# Case 2: directory containing .gguf files (skip mmproj)
|
||||
if p.is_dir():
|
||||
gguf_files = sorted(
|
||||
(f for f in p.glob("*.gguf") if not _is_mmproj(f.name)),
|
||||
(f for f in _iter_gguf_files(p) if not _is_mmproj(f.name)),
|
||||
key = lambda f: f.stat().st_size,
|
||||
reverse = True,
|
||||
)
|
||||
|
|
@ -1015,7 +1027,7 @@ def _pick_best_gguf(filenames: list[str]) -> Optional[str]:
|
|||
Prefers quantization levels in _GGUF_QUANT_PREFERENCE order.
|
||||
Falls back to the first .gguf file found.
|
||||
"""
|
||||
gguf_files = [f for f in filenames if f.endswith(".gguf")]
|
||||
gguf_files = [f for f in filenames if f.lower().endswith(".gguf")]
|
||||
if not gguf_files:
|
||||
return None
|
||||
|
||||
|
|
@ -1100,7 +1112,7 @@ def list_gguf_variants(
|
|||
|
||||
for sibling in info.siblings:
|
||||
fname = sibling.rfilename
|
||||
if not fname.endswith(".gguf"):
|
||||
if not fname.lower().endswith(".gguf"):
|
||||
continue
|
||||
size = sibling.size or 0
|
||||
|
||||
|
|
@ -1171,7 +1183,7 @@ def list_local_gguf_variants(
|
|||
quant_first_file: dict[str, str] = {}
|
||||
has_vision = False
|
||||
|
||||
for f in sorted(p.glob("*.gguf")):
|
||||
for f in sorted(_iter_gguf_files(p)):
|
||||
if _is_mmproj(f.name):
|
||||
has_vision = True
|
||||
continue
|
||||
|
|
@ -1210,7 +1222,7 @@ def _find_local_gguf_by_variant(directory: str, variant: str) -> Optional[str]:
|
|||
|
||||
matches = sorted(
|
||||
f
|
||||
for f in p.glob("*.gguf")
|
||||
for f in _iter_gguf_files(p)
|
||||
if not _is_mmproj(f.name) and _extract_quant_label(f.name) == variant
|
||||
)
|
||||
if matches:
|
||||
|
|
@ -1438,7 +1450,9 @@ def scan_exported_models(
|
|||
|
||||
# Check for flat GGUF export (e.g. exports/gemma-3-4b-it-finetune-gguf/)
|
||||
# Filter out mmproj (vision projection) files — they aren't loadable as main models
|
||||
gguf_files = [f for f in run_dir.glob("*.gguf") if not _is_mmproj(f.name)]
|
||||
gguf_files = [
|
||||
f for f in _iter_gguf_files(run_dir) if not _is_mmproj(f.name)
|
||||
]
|
||||
if gguf_files:
|
||||
base_model = None
|
||||
export_meta = run_dir / "export_metadata.json"
|
||||
|
|
@ -1465,7 +1479,7 @@ def scan_exported_models(
|
|||
has_weights = any(checkpoint_dir.glob("*.safetensors")) or any(
|
||||
checkpoint_dir.glob("*.bin")
|
||||
)
|
||||
has_gguf = any(checkpoint_dir.glob("*.gguf"))
|
||||
has_gguf = any(_iter_gguf_files(checkpoint_dir))
|
||||
|
||||
base_model = None
|
||||
export_type = None
|
||||
|
|
@ -1488,7 +1502,7 @@ def scan_exported_models(
|
|||
pass
|
||||
elif has_gguf:
|
||||
export_type = "gguf"
|
||||
gguf_list = list(checkpoint_dir.glob("*.gguf"))
|
||||
gguf_list = list(_iter_gguf_files(checkpoint_dir))
|
||||
# Check checkpoint_dir first, then fall back to parent run_dir
|
||||
# (export.py writes metadata to the top-level export directory)
|
||||
for meta_dir in (checkpoint_dir, run_dir):
|
||||
|
|
|
|||
|
|
@ -399,10 +399,14 @@ function GgufVariantExpander({
|
|||
);
|
||||
}
|
||||
|
||||
// ── Detect GGUF repos by naming convention ────────────────────
|
||||
// ── Detect GGUF repos by naming convention or hub tag ────────────────────
|
||||
|
||||
function isGgufRepo(id: string): boolean {
|
||||
return id.toUpperCase().includes("-GGUF");
|
||||
/**
 * Name-based GGUF check: true when `id` contains `-GGUF` (any letter case)
 * either at the very end or immediately followed by another hyphen.
 */
function hasGgufSuffix(id: string): boolean {
  const ggufMarker = /-GGUF(?:$|-)/i;
  return ggufMarker.test(id);
}
|
||||
|
||||
/**
 * Decide whether a repo id should be treated as GGUF.
 *
 * @param id - Repo id to classify.
 * @param hintedIsGguf - Optional hint; when truthy the repo is GGUF
 *   regardless of its name.
 * @returns `true` when the hint is truthy, otherwise the result of the
 *   name-based `hasGgufSuffix` check.
 */
function isGgufRepo(id: string, hintedIsGguf?: boolean): boolean {
  if (hintedIsGguf) {
    return true;
  }
  return hasGgufSuffix(id);
}
|
||||
|
||||
/** Extract param count label from model name (e.g. "Qwen3-0.6B" -> "0.6B"). */
|
||||
|
|
@ -451,6 +455,33 @@ export function HubModelPicker({
|
|||
const { results, isLoading, isLoadingMore, fetchMore } =
|
||||
useHfModelSearch(debouncedQuery);
|
||||
|
||||
// Sets of lowercased repo ids that the store or HF search have
|
||||
// confirmed are GGUF. Absence means "no hint" and lets hasGgufSuffix
|
||||
// take over as fallback, rather than conflating unknown with known-
|
||||
// not-GGUF. Keys are lowercased so that store IDs and HF search IDs
|
||||
// that differ only by casing still match the same hint.
|
||||
const modelGgufIds = useMemo(() => {
|
||||
const ids = new Set<string>();
|
||||
for (const model of models) {
|
||||
if (model.isGguf) ids.add(model.id.toLowerCase());
|
||||
}
|
||||
return ids;
|
||||
}, [models]);
|
||||
const resultGgufIds = useMemo(() => {
|
||||
const ids = new Set<string>();
|
||||
for (const result of results) {
|
||||
if (result.isGguf) ids.add(result.id.toLowerCase());
|
||||
}
|
||||
return ids;
|
||||
}, [results]);
|
||||
const isKnownGgufRepo = useCallback(
|
||||
(id: string): boolean => {
|
||||
const key = id.toLowerCase();
|
||||
return isGgufRepo(id, resultGgufIds.has(key) || modelGgufIds.has(key));
|
||||
},
|
||||
[modelGgufIds, resultGgufIds],
|
||||
);
|
||||
|
||||
// Track which GGUF repo is expanded for variant selection
|
||||
const [expandedGguf, setExpandedGguf] = useState<string | null>(null);
|
||||
|
||||
|
|
@ -625,17 +656,17 @@ export function HubModelPicker({
|
|||
const recommendedIds = useMemo(() => {
|
||||
const all = dedupe([...models.map((model) => model.id), value ?? ""])
|
||||
.filter((id) => !downloadedSet.has(id.toLowerCase()))
|
||||
.filter((id) => !chatOnly || isGgufRepo(id))
|
||||
.filter((id) => !chatOnly || isKnownGgufRepo(id))
|
||||
.filter((id) => !/-FP8[-.]|FP8-Dynamic/i.test(id));
|
||||
// Sort: GGUFs first, then hub models
|
||||
const gguf: string[] = [];
|
||||
const hub: string[] = [];
|
||||
for (const id of all) {
|
||||
if (isGgufRepo(id)) gguf.push(id);
|
||||
if (isKnownGgufRepo(id)) gguf.push(id);
|
||||
else hub.push(id);
|
||||
}
|
||||
return [...gguf, ...hub];
|
||||
}, [models, value, downloadedSet, chatOnly]);
|
||||
}, [models, value, downloadedSet, chatOnly, isKnownGgufRepo]);
|
||||
|
||||
// Infinite scroll paging for the recommended section
|
||||
const [recommendedPage, setRecommendedPage] = useState(1);
|
||||
|
|
@ -645,7 +676,7 @@ export function HubModelPicker({
|
|||
}, [models, chatOnly]);
|
||||
|
||||
const visibleRecommendedIds = useMemo(() => {
|
||||
const hubStartIndex = recommendedIds.findIndex((id) => !isGgufRepo(id));
|
||||
const hubStartIndex = recommendedIds.findIndex((id) => !isKnownGgufRepo(id));
|
||||
const allGguf =
|
||||
hubStartIndex === -1
|
||||
? recommendedIds
|
||||
|
|
@ -659,7 +690,7 @@ export function HubModelPicker({
|
|||
result.push(...allHub.slice(p * 4, (p + 1) * 4));
|
||||
}
|
||||
return result;
|
||||
}, [recommendedIds, recommendedPage]);
|
||||
}, [recommendedIds, recommendedPage, isKnownGgufRepo]);
|
||||
|
||||
const hasMoreRecommended =
|
||||
visibleRecommendedIds.length < recommendedIds.length;
|
||||
|
|
@ -681,8 +712,8 @@ export function HubModelPicker({
|
|||
const ids = showHfSection
|
||||
? [...new Set([...visibleRecommendedIds, ...filteredRecommendedIds])]
|
||||
: visibleRecommendedIds;
|
||||
return ids.filter((id) => !isGgufRepo(id));
|
||||
}, [visibleRecommendedIds, showHfSection, filteredRecommendedIds]);
|
||||
return ids.filter((id) => !isKnownGgufRepo(id));
|
||||
}, [visibleRecommendedIds, showHfSection, filteredRecommendedIds, isKnownGgufRepo]);
|
||||
const { paramCountById: recommendedParamCountById } =
|
||||
useRecommendedModelVram(idsForVram);
|
||||
|
||||
|
|
@ -697,9 +728,9 @@ export function HubModelPicker({
|
|||
return results
|
||||
.map((result) => result.id)
|
||||
.filter((id) => !recommendedSet.has(id))
|
||||
.filter((id) => !chatOnly || isGgufRepo(id))
|
||||
.filter((id) => !chatOnly || isKnownGgufRepo(id))
|
||||
.filter((id) => !/-FP8[-.]|FP8-Dynamic/i.test(id));
|
||||
}, [recommendedSet, results, showHfSection, chatOnly]);
|
||||
}, [recommendedSet, results, showHfSection, chatOnly, isKnownGgufRepo]);
|
||||
|
||||
const metricsById = useMemo(
|
||||
() =>
|
||||
|
|
@ -800,14 +831,14 @@ export function HubModelPicker({
|
|||
/** Handle clicking a model row — GGUF repos expand, others load directly. */
|
||||
const handleModelClick = useCallback(
|
||||
(id: string) => {
|
||||
if (isGgufRepo(id)) {
|
||||
if (isKnownGgufRepo(id)) {
|
||||
// Toggle GGUF variant expander
|
||||
setExpandedGguf((prev) => (prev === id ? null : id));
|
||||
} else {
|
||||
onSelect(id, { source: "hub", isLora: false });
|
||||
}
|
||||
},
|
||||
[onSelect],
|
||||
[onSelect, isKnownGgufRepo],
|
||||
);
|
||||
|
||||
return (
|
||||
|
|
@ -848,7 +879,11 @@ export function HubModelPicker({
|
|||
label={c.repo_id}
|
||||
meta={`GGUF · ${formatBytes(c.size_bytes)}`}
|
||||
selected={value === c.repo_id}
|
||||
onClick={() => handleModelClick(c.repo_id)}
|
||||
onClick={() =>
|
||||
setExpandedGguf((prev) =>
|
||||
prev === c.repo_id ? null : c.repo_id,
|
||||
)
|
||||
}
|
||||
vramStatus={null}
|
||||
/>
|
||||
{expandedGguf === c.repo_id && (
|
||||
|
|
@ -909,7 +944,7 @@ export function HubModelPicker({
|
|||
<ModelRow
|
||||
label={m.model_id ?? m.display_name}
|
||||
meta={
|
||||
isGguf || m.path.endsWith(".gguf") ? "GGUF" : "Local"
|
||||
isGguf || m.path.toLowerCase().endsWith(".gguf") ? "GGUF" : "Local"
|
||||
}
|
||||
selected={value === m.id}
|
||||
onClick={() => {
|
||||
|
|
@ -1036,7 +1071,7 @@ export function HubModelPicker({
|
|||
const isGguf =
|
||||
isGgufRepo(m.id) ||
|
||||
isGgufRepo(m.display_name) ||
|
||||
m.path.endsWith(".gguf");
|
||||
m.path.toLowerCase().endsWith(".gguf");
|
||||
return (
|
||||
<div key={m.id}>
|
||||
<ModelRow
|
||||
|
|
@ -1089,16 +1124,22 @@ export function HubModelPicker({
|
|||
<ModelRow
|
||||
label={id}
|
||||
meta={
|
||||
isGgufRepo(id)
|
||||
isKnownGgufRepo(id)
|
||||
? "GGUF"
|
||||
: (vram?.detail ?? extractParamLabel(id))
|
||||
}
|
||||
selected={value === id}
|
||||
onClick={() => handleModelClick(id)}
|
||||
onClick={() => {
|
||||
if (isKnownGgufRepo(id)) {
|
||||
setExpandedGguf((prev) => (prev === id ? null : id));
|
||||
} else {
|
||||
handleModelClick(id);
|
||||
}
|
||||
}}
|
||||
vramStatus={
|
||||
isGgufRepo(id) ? null : (vram?.status ?? null)
|
||||
isKnownGgufRepo(id) ? null : (vram?.status ?? null)
|
||||
}
|
||||
vramEst={isGgufRepo(id) ? undefined : vram?.est}
|
||||
vramEst={isKnownGgufRepo(id) ? undefined : vram?.est}
|
||||
gpuGb={gpu.available ? gpu.memoryTotalGb : undefined}
|
||||
/>
|
||||
{expandedGguf === id && (
|
||||
|
|
@ -1136,16 +1177,22 @@ export function HubModelPicker({
|
|||
<ModelRow
|
||||
label={id}
|
||||
meta={
|
||||
isGgufRepo(id)
|
||||
isKnownGgufRepo(id)
|
||||
? "GGUF"
|
||||
: (vram?.detail ?? extractParamLabel(id))
|
||||
}
|
||||
selected={value === id}
|
||||
onClick={() => handleModelClick(id)}
|
||||
onClick={() => {
|
||||
if (isKnownGgufRepo(id)) {
|
||||
setExpandedGguf((prev) => (prev === id ? null : id));
|
||||
} else {
|
||||
handleModelClick(id);
|
||||
}
|
||||
}}
|
||||
vramStatus={
|
||||
isGgufRepo(id) ? null : (vram?.status ?? null)
|
||||
isKnownGgufRepo(id) ? null : (vram?.status ?? null)
|
||||
}
|
||||
vramEst={isGgufRepo(id) ? undefined : vram?.est}
|
||||
vramEst={isKnownGgufRepo(id) ? undefined : vram?.est}
|
||||
gpuGb={gpu.available ? gpu.memoryTotalGb : undefined}
|
||||
/>
|
||||
{expandedGguf === id && (
|
||||
|
|
@ -1175,24 +1222,31 @@ export function HubModelPicker({
|
|||
No matching models.
|
||||
</div>
|
||||
) : null
|
||||
) : (
|
||||
) : (
|
||||
hfIds.map((id) => {
|
||||
const vram = vramMap.get(id);
|
||||
const isSearchGguf = isKnownGgufRepo(id);
|
||||
return (
|
||||
<div key={id}>
|
||||
<ModelRow
|
||||
label={id}
|
||||
meta={
|
||||
isGgufRepo(id)
|
||||
isSearchGguf
|
||||
? "GGUF"
|
||||
: (metricsById.get(id) ?? extractParamLabel(id))
|
||||
}
|
||||
selected={value === id}
|
||||
onClick={() => handleModelClick(id)}
|
||||
onClick={() => {
|
||||
if (isSearchGguf) {
|
||||
setExpandedGguf((prev) => (prev === id ? null : id));
|
||||
} else {
|
||||
handleModelClick(id);
|
||||
}
|
||||
}}
|
||||
vramStatus={
|
||||
isGgufRepo(id) ? null : (vram?.status ?? null)
|
||||
isSearchGguf ? null : (vram?.status ?? null)
|
||||
}
|
||||
vramEst={isGgufRepo(id) ? undefined : vram?.est}
|
||||
vramEst={isSearchGguf ? undefined : vram?.est}
|
||||
gpuGb={gpu.available ? gpu.memoryTotalGb : undefined}
|
||||
/>
|
||||
{expandedGguf === id && (
|
||||
|
|
|
|||
|
|
@ -8,6 +8,7 @@ export interface ModelOption {
|
|||
name: string;
|
||||
description?: string;
|
||||
icon?: ReactNode;
|
||||
isGguf?: boolean;
|
||||
}
|
||||
|
||||
export interface LoraModelOption extends ModelOption {
|
||||
|
|
@ -24,4 +25,3 @@ export interface ModelSelectorChangeMeta {
|
|||
isDownloaded?: boolean;
|
||||
expectedBytes?: number;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -691,6 +691,7 @@ export function ChatPage(): ReactElement {
|
|||
id: model.id,
|
||||
name: model.name,
|
||||
description: model.description,
|
||||
isGguf: model.isGguf,
|
||||
})),
|
||||
[modelsFromStore],
|
||||
);
|
||||
|
|
|
|||
|
|
@ -13,6 +13,7 @@ export interface HfModelResult {
|
|||
likes: number;
|
||||
totalParams?: number;
|
||||
estimatedSizeBytes?: number;
|
||||
isGguf: boolean;
|
||||
}
|
||||
|
||||
const EXCLUDED_TAGS = new Set([
|
||||
|
|
@ -89,7 +90,10 @@ function makeMapModel(excludeGguf: boolean) {
|
|||
if (!isEmbedding && m.tags?.some((t) => EXCLUDED_TAGS.has(t))) {
|
||||
return null;
|
||||
}
|
||||
if (excludeGguf && m.tags?.includes("gguf")) {
|
||||
const isGguf =
|
||||
Boolean(m.tags?.some((tag) => tag.toLowerCase() === "gguf")) ||
|
||||
/-GGUF(?:$|-)/i.test(m.name);
|
||||
if (excludeGguf && isGguf) {
|
||||
return null;
|
||||
}
|
||||
return {
|
||||
|
|
@ -98,6 +102,7 @@ function makeMapModel(excludeGguf: boolean) {
|
|||
likes: m.likes,
|
||||
totalParams: m.safetensors?.total,
|
||||
estimatedSizeBytes: estimateSizeFromDtypes(m.safetensors?.parameters),
|
||||
isGguf,
|
||||
};
|
||||
};
|
||||
}
|
||||
|
|
@ -330,4 +335,3 @@ export function useHfModelSearch(
|
|||
|
||||
return { ...search, results };
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Reference in a new issue