mirror of
https://github.com/mudler/LocalAI
synced 2026-04-21 13:27:21 +00:00
The gallery-agent lives under .github/, which Go tooling treats as a hidden directory and excludes from './...' expansion. That means 'go mod tidy' (run on every dependabot dependency bump) repeatedly strips github.com/ghodss/yaml from go.mod/go.sum, breaking 'go run ./.github/gallery-agent' with a missing go.sum entry error. Switch to sigs.k8s.io/yaml — API-compatible with ghodss/yaml and already pulled in as a transitive dependency via non-hidden packages, so tidy can no longer remove it.
301 lines
9.3 KiB
Go
301 lines
9.3 KiB
Go
package main
|
|
|
|
import (
|
|
"encoding/json"
|
|
"fmt"
|
|
"io"
|
|
"net/http"
|
|
"os"
|
|
"regexp"
|
|
"strings"
|
|
|
|
hfapi "github.com/mudler/LocalAI/pkg/huggingface-api"
|
|
"sigs.k8s.io/yaml"
|
|
)
|
|
|
|
// galleryIndexPath holds the value of the GALLERY_INDEX_PATH environment
// variable, read once when the package is initialised. It is empty when the
// variable is unset.
var galleryIndexPath = os.Getenv("GALLERY_INDEX_PATH")

// getGalleryIndexPath reports where the gallery index file lives: the path
// taken from GALLERY_INDEX_PATH when one was provided, and the default
// "gallery/index.yaml" otherwise.
func getGalleryIndexPath() string {
	if galleryIndexPath == "" {
		return "gallery/index.yaml"
	}
	return galleryIndexPath
}
|
|
|
|
// galleryModel is the subset of a gallery index entry that this file needs:
// the entry name and the URLs it references.
//
// The file decodes the index with sigs.k8s.io/yaml, which converts YAML to
// JSON and then unmarshals with encoding/json, so the `json` tags are the ones
// that drive decoding. The `yaml` tags are kept so the struct still decodes
// correctly if it is ever handed to a yaml-tag-aware library.
type galleryModel struct {
	Name string   `json:"name" yaml:"name"`
	Urls []string `json:"urls" yaml:"urls"`
}
|
|
|
|
// loadGalleryURLSet parses gallery/index.yaml once and returns the set of
|
|
// HuggingFace model URLs already present in the gallery.
|
|
func loadGalleryURLSet() (map[string]struct{}, error) {
|
|
indexPath := getGalleryIndexPath()
|
|
content, err := os.ReadFile(indexPath)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to read %s: %w", indexPath, err)
|
|
}
|
|
|
|
var galleryModels []galleryModel
|
|
if err := yaml.Unmarshal(content, &galleryModels); err != nil {
|
|
return nil, fmt.Errorf("failed to unmarshal %s: %w", indexPath, err)
|
|
}
|
|
|
|
set := make(map[string]struct{}, len(galleryModels))
|
|
for _, gm := range galleryModels {
|
|
for _, u := range gm.Urls {
|
|
set[u] = struct{}{}
|
|
}
|
|
}
|
|
|
|
// Also skip URLs already proposed in open (unmerged) gallery-agent PRs.
|
|
// The workflow injects these via EXTRA_SKIP_URLS so we don't keep
|
|
// re-proposing the same model every run while a PR is waiting to merge.
|
|
for _, line := range strings.FieldsFunc(os.Getenv("EXTRA_SKIP_URLS"), func(r rune) bool {
|
|
return r == '\n' || r == ',' || r == ' '
|
|
}) {
|
|
u := strings.TrimSpace(line)
|
|
if u != "" {
|
|
set[u] = struct{}{}
|
|
}
|
|
}
|
|
|
|
return set, nil
|
|
}
|
|
|
|
// modelAlreadyInGallery reports whether the gallery URL set contains the
// HuggingFace page URL ("https://huggingface.co/<modelID>") of the given
// model repo.
func modelAlreadyInGallery(set map[string]struct{}, modelID string) bool {
	const hfBase = "https://huggingface.co/"
	if _, present := set[hfBase+modelID]; present {
		return true
	}
	return false
}
|
|
|
|
// baseModelFromTags scans the tag list for the first entry of the form
// `base_model:<repo>` and returns the part after the prefix. It returns ""
// when no tag carries that prefix. HuggingFace surfaces the base model
// declared in the model card's YAML frontmatter as such a tag.
func baseModelFromTags(tags []string) string {
	const marker = "base_model:"
	for i := range tags {
		if tag := tags[i]; strings.HasPrefix(tag, marker) {
			return tag[len(marker):]
		}
	}
	return ""
}
|
|
|
|
// licenseFromTags returns the value of the first `license:<id>` tag in the
// list (the text after the prefix), or "" when no such tag exists.
func licenseFromTags(tags []string) string {
	const prefix = "license:"
	for _, tag := range tags {
		if !strings.HasPrefix(tag, prefix) {
			continue
		}
		return tag[len(prefix):]
	}
	return ""
}
|
|
|
|
// curatedTags derives the gallery tag list from HuggingFace's raw tags. The
// result always starts with "llm" and "gguf"; after those come the
// whitelisted family / capability markers that appear in hfTags (compared
// case-insensitively), in whitelist order and without duplicates.
func curatedTags(hfTags []string) []string {
	allowed := [...]string{
		"gpu", "cpu",
		"llama", "mistral", "mixtral", "qwen", "qwen2", "qwen3",
		"gemma", "gemma2", "gemma3", "phi", "phi3", "phi4",
		"deepseek", "yi", "falcon", "command-r",
		"vision", "multimodal", "code", "chat",
		"instruction-tuned", "reasoning", "thinking",
	}

	// Lower-cased view of the incoming tags for O(1) membership checks.
	present := make(map[string]struct{}, len(hfTags))
	for _, tag := range hfTags {
		present[strings.ToLower(tag)] = struct{}{}
	}

	result := []string{"llm", "gguf"}
	emitted := map[string]struct{}{"llm": {}, "gguf": {}}
	for _, candidate := range allowed {
		if _, found := present[candidate]; !found {
			continue
		}
		if _, already := emitted[candidate]; already {
			continue
		}
		emitted[candidate] = struct{}{}
		result = append(result, candidate)
	}
	return result
}
|
|
|
|
// resolveReadme fetches a description-quality README for a (possibly
|
|
// quantized) repo: if a `base_model:` tag is present, fetch the base repo's
|
|
// README; otherwise fall back to the repo's own README.
|
|
func resolveReadme(client *hfapi.Client, modelID string, hfTags []string) (string, error) {
|
|
if base := baseModelFromTags(hfTags); base != "" && base != modelID {
|
|
if content, err := client.GetReadmeContent(base, "README.md"); err == nil && strings.TrimSpace(content) != "" {
|
|
return cleanTextContent(content), nil
|
|
}
|
|
}
|
|
content, err := client.GetReadmeContent(modelID, "README.md")
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
return cleanTextContent(content), nil
|
|
}
|
|
|
|
// extractDescription turns a raw HuggingFace README into a concise plain-text
|
|
// description suitable for embedding in gallery/index.yaml: strips YAML
|
|
// frontmatter, HTML tags/comments, markdown images, link URLs (keeping the
|
|
// link text), markdown tables, and then truncates at a paragraph boundary
|
|
// around ~1200 characters. Raw README should still be used for icon
|
|
// extraction — call this only for the `description:` field.
|
|
func extractDescription(readme string) string {
|
|
s := readme
|
|
|
|
// Strip leading YAML frontmatter: `---\n...\n---\n` at start of file.
|
|
if strings.HasPrefix(strings.TrimLeft(s, " \t\n"), "---") {
|
|
trimmed := strings.TrimLeft(s, " \t\n")
|
|
rest := strings.TrimPrefix(trimmed, "---")
|
|
if idx := strings.Index(rest, "\n---"); idx >= 0 {
|
|
after := rest[idx+len("\n---"):]
|
|
after = strings.TrimPrefix(after, "\n")
|
|
s = after
|
|
}
|
|
}
|
|
|
|
// Strip HTML comments and tags.
|
|
s = regexp.MustCompile(`(?s)<!--.*?-->`).ReplaceAllString(s, "")
|
|
s = regexp.MustCompile(`(?is)<[^>]+>`).ReplaceAllString(s, "")
|
|
|
|
// Strip markdown images entirely.
|
|
s = regexp.MustCompile(`!\[[^\]]*\]\([^)]*\)`).ReplaceAllString(s, "")
|
|
// Replace markdown links `[text](url)` with just `text`.
|
|
s = regexp.MustCompile(`\[([^\]]+)\]\([^)]+\)`).ReplaceAllString(s, "$1")
|
|
|
|
// Drop table lines and horizontal rules, and flatten all leading
|
|
// whitespace: generateYAMLEntry embeds this under a `description: |`
|
|
// literal block whose indentation is set by the first non-empty line.
|
|
// If any line has extra leading whitespace (e.g. from an indented
|
|
// `<p align="center">` block in the original README), YAML will pick
|
|
// that up as the block's indent and every later line at a smaller
|
|
// indent blows the block scalar. Stripping leading whitespace here
|
|
// guarantees uniform 4-space indentation after formatTextContent runs.
|
|
var kept []string
|
|
for _, line := range strings.Split(s, "\n") {
|
|
t := strings.TrimLeft(line, " \t")
|
|
ts := strings.TrimSpace(t)
|
|
if strings.HasPrefix(ts, "|") {
|
|
continue
|
|
}
|
|
if strings.HasPrefix(ts, ":--") || strings.HasPrefix(ts, "---") || strings.HasPrefix(ts, "===") {
|
|
continue
|
|
}
|
|
kept = append(kept, t)
|
|
}
|
|
s = strings.Join(kept, "\n")
|
|
|
|
// Normalise whitespace and drop any leading blank lines so the literal
|
|
// block in YAML doesn't start with a blank first line (which would
|
|
// break the indentation detector the same way).
|
|
s = cleanTextContent(s)
|
|
s = strings.TrimLeft(s, " \t\n")
|
|
|
|
// Truncate at a paragraph boundary around maxLen chars.
|
|
const maxLen = 1200
|
|
if len(s) > maxLen {
|
|
cut := strings.LastIndex(s[:maxLen], "\n\n")
|
|
if cut < maxLen/3 {
|
|
cut = maxLen
|
|
}
|
|
s = strings.TrimRight(s[:cut], " \t\n") + "\n\n..."
|
|
}
|
|
|
|
return s
|
|
}
|
|
|
|
// cleanTextContent tidies text for embedding in YAML: every line loses its
// trailing spaces, tabs and carriage returns, runs of blank lines collapse to
// a single blank line, and trailing newlines are removed from the result.
func cleanTextContent(text string) string {
	var (
		out       []string
		lastBlank bool
	)
	for _, raw := range strings.Split(text, "\n") {
		line := strings.TrimRight(raw, " \t\r")
		blank := line == ""
		if blank && lastBlank {
			// Second or later blank line in a row: drop it.
			continue
		}
		out = append(out, line)
		lastBlank = blank
	}
	return strings.TrimRight(strings.Join(out, "\n"), "\n")
}
|
|
|
|
// Patterns used by extractIconFromReadme, compiled once at package
// initialisation rather than on every call. All of them match
// case-insensitively and require one of the listed image extensions.
var (
	// Markdown image `![alt](target)`; group 1 is the target.
	iconMarkdownImageRe = regexp.MustCompile(`(?i)!\[[^\]]*\]\(([^)]+\.(png|jpg|jpeg|svg|webp|gif))\)`)
	// HTML `<img ... src="target">`; group 1 is the target.
	iconHTMLImageRe = regexp.MustCompile(`(?i)<img[^>]+src=["']([^"']+\.(png|jpg|jpeg|svg|webp|gif))["']`)
	// Bare http(s) URL ending in an image extension; group 0 is the URL.
	iconPlainImageRe = regexp.MustCompile(`(?i)https?://[^\s<>"']+\.(png|jpg|jpeg|svg|webp|gif)`)
)

// extractIconFromReadme scans README content for an image URL usable as a
// gallery entry icon. It looks at the first markdown image, then the first
// HTML <img> tag, then the first bare image URL, and returns the first of
// those candidates that is an absolute http:// or https:// URL. It returns ""
// when the content is empty or no candidate qualifies.
func extractIconFromReadme(readmeContent string) string {
	if readmeContent == "" {
		return ""
	}

	// Ordered by preference; group is the submatch index holding the URL.
	candidates := []struct {
		re    *regexp.Regexp
		group int
	}{
		{iconMarkdownImageRe, 1},
		{iconHTMLImageRe, 1},
		{iconPlainImageRe, 0},
	}
	for _, c := range candidates {
		m := c.re.FindStringSubmatch(readmeContent)
		if len(m) <= c.group {
			continue
		}
		if u := iconHTTPURL(m[c.group]); u != "" {
			return u
		}
	}
	return ""
}

// iconHTTPURL trims surrounding whitespace from candidate and returns it when
// it starts with an http:// or https:// scheme (compared case-insensitively);
// otherwise it returns "". Requiring the full scheme keeps relative paths that
// merely begin with the letters "http" (e.g. "httpd-logo.png") from being
// treated as URLs.
func iconHTTPURL(candidate string) string {
	u := strings.TrimSpace(candidate)
	lower := strings.ToLower(u)
	if strings.HasPrefix(lower, "http://") || strings.HasPrefix(lower, "https://") {
		return u
	}
	return ""
}
|
|
|
|
// avatarRequestTimeout caps a whole avatar lookup (connection, headers and
// body) at 15 seconds. It is an untyped constant expressed in nanoseconds so
// that it converts to time.Duration at its use site.
const avatarRequestTimeout = 15 * 1_000_000_000

// avatarMaxBodyBytes bounds how much of the overview response is read.
const avatarMaxBodyBytes = 1 << 20

// avatarHTTPClient is the client used for avatar lookups. Unlike
// http.DefaultClient it has a timeout, so a stalled connection cannot hang the
// caller indefinitely. Its Transport is left nil, which means
// http.DefaultTransport is used.
var avatarHTTPClient = &http.Client{Timeout: avatarRequestTimeout}

// getHuggingFaceAvatarURL returns the HF avatar URL for a user, or "".
//
// It queries https://huggingface.co/api/users/<author>/overview and returns
// the first non-empty string found under the "avatarUrl" or "avatar" key of
// the JSON response. The lookup is best-effort: an empty author, a request
// error or timeout, a non-200 status, an unreadable body, or invalid JSON all
// yield "".
func getHuggingFaceAvatarURL(author string) string {
	if author == "" {
		return ""
	}

	userURL := fmt.Sprintf("https://huggingface.co/api/users/%s/overview", author)
	resp, err := avatarHTTPClient.Get(userURL)
	if err != nil {
		return ""
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return ""
	}

	// Cap the read so an unexpectedly large response cannot exhaust memory;
	// a truncated payload simply fails to parse and yields "".
	body, err := io.ReadAll(io.LimitReader(resp.Body, avatarMaxBodyBytes))
	if err != nil {
		return ""
	}

	var info map[string]any
	if err := json.Unmarshal(body, &info); err != nil {
		return ""
	}

	// "avatarUrl" takes precedence over "avatar".
	for _, key := range []string{"avatarUrl", "avatar"} {
		if v, ok := info[key].(string); ok && v != "" {
			return v
		}
	}
	return ""
}
|
|
|
|
// extractModelIcon extracts an icon URL from the README, falling back to the
|
|
// HuggingFace user avatar.
|
|
func extractModelIcon(model ProcessedModel) string {
|
|
if icon := extractIconFromReadme(model.ReadmeContent); icon != "" {
|
|
return icon
|
|
}
|
|
if model.Author != "" {
|
|
if avatar := getHuggingFaceAvatarURL(model.Author); avatar != "" {
|
|
return avatar
|
|
}
|
|
}
|
|
return ""
|
|
}
|