mirror of
https://github.com/mudler/LocalAI
synced 2026-04-21 13:27:21 +00:00
feat(stable-diffusion.ggml): add support for video generation (#9420)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
parent
6e49dba27c
commit
054c4b4b45
7 changed files with 439 additions and 25 deletions
|
|
@ -26,6 +26,10 @@
|
|||
#include "stb_image_resize.h"
|
||||
#include <stdlib.h>
|
||||
#include <regex>
|
||||
#include <errno.h>
|
||||
#include <signal.h>
|
||||
#include <unistd.h>
|
||||
#include <sys/wait.h>
|
||||
|
||||
|
||||
|
||||
|
|
@ -980,6 +984,251 @@ int gen_image(sd_img_gen_params_t *p, int steps, char *dst, float cfg_scale, cha
|
|||
return !ret;
|
||||
}
|
||||
|
||||
// ---------------- Video generation ----------------
|
||||
|
||||
// Allocate and initialize a video-generation parameter struct with library
// defaults for the main/high-noise sample params and the cache params.
// Returns nullptr on allocation failure. On success the caller owns the
// memory; gen_video() takes ownership and frees it after generation.
sd_vid_gen_params_t* sd_vid_gen_params_new(void) {
    sd_vid_gen_params_t *params = (sd_vid_gen_params_t *)std::malloc(sizeof(sd_vid_gen_params_t));
    if (!params) {
        // Previously the null return was dereferenced below; fail loudly instead.
        fprintf(stderr, "sd_vid_gen_params_new: allocation failed\n");
        return nullptr;
    }
    sd_vid_gen_params_init(params);
    sd_sample_params_init(&params->sample_params);
    sd_sample_params_init(&params->high_noise_sample_params);
    sd_cache_params_init(&params->cache);
    return params;
}
|
||||
|
||||
// Persistent storage for cleaned video prompts (kept alive for the duration of generation)
|
||||
static std::string cleaned_vid_prompt_storage;
|
||||
static std::string cleaned_vid_negative_prompt_storage;
|
||||
|
||||
void sd_vid_gen_params_set_prompts(sd_vid_gen_params_t *params, const char *prompt, const char *negative_prompt) {
|
||||
lora_vec.clear();
|
||||
lora_strings.clear();
|
||||
|
||||
std::string prompt_str = prompt ? prompt : "";
|
||||
std::string negative_prompt_str = negative_prompt ? negative_prompt : "";
|
||||
|
||||
const char* lora_dir_to_use = lora_dir_path.empty() ? nullptr : lora_dir_path.c_str();
|
||||
|
||||
auto [loras, cleaned_prompt] = parse_loras_from_prompt(prompt_str, lora_dir_to_use);
|
||||
lora_vec = loras;
|
||||
cleaned_vid_prompt_storage = cleaned_prompt;
|
||||
|
||||
auto [neg_loras, cleaned_negative] = parse_loras_from_prompt(negative_prompt_str, lora_dir_to_use);
|
||||
cleaned_vid_negative_prompt_storage = cleaned_negative;
|
||||
|
||||
params->prompt = cleaned_vid_prompt_storage.c_str();
|
||||
params->negative_prompt = cleaned_vid_negative_prompt_storage.c_str();
|
||||
params->loras = lora_vec.empty() ? nullptr : lora_vec.data();
|
||||
params->lora_count = static_cast<uint32_t>(lora_vec.size());
|
||||
}
|
||||
|
||||
// Set the output resolution (in pixels) on the video-generation params.
void sd_vid_gen_params_set_dimensions(sd_vid_gen_params_t *params, int width, int height) {
    params->width  = width;
    params->height = height;
}
|
||||
|
||||
// Set the RNG seed used for generation.
void sd_vid_gen_params_set_seed(sd_vid_gen_params_t *params, int64_t seed) {
    params->seed = seed;
}
|
||||
|
||||
// Set the number of frames to generate.
void sd_vid_gen_params_set_video_frames(sd_vid_gen_params_t *params, int n) {
    params->video_frames = n;
}
|
||||
|
||||
// Load an image file into an sd_image_t, resizing to target dims if needed.
|
||||
// Returns a heap-allocated buffer the caller must free (or nullptr on failure).
|
||||
static uint8_t* load_and_resize_image(const char* path, int target_width, int target_height, sd_image_t* out) {
|
||||
if (!path || strlen(path) == 0) {
|
||||
*out = {0, 0, 0, nullptr};
|
||||
return nullptr;
|
||||
}
|
||||
int c = 0, img_w = 0, img_h = 0;
|
||||
uint8_t* buf = stbi_load(path, &img_w, &img_h, &c, 3);
|
||||
if (!buf) {
|
||||
fprintf(stderr, "Failed to load image from '%s'\n", path);
|
||||
*out = {0, 0, 0, nullptr};
|
||||
return nullptr;
|
||||
}
|
||||
if (img_w != target_width || img_h != target_height) {
|
||||
fprintf(stderr, "Resizing image from %dx%d to %dx%d\n", img_w, img_h, target_width, target_height);
|
||||
uint8_t* resized = (uint8_t*)malloc((size_t)target_width * target_height * 3);
|
||||
if (!resized) { free(buf); *out = {0, 0, 0, nullptr}; return nullptr; }
|
||||
stbir_resize(buf, img_w, img_h, 0,
|
||||
resized, target_width, target_height, 0, STBIR_TYPE_UINT8,
|
||||
3, STBIR_ALPHA_CHANNEL_NONE, 0,
|
||||
STBIR_EDGE_CLAMP, STBIR_EDGE_CLAMP,
|
||||
STBIR_FILTER_BOX, STBIR_FILTER_BOX,
|
||||
STBIR_COLORSPACE_SRGB, nullptr);
|
||||
free(buf);
|
||||
buf = resized;
|
||||
}
|
||||
*out = {(uint32_t)target_width, (uint32_t)target_height, 3, buf};
|
||||
return buf;
|
||||
}
|
||||
|
||||
// Pipe raw RGB/RGBA frames to ffmpeg stdin and let it produce an MP4 at dst.
|
||||
// Uses fork+execvp to avoid shell interpretation of dst.
|
||||
static int ffmpeg_mux_raw_to_mp4(sd_image_t* frames, int num_frames, int fps, const char* dst) {
|
||||
if (num_frames <= 0 || !frames || !frames[0].data) {
|
||||
fprintf(stderr, "ffmpeg_mux: empty frames\n");
|
||||
return 1;
|
||||
}
|
||||
int width = (int)frames[0].width;
|
||||
int height = (int)frames[0].height;
|
||||
int channels = (int)frames[0].channel;
|
||||
const char* pix_fmt_in = (channels == 4) ? "rgba" : "rgb24";
|
||||
|
||||
char size_str[32];
|
||||
char fps_str[32];
|
||||
snprintf(size_str, sizeof(size_str), "%dx%d", width, height);
|
||||
snprintf(fps_str, sizeof(fps_str), "%d", fps);
|
||||
|
||||
int pipefd[2];
|
||||
if (pipe(pipefd) != 0) { perror("pipe"); return 1; }
|
||||
|
||||
pid_t pid = fork();
|
||||
if (pid < 0) { perror("fork"); close(pipefd[0]); close(pipefd[1]); return 1; }
|
||||
|
||||
if (pid == 0) {
|
||||
// child
|
||||
close(pipefd[1]);
|
||||
if (dup2(pipefd[0], STDIN_FILENO) < 0) { perror("dup2"); _exit(127); }
|
||||
close(pipefd[0]);
|
||||
std::vector<char*> argv = {
|
||||
const_cast<char*>("ffmpeg"),
|
||||
const_cast<char*>("-y"),
|
||||
const_cast<char*>("-hide_banner"),
|
||||
const_cast<char*>("-loglevel"), const_cast<char*>("warning"),
|
||||
const_cast<char*>("-f"), const_cast<char*>("rawvideo"),
|
||||
const_cast<char*>("-pix_fmt"), const_cast<char*>(pix_fmt_in),
|
||||
const_cast<char*>("-s"), size_str,
|
||||
const_cast<char*>("-framerate"), fps_str,
|
||||
const_cast<char*>("-i"), const_cast<char*>("-"),
|
||||
const_cast<char*>("-c:v"), const_cast<char*>("libx264"),
|
||||
const_cast<char*>("-pix_fmt"), const_cast<char*>("yuv420p"),
|
||||
const_cast<char*>("-movflags"), const_cast<char*>("+faststart"),
|
||||
const_cast<char*>(dst),
|
||||
nullptr
|
||||
};
|
||||
execvp(argv[0], argv.data());
|
||||
perror("execvp ffmpeg");
|
||||
_exit(127);
|
||||
}
|
||||
|
||||
// parent
|
||||
close(pipefd[0]);
|
||||
|
||||
// Ignore SIGPIPE so a dying ffmpeg surfaces via write() errno instead of killing us.
|
||||
signal(SIGPIPE, SIG_IGN);
|
||||
|
||||
for (int i = 0; i < num_frames; i++) {
|
||||
if (!frames[i].data) continue;
|
||||
size_t frame_bytes = (size_t)frames[i].width * frames[i].height * frames[i].channel;
|
||||
const uint8_t* p = frames[i].data;
|
||||
size_t remaining = frame_bytes;
|
||||
while (remaining > 0) {
|
||||
ssize_t n = write(pipefd[1], p, remaining);
|
||||
if (n < 0) {
|
||||
if (errno == EINTR) continue;
|
||||
perror("write frame to ffmpeg");
|
||||
close(pipefd[1]);
|
||||
int status;
|
||||
waitpid(pid, &status, 0);
|
||||
return 1;
|
||||
}
|
||||
p += n;
|
||||
remaining -= (size_t)n;
|
||||
}
|
||||
}
|
||||
close(pipefd[1]);
|
||||
|
||||
int status = 0;
|
||||
while (waitpid(pid, &status, 0) < 0) {
|
||||
if (errno != EINTR) { perror("waitpid"); return 1; }
|
||||
}
|
||||
if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) {
|
||||
fprintf(stderr, "ffmpeg exited with status %d\n", status);
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Run video generation and mux the produced frames into an MP4 at dst.
// Takes ownership of p (allocated by sd_vid_gen_params_new) and frees it.
// init_image / end_image are optional reference-image paths (NULL or "" to skip).
// Returns 0 on success, 1 on failure.
int gen_video(sd_vid_gen_params_t *p, int steps, char *dst, float cfg_scale, int fps, char *init_image, char *end_image) {
    if (!p) return 1;
    if (!dst || strlen(dst) == 0) {
        fprintf(stderr, "gen_video: dst is empty\n");
        std::free(p);
        return 1;
    }

    std::vector<int> skip_layers = {7, 8, 9};

    fprintf(stderr, "Generating video: %dx%d, frames=%d, fps=%d, steps=%d, cfg=%.2f\n",
            p->width, p->height, p->video_frames, fps, steps, cfg_scale);

    // Sample params (shared by both low and high-noise passes — MoE models use the high-noise
    // set during the first phase; single-model Wan2.1 ignores it. Same defaults for both is fine.)
    auto configure_pass = [&](auto &sp) {
        sp.guidance.txt_cfg         = cfg_scale;
        sp.guidance.slg.layers      = skip_layers.data();
        sp.guidance.slg.layer_count = skip_layers.size();
        sp.sample_method            = sample_method;
        sp.sample_steps             = steps;
        sp.scheduler                = scheduler;
        sp.flow_shift               = flow_shift;
    };
    configure_pass(p->sample_params);
    configure_pass(p->high_noise_sample_params);

    // Load init/end reference images if provided (resized to output dims).
    uint8_t* start_pixels = nullptr;
    uint8_t* final_pixels = nullptr;
    sd_image_t start_ref = {0, 0, 0, nullptr};
    sd_image_t end_ref   = {0, 0, 0, nullptr};
    if (init_image && strlen(init_image) > 0) {
        start_pixels = load_and_resize_image(init_image, p->width, p->height, &start_ref);
        if (!start_pixels) { std::free(p); return 1; }
    }
    if (end_image && strlen(end_image) > 0) {
        final_pixels = load_and_resize_image(end_image, p->width, p->height, &end_ref);
        if (!final_pixels) {
            if (start_pixels) free(start_pixels);
            std::free(p);
            return 1;
        }
    }
    p->init_image = start_ref;
    p->end_image  = end_ref;

    // Generate
    int frame_count = 0;
    sd_image_t* frames = generate_video(sd_c, p, &frame_count);
    std::free(p);

    if (!frames || frame_count == 0) {
        fprintf(stderr, "generate_video produced no frames\n");
        if (start_pixels) free(start_pixels);
        if (final_pixels) free(final_pixels);
        return 1;
    }

    fprintf(stderr, "Generated %d frames, muxing to %s via ffmpeg\n", frame_count, dst);

    int rc = ffmpeg_mux_raw_to_mp4(frames, frame_count, fps, dst);

    // Release each frame buffer, the frame array, and the staged reference images.
    for (int i = 0; i < frame_count; i++) {
        if (frames[i].data) free(frames[i].data);
    }
    free(frames);
    if (start_pixels) free(start_pixels);
    if (final_pixels) free(final_pixels);

    if (rc == 0) {
        fprintf(stderr, "gen_video done: %s\n", dst);
    }
    fflush(stderr);
    return rc;
}
|
||||
|
||||
int unload() {
|
||||
free_sd_ctx(sd_c);
|
||||
return 0;
|
||||
|
|
|
|||
|
|
@ -23,6 +23,7 @@ type SDGGML struct {
|
|||
var (
|
||||
LoadModel func(model, model_apth string, options []uintptr, threads int32, diff int) int
|
||||
GenImage func(params uintptr, steps int, dst string, cfgScale float32, srcImage string, strength float32, maskImage string, refImages []uintptr, refImagesCount int) int
|
||||
GenVideo func(params uintptr, steps int, dst string, cfgScale float32, fps int, initImage string, endImage string) int
|
||||
|
||||
TilingParamsSetEnabled func(params uintptr, enabled bool)
|
||||
TilingParamsSetTileSizes func(params uintptr, tileSizeX int, tileSizeY int)
|
||||
|
|
@ -34,6 +35,12 @@ var (
|
|||
ImgGenParamsSetDimensions func(params uintptr, width int, height int)
|
||||
ImgGenParamsSetSeed func(params uintptr, seed int64)
|
||||
ImgGenParamsGetVaeTilingParams func(params uintptr) uintptr
|
||||
|
||||
VidGenParamsNew func() uintptr
|
||||
VidGenParamsSetPrompts func(params uintptr, prompt string, negativePrompt string)
|
||||
VidGenParamsSetDimensions func(params uintptr, width int, height int)
|
||||
VidGenParamsSetSeed func(params uintptr, seed int64)
|
||||
VidGenParamsSetVideoFrames func(params uintptr, n int)
|
||||
)
|
||||
|
||||
// Copied from Purego internal/strings
|
||||
|
|
@ -153,3 +160,58 @@ func (sd *SDGGML) GenerateImage(opts *pb.GenerateImageRequest) error {
|
|||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (sd *SDGGML) GenerateVideo(opts *pb.GenerateVideoRequest) error {
|
||||
dst := opts.Dst
|
||||
if dst == "" {
|
||||
return fmt.Errorf("dst is empty")
|
||||
}
|
||||
|
||||
width := int(opts.Width)
|
||||
height := int(opts.Height)
|
||||
if width == 0 {
|
||||
width = 512
|
||||
}
|
||||
if height == 0 {
|
||||
height = 512
|
||||
}
|
||||
|
||||
numFrames := int(opts.NumFrames)
|
||||
if numFrames <= 0 {
|
||||
numFrames = 16
|
||||
}
|
||||
|
||||
fps := int(opts.Fps)
|
||||
if fps <= 0 {
|
||||
fps = 16
|
||||
}
|
||||
|
||||
steps := int(opts.Step)
|
||||
if steps <= 0 {
|
||||
steps = 20
|
||||
}
|
||||
|
||||
cfg := opts.CfgScale
|
||||
if cfg == 0 {
|
||||
cfg = sd.cfgScale
|
||||
}
|
||||
if cfg == 0 {
|
||||
cfg = 5.0
|
||||
}
|
||||
|
||||
// sd_vid_gen_params_new allocates; gen_video frees it after the generation call.
|
||||
p := VidGenParamsNew()
|
||||
VidGenParamsSetPrompts(p, opts.Prompt, opts.NegativePrompt)
|
||||
VidGenParamsSetDimensions(p, width, height)
|
||||
VidGenParamsSetSeed(p, int64(opts.Seed))
|
||||
VidGenParamsSetVideoFrames(p, numFrames)
|
||||
|
||||
fmt.Fprintf(os.Stderr, "GenerateVideo: dst=%s size=%dx%d frames=%d fps=%d steps=%d cfg=%.2f\n",
|
||||
dst, width, height, numFrames, fps, steps, cfg)
|
||||
|
||||
ret := GenVideo(p, steps, dst, cfg, fps, opts.StartImage, opts.EndImage)
|
||||
if ret != 0 {
|
||||
return fmt.Errorf("video inference failed (code %d)", ret)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
|
|
|||
|
|
@ -18,6 +18,13 @@ void sd_img_gen_params_set_seed(sd_img_gen_params_t *params, int64_t seed);
|
|||
|
||||
int load_model(const char *model, char *model_path, char* options[], int threads, int diffusionModel);
|
||||
int gen_image(sd_img_gen_params_t *p, int steps, char *dst, float cfg_scale, char *src_image, float strength, char *mask_image, char* ref_images[], int ref_images_count);
|
||||
|
||||
sd_vid_gen_params_t* sd_vid_gen_params_new(void);
|
||||
void sd_vid_gen_params_set_prompts(sd_vid_gen_params_t *params, const char *prompt, const char *negative_prompt);
|
||||
void sd_vid_gen_params_set_dimensions(sd_vid_gen_params_t *params, int width, int height);
|
||||
void sd_vid_gen_params_set_seed(sd_vid_gen_params_t *params, int64_t seed);
|
||||
void sd_vid_gen_params_set_video_frames(sd_vid_gen_params_t *params, int n);
|
||||
int gen_video(sd_vid_gen_params_t *p, int steps, char *dst, float cfg_scale, int fps, char *init_image, char *end_image);
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -32,6 +32,7 @@ func main() {
|
|||
libFuncs := []LibFuncs{
|
||||
{&LoadModel, "load_model"},
|
||||
{&GenImage, "gen_image"},
|
||||
{&GenVideo, "gen_video"},
|
||||
{&TilingParamsSetEnabled, "sd_tiling_params_set_enabled"},
|
||||
{&TilingParamsSetTileSizes, "sd_tiling_params_set_tile_sizes"},
|
||||
{&TilingParamsSetRelSizes, "sd_tiling_params_set_rel_sizes"},
|
||||
|
|
@ -42,6 +43,12 @@ func main() {
|
|||
{&ImgGenParamsSetDimensions, "sd_img_gen_params_set_dimensions"},
|
||||
{&ImgGenParamsSetSeed, "sd_img_gen_params_set_seed"},
|
||||
{&ImgGenParamsGetVaeTilingParams, "sd_img_gen_params_get_vae_tiling_params"},
|
||||
|
||||
{&VidGenParamsNew, "sd_vid_gen_params_new"},
|
||||
{&VidGenParamsSetPrompts, "sd_vid_gen_params_set_prompts"},
|
||||
{&VidGenParamsSetDimensions, "sd_vid_gen_params_set_dimensions"},
|
||||
{&VidGenParamsSetSeed, "sd_vid_gen_params_set_seed"},
|
||||
{&VidGenParamsSetVideoFrames, "sd_vid_gen_params_set_video_frames"},
|
||||
}
|
||||
|
||||
for _, lf := range libFuncs {
|
||||
|
|
|
|||
|
|
@ -80,51 +80,65 @@ func VideoEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, appConfi
|
|||
return echo.ErrBadRequest
|
||||
}
|
||||
|
||||
src := ""
|
||||
if input.StartImage != "" {
|
||||
|
||||
// Stage a base64- or URL-provided image into a temp file so the
|
||||
// backend can read it as a path. Used for both start_image and
|
||||
// (optional) end_image. Returns the temp file path, or "" if the
|
||||
// input is empty. Caller is responsible for the defer-cleanup.
|
||||
stageImage := func(ref string) (string, error) {
|
||||
if ref == "" {
|
||||
return "", nil
|
||||
}
|
||||
var fileData []byte
|
||||
var err error
|
||||
// check if input.File is an URL, if so download it and save it
|
||||
// to a temporary file
|
||||
if strings.HasPrefix(input.StartImage, "http://") || strings.HasPrefix(input.StartImage, "https://") {
|
||||
out, err := downloadFile(input.StartImage)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed downloading file:%w", err)
|
||||
if strings.HasPrefix(ref, "http://") || strings.HasPrefix(ref, "https://") {
|
||||
out, derr := downloadFile(ref)
|
||||
if derr != nil {
|
||||
return "", fmt.Errorf("failed downloading file: %w", derr)
|
||||
}
|
||||
defer os.RemoveAll(out)
|
||||
|
||||
fileData, err = os.ReadFile(out)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed reading file:%w", err)
|
||||
return "", fmt.Errorf("failed reading file: %w", err)
|
||||
}
|
||||
|
||||
} else {
|
||||
// base 64 decode the file and write it somewhere
|
||||
// that we will cleanup
|
||||
fileData, err = base64.StdEncoding.DecodeString(input.StartImage)
|
||||
fileData, err = base64.StdEncoding.DecodeString(ref)
|
||||
if err != nil {
|
||||
return err
|
||||
return "", err
|
||||
}
|
||||
}
|
||||
|
||||
// Create a temporary file
|
||||
outputFile, err := os.CreateTemp(appConfig.GeneratedContentDir, "b64")
|
||||
if err != nil {
|
||||
return err
|
||||
return "", err
|
||||
}
|
||||
// write the base64 result
|
||||
writer := bufio.NewWriter(outputFile)
|
||||
_, err = writer.Write(fileData)
|
||||
if err != nil {
|
||||
if _, err := writer.Write(fileData); err != nil {
|
||||
outputFile.Close()
|
||||
return err
|
||||
return "", err
|
||||
}
|
||||
if err := writer.Flush(); err != nil {
|
||||
outputFile.Close()
|
||||
return "", err
|
||||
}
|
||||
outputFile.Close()
|
||||
src = outputFile.Name()
|
||||
return outputFile.Name(), nil
|
||||
}
|
||||
|
||||
src, err := stageImage(input.StartImage)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if src != "" {
|
||||
defer os.RemoveAll(src)
|
||||
}
|
||||
|
||||
endSrc, err := stageImage(input.EndImage)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if endSrc != "" {
|
||||
defer os.RemoveAll(endSrc)
|
||||
}
|
||||
|
||||
xlog.Debug("Parameter Config", "config", config)
|
||||
|
||||
switch config.Backend {
|
||||
|
|
@ -184,7 +198,7 @@ func VideoEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, appConfi
|
|||
input.Prompt,
|
||||
input.NegativePrompt,
|
||||
src,
|
||||
input.EndImage,
|
||||
endSrc,
|
||||
output,
|
||||
input.NumFrames,
|
||||
input.FPS,
|
||||
|
|
|
|||
|
|
@ -15167,6 +15167,62 @@
|
|||
- sd-3
|
||||
- gpu
|
||||
url: "github:mudler/LocalAI/gallery/stablediffusion3.yaml@master"
|
||||
- name: wan-2.1-t2v-1.3b-ggml
|
||||
license: apache-2.0
|
||||
url: "github:mudler/LocalAI/gallery/wan-ggml.yaml@master"
|
||||
description: |
|
||||
Wan 2.1 T2V 1.3B — text-to-video diffusion model, GGUF-quantized for the
|
||||
stable-diffusion.cpp backend. Generates short (33-frame) 832x480 clips
|
||||
from a text prompt. Cheapest Wan variant, suitable for CPU-offloaded
|
||||
inference with ~10 GB of usable RAM.
|
||||
urls:
|
||||
- https://huggingface.co/calcuis/wan-gguf
|
||||
- https://huggingface.co/city96/umt5-xxl-encoder-gguf
|
||||
tags:
|
||||
- text-to-video
|
||||
- wan
|
||||
- video-generation
|
||||
- cpu
|
||||
- gpu
|
||||
overrides:
|
||||
parameters:
|
||||
model: wan2.1-t2v-1.3B-Q8_0.gguf
|
||||
files:
|
||||
- filename: "wan2.1-t2v-1.3B-Q8_0.gguf"
|
||||
uri: "huggingface://calcuis/wan-gguf/wan2.1-t2v-1.3B-Q8_0.gguf"
|
||||
- filename: "wan_2.1_vae.safetensors"
|
||||
uri: "https://huggingface.co/Comfy-Org/Wan_2.1_ComfyUI_repackaged/resolve/main/split_files/vae/wan_2.1_vae.safetensors"
|
||||
- filename: "umt5-xxl-encoder-Q8_0.gguf"
|
||||
uri: "huggingface://city96/umt5-xxl-encoder-gguf/umt5-xxl-encoder-Q8_0.gguf"
|
||||
- name: wan-2.1-i2v-14b-480p-ggml
|
||||
license: apache-2.0
|
||||
url: "github:mudler/LocalAI/gallery/wan-ggml.yaml@master"
|
||||
description: |
|
||||
Wan 2.1 I2V 14B 480P — image-to-video diffusion, GGUF Q4 quantization.
|
||||
Animates a reference image into a 33-frame 480p clip. Requires more
|
||||
RAM than the 1.3B T2V variant; CPU offload enabled by default.
|
||||
urls:
|
||||
- https://huggingface.co/city96/Wan2.1-I2V-14B-480P-gguf
|
||||
tags:
|
||||
- image-to-video
|
||||
- wan
|
||||
- video-generation
|
||||
- cpu
|
||||
- gpu
|
||||
overrides:
|
||||
parameters:
|
||||
model: wan2.1-i2v-14b-480p-Q4_K_M.gguf
|
||||
options:
|
||||
- "clip_vision_path:clip_vision_h.safetensors"
|
||||
files:
|
||||
- filename: "wan2.1-i2v-14b-480p-Q4_K_M.gguf"
|
||||
uri: "huggingface://city96/Wan2.1-I2V-14B-480P-gguf/wan2.1-i2v-14b-480p-Q4_K_M.gguf"
|
||||
- filename: "wan_2.1_vae.safetensors"
|
||||
uri: "https://huggingface.co/Comfy-Org/Wan_2.1_ComfyUI_repackaged/resolve/main/split_files/vae/wan_2.1_vae.safetensors"
|
||||
- filename: "umt5-xxl-encoder-Q8_0.gguf"
|
||||
uri: "huggingface://city96/umt5-xxl-encoder-gguf/umt5-xxl-encoder-Q8_0.gguf"
|
||||
- filename: "clip_vision_h.safetensors"
|
||||
uri: "https://huggingface.co/Comfy-Org/Wan_2.1_ComfyUI_repackaged/resolve/main/split_files/clip_vision/clip_vision_h.safetensors"
|
||||
- name: sd-1.5-ggml
|
||||
icon: https://avatars.githubusercontent.com/u/37351293
|
||||
license: creativeml-openrail-m
|
||||
|
|
|
|||
19
gallery/wan-ggml.yaml
Normal file
19
gallery/wan-ggml.yaml
Normal file
|
|
@ -0,0 +1,19 @@
|
|||
---
|
||||
name: "wan-ggml"
|
||||
|
||||
config_file: |
|
||||
backend: stablediffusion-ggml
|
||||
step: 20
|
||||
cfg_scale: 6.0
|
||||
options:
|
||||
- "diffusion_model"
|
||||
- "vae_decode_only:false"
|
||||
- "sampler:euler"
|
||||
- "scheduler:discrete"
|
||||
- "flow_shift:3.0"
|
||||
- "diffusion_flash_attn:true"
|
||||
- "offload_params_to_cpu:true"
|
||||
- "keep_vae_on_cpu:true"
|
||||
- "keep_clip_on_cpu:true"
|
||||
- "t5xxl_path:umt5-xxl-encoder-Q8_0.gguf"
|
||||
- "vae_path:wan_2.1_vae.safetensors"
|
||||
Loading…
Reference in a new issue