mirror of
https://github.com/mudler/LocalAI
synced 2026-04-21 13:27:21 +00:00
feat(diffusers): add experimental support for sd_embed-style prompt embedding (#8504)
* add experimental support for sd_embed-style prompt embedding

Signed-off-by: Austen Dicken <cvpcsm@gmail.com>

* add doc equivalent to compel

Signed-off-by: Austen Dicken <cvpcsm@gmail.com>

* need to use flux1 embedding function for flux model

Signed-off-by: Austen Dicken <cvpcsm@gmail.com>

---------

Signed-off-by: Austen Dicken <cvpcsm@gmail.com>
This commit is contained in:
parent
79a25f7ae9
commit
cff972094c
10 changed files with 58 additions and 0 deletions
3
.env
3
.env
|
|
@ -26,6 +26,9 @@
|
||||||
## Disables COMPEL (Diffusers)
|
## Disables COMPEL (Diffusers)
|
||||||
# COMPEL=0
|
# COMPEL=0
|
||||||
|
|
||||||
|
## Enables/disables SD_EMBED prompt weighting (Diffusers): 1 enables, 0 (the default) disables
|
||||||
|
# SD_EMBED=0
|
||||||
|
|
||||||
## Enable/Disable single backend (useful if only one GPU is available)
|
## Enable/Disable single backend (useful if only one GPU is available)
|
||||||
# LOCALAI_SINGLE_ACTIVE_BACKEND=true
|
# LOCALAI_SINGLE_ACTIVE_BACKEND=true
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -115,6 +115,7 @@ Available pipelines: AnimateDiffPipeline, AnimateDiffVideoToVideoPipeline, ...
|
||||||
| Variable | Default | Description |
|
| Variable | Default | Description |
|
||||||
|----------|---------|-------------|
|
|----------|---------|-------------|
|
||||||
| `COMPEL` | `0` | Enable Compel for prompt weighting |
|
| `COMPEL` | `0` | Enable Compel for prompt weighting |
|
||||||
|
| `SD_EMBED` | `0` | Enable sd_embed for prompt weighting |
|
||||||
| `XPU` | `0` | Enable Intel XPU support |
|
| `XPU` | `0` | Enable Intel XPU support |
|
||||||
| `CLIPSKIP` | `1` | Enable CLIP skip support |
|
| `CLIPSKIP` | `1` | Enable CLIP skip support |
|
||||||
| `SAFETENSORS` | `1` | Use safetensors format |
|
| `SAFETENSORS` | `1` | Use safetensors format |
|
||||||
|
|
|
||||||
|
|
@ -40,6 +40,7 @@ from compel import Compel, ReturnedEmbeddingsType
|
||||||
from optimum.quanto import freeze, qfloat8, quantize
|
from optimum.quanto import freeze, qfloat8, quantize
|
||||||
from transformers import T5EncoderModel
|
from transformers import T5EncoderModel
|
||||||
from safetensors.torch import load_file
|
from safetensors.torch import load_file
|
||||||
|
from sd_embed.embedding_funcs import get_weighted_text_embeddings_sd15, get_weighted_text_embeddings_sdxl, get_weighted_text_embeddings_sd3, get_weighted_text_embeddings_flux1
|
||||||
|
|
||||||
# Import LTX-2 specific utilities
|
# Import LTX-2 specific utilities
|
||||||
from diffusers.pipelines.ltx2.export_utils import encode_video as ltx2_encode_video
|
from diffusers.pipelines.ltx2.export_utils import encode_video as ltx2_encode_video
|
||||||
|
|
@ -47,6 +48,7 @@ from diffusers import LTX2VideoTransformer3DModel, GGUFQuantizationConfig
|
||||||
|
|
||||||
_ONE_DAY_IN_SECONDS = 60 * 60 * 24
|
_ONE_DAY_IN_SECONDS = 60 * 60 * 24
|
||||||
COMPEL = os.environ.get("COMPEL", "0") == "1"
|
COMPEL = os.environ.get("COMPEL", "0") == "1"
|
||||||
|
SD_EMBED = os.environ.get("SD_EMBED", "0") == "1"
|
||||||
XPU = os.environ.get("XPU", "0") == "1"
|
XPU = os.environ.get("XPU", "0") == "1"
|
||||||
CLIPSKIP = os.environ.get("CLIPSKIP", "1") == "1"
|
CLIPSKIP = os.environ.get("CLIPSKIP", "1") == "1"
|
||||||
SAFETENSORS = os.environ.get("SAFETENSORS", "1") == "1"
|
SAFETENSORS = os.environ.get("SAFETENSORS", "1") == "1"
|
||||||
|
|
@ -737,6 +739,51 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
|
||||||
kwargs["prompt_embeds"] = conditioning
|
kwargs["prompt_embeds"] = conditioning
|
||||||
kwargs["pooled_prompt_embeds"] = pooled
|
kwargs["pooled_prompt_embeds"] = pooled
|
||||||
# pass the kwargs dictionary to the self.pipe method
|
# pass the kwargs dictionary to the self.pipe method
|
||||||
|
image = self.pipe(
|
||||||
|
guidance_scale=self.cfg_scale,
|
||||||
|
**kwargs
|
||||||
|
).images[0]
|
||||||
|
elif SD_EMBED:
|
||||||
|
if self.PipelineType == "StableDiffusionPipeline":
|
||||||
|
(
|
||||||
|
kwargs["prompt_embeds"],
|
||||||
|
kwargs["negative_prompt_embeds"],
|
||||||
|
) = get_weighted_text_embeddings_sd15(
|
||||||
|
pipe = self.pipe,
|
||||||
|
prompt = prompt,
|
||||||
|
neg_prompt = request.negative_prompt if hasattr(request, 'negative_prompt') else None,
|
||||||
|
)
|
||||||
|
if self.PipelineType == "StableDiffusionXLPipeline":
|
||||||
|
(
|
||||||
|
kwargs["prompt_embeds"],
|
||||||
|
kwargs["negative_prompt_embeds"],
|
||||||
|
kwargs["pooled_prompt_embeds"],
|
||||||
|
kwargs["negative_pooled_prompt_embeds"],
|
||||||
|
) = get_weighted_text_embeddings_sdxl(
|
||||||
|
pipe = self.pipe,
|
||||||
|
prompt = prompt,
|
||||||
|
neg_prompt = request.negative_prompt if hasattr(request, 'negative_prompt') else None
|
||||||
|
)
|
||||||
|
if self.PipelineType == "StableDiffusion3Pipeline":
|
||||||
|
(
|
||||||
|
kwargs["prompt_embeds"],
|
||||||
|
kwargs["negative_prompt_embeds"],
|
||||||
|
kwargs["pooled_prompt_embeds"],
|
||||||
|
kwargs["negative_pooled_prompt_embeds"],
|
||||||
|
) = get_weighted_text_embeddings_sd3(
|
||||||
|
pipe = self.pipe,
|
||||||
|
prompt = prompt,
|
||||||
|
neg_prompt = request.negative_prompt if hasattr(request, 'negative_prompt') else None
|
||||||
|
)
|
||||||
|
if self.PipelineType == "FluxTransformer2DModel":
|
||||||
|
(
|
||||||
|
kwargs["prompt_embeds"],
|
||||||
|
kwargs["pooled_prompt_embeds"],
|
||||||
|
) = get_weighted_text_embeddings_flux1(
|
||||||
|
pipe = self.pipe,
|
||||||
|
prompt = prompt,
|
||||||
|
)
|
||||||
|
|
||||||
image = self.pipe(
|
image = self.pipe(
|
||||||
guidance_scale=self.cfg_scale,
|
guidance_scale=self.cfg_scale,
|
||||||
**kwargs
|
**kwargs
|
||||||
|
|
|
||||||
|
|
@ -5,6 +5,7 @@ transformers
|
||||||
torchvision==0.22.1
|
torchvision==0.22.1
|
||||||
accelerate
|
accelerate
|
||||||
compel
|
compel
|
||||||
|
git+https://github.com/xhinker/sd_embed
|
||||||
peft
|
peft
|
||||||
sentencepiece
|
sentencepiece
|
||||||
torch==2.7.1
|
torch==2.7.1
|
||||||
|
|
|
||||||
|
|
@ -5,6 +5,7 @@ transformers
|
||||||
torchvision
|
torchvision
|
||||||
accelerate
|
accelerate
|
||||||
compel
|
compel
|
||||||
|
git+https://github.com/xhinker/sd_embed
|
||||||
peft
|
peft
|
||||||
sentencepiece
|
sentencepiece
|
||||||
torch
|
torch
|
||||||
|
|
|
||||||
|
|
@ -5,6 +5,7 @@ transformers
|
||||||
torchvision
|
torchvision
|
||||||
accelerate
|
accelerate
|
||||||
compel
|
compel
|
||||||
|
git+https://github.com/xhinker/sd_embed
|
||||||
peft
|
peft
|
||||||
sentencepiece
|
sentencepiece
|
||||||
torch
|
torch
|
||||||
|
|
|
||||||
|
|
@ -8,6 +8,7 @@ opencv-python
|
||||||
transformers
|
transformers
|
||||||
accelerate
|
accelerate
|
||||||
compel
|
compel
|
||||||
|
git+https://github.com/xhinker/sd_embed
|
||||||
peft
|
peft
|
||||||
sentencepiece
|
sentencepiece
|
||||||
optimum-quanto
|
optimum-quanto
|
||||||
|
|
|
||||||
|
|
@ -4,6 +4,7 @@ git+https://github.com/huggingface/diffusers
|
||||||
transformers
|
transformers
|
||||||
accelerate
|
accelerate
|
||||||
compel
|
compel
|
||||||
|
git+https://github.com/xhinker/sd_embed
|
||||||
peft
|
peft
|
||||||
optimum-quanto
|
optimum-quanto
|
||||||
numpy<2
|
numpy<2
|
||||||
|
|
|
||||||
|
|
@ -4,6 +4,7 @@ git+https://github.com/huggingface/diffusers
|
||||||
transformers
|
transformers
|
||||||
accelerate
|
accelerate
|
||||||
compel
|
compel
|
||||||
|
git+https://github.com/xhinker/sd_embed
|
||||||
peft
|
peft
|
||||||
optimum-quanto
|
optimum-quanto
|
||||||
numpy<2
|
numpy<2
|
||||||
|
|
|
||||||
|
|
@ -5,6 +5,7 @@ opencv-python
|
||||||
transformers
|
transformers
|
||||||
accelerate
|
accelerate
|
||||||
compel
|
compel
|
||||||
|
git+https://github.com/xhinker/sd_embed
|
||||||
peft
|
peft
|
||||||
sentencepiece
|
sentencepiece
|
||||||
optimum-quanto
|
optimum-quanto
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue