reference image + text prompt
#12
by
kasiasta91
- opened
Hey!
Here is an example of how to use it with a reference image + prompt:
import torch
from diffusers import FluxPriorReduxPipeline, FluxPipeline
from transformers import CLIPTextModel, CLIPTokenizer, T5EncoderModel, T5TokenizerFast
from diffusers.utils import load_image
# Example: run the FLUX.1 Redux prior on a reference image (plus a text
# prompt) and feed its output into the FLUX.1-dev base pipeline.
device = "cuda"
dtype = torch.bfloat16

# Define the repo ids once so every component is pulled from the same place.
repo_redux = "black-forest-labs/FLUX.1-Redux-dev"
repo_base = "black-forest-labs/FLUX.1-dev"

# Text encoders and tokenizers are loaded from the base repo and handed to
# the Redux prior below so that it can encode `prompt`.
text_encoder = CLIPTextModel.from_pretrained(
    repo_base,
    subfolder="text_encoder",
    torch_dtype=dtype,
)
text_encoder_2 = T5EncoderModel.from_pretrained(
    repo_base,
    subfolder="text_encoder_2",
    torch_dtype=dtype,
)
tokenizer = CLIPTokenizer.from_pretrained(
    repo_base,
    subfolder="tokenizer",
)
tokenizer_2 = T5TokenizerFast.from_pretrained(
    repo_base,
    subfolder="tokenizer_2",
)

pipe_prior_redux = FluxPriorReduxPipeline.from_pretrained(
    repo_redux,
    text_encoder=text_encoder,
    text_encoder_2=text_encoder_2,
    tokenizer=tokenizer,
    tokenizer_2=tokenizer_2,
    torch_dtype=dtype,
).to(device)

# The base pipeline is only ever called with the prior's output unpacked into
# it (see `**pipe_prior_output` below), so it never encodes text itself.
# Passing None for its text encoders avoids loading a second copy of CLIP and
# T5 onto the GPU.
pipe = FluxPipeline.from_pretrained(
    repo_base,
    text_encoder=None,
    text_encoder_2=None,
    torch_dtype=dtype,
).to(device)

my_image = load_image("image.png")

# Put the text prompt here; it is left empty exactly as in the original example.
pipe_prior_output = pipe_prior_redux(
    my_image,
    prompt="",
)

# Fixed CPU seed so repeated runs start from the same generator state.
images = pipe(
    guidance_scale=2.5,
    num_inference_steps=50,
    generator=torch.Generator("cpu").manual_seed(0),
    **pipe_prior_output,
).images
Hey!
Here is an example of how to use it with a reference image + prompt:
import torch
from diffusers import FluxPriorReduxPipeline, FluxPipeline
from transformers import CLIPTextModel, CLIPTokenizer, T5EncoderModel, T5TokenizerFast
from diffusers.utils import load_image

# Example: run the FLUX.1 Redux prior on a reference image (plus a text
# prompt) and feed its output into the FLUX.1-dev base pipeline.
device = "cuda"
dtype = torch.bfloat16

# Define the repo ids once so every component is pulled from the same place.
repo_redux = "black-forest-labs/FLUX.1-Redux-dev"
repo_base = "black-forest-labs/FLUX.1-dev"

# Text encoders and tokenizers are loaded from the base repo and handed to
# the Redux prior below so that it can encode `prompt`.
text_encoder = CLIPTextModel.from_pretrained(
    repo_base,
    subfolder="text_encoder",
    torch_dtype=dtype,
)
text_encoder_2 = T5EncoderModel.from_pretrained(
    repo_base,
    subfolder="text_encoder_2",
    torch_dtype=dtype,
)
tokenizer = CLIPTokenizer.from_pretrained(
    repo_base,
    subfolder="tokenizer",
)
tokenizer_2 = T5TokenizerFast.from_pretrained(
    repo_base,
    subfolder="tokenizer_2",
)

pipe_prior_redux = FluxPriorReduxPipeline.from_pretrained(
    repo_redux,
    text_encoder=text_encoder,
    text_encoder_2=text_encoder_2,
    tokenizer=tokenizer,
    tokenizer_2=tokenizer_2,
    torch_dtype=dtype,
).to(device)

# The base pipeline is only ever called with the prior's output unpacked into
# it (see `**pipe_prior_output` below), so it never encodes text itself.
# Passing None for its text encoders avoids loading a second copy of CLIP and
# T5 onto the GPU.
pipe = FluxPipeline.from_pretrained(
    repo_base,
    text_encoder=None,
    text_encoder_2=None,
    torch_dtype=dtype,
).to(device)

my_image = load_image("image.png")

# Put the text prompt here; it is left empty exactly as in the original example.
pipe_prior_output = pipe_prior_redux(
    my_image,
    prompt="",
)

# Fixed CPU seed so repeated runs start from the same generator state.
images = pipe(
    guidance_scale=2.5,
    num_inference_steps=50,
    generator=torch.Generator("cpu").manual_seed(0),
    **pipe_prior_output,
).images
This does not help. When I use it to generate a text logo based on an existing text logo, the text in the output is still the text from the reference image.
Hi @ouhenio ! Thanks for your remark! I also think this does not work as expected.
For reference image like this:
And prompt "an illustration of a cute little girl with a blond hair and blue dress laying on the rainbow", I am getting something like:
which is essentially an image-based Redux variation with the embeddings only slightly adapted, but to me it is far from this (taken from the BFL reference page I linked in the very first post):