Stable Diffusion (Diffusers) / Batch image generation with ControlNet 1.1 on Google Colab #4 txt2img/img2img
Overview
Use prompts that exceed 77 tokens
Use a VAE, LoRA, and textual inversion
Supports txt2img/img2img
Supports from_single_file
Disable the NSFW filter's blacked-out images
Supports specifying multiple seeds
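As a rough illustration only, the relevant parameters might be set in the preparation cell like this (the variable names match the ones the script below expects; the values are placeholders):
prompt = "a detailed watercolor painting of a castle on a hill, dramatic clouds, soft morning light"  # may exceed 77 tokens
negative_prompt = "EasyNegative, low quality, blurry"
seed = "12345,67890"    # comma-separated seeds; "-1" = one random seed, "-2" = a new random seed per image
batch_count = 2         # images generated per input image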
Preparation
Complete the steps described here first:
https://memo.eightban.com/stable-diffusion/stable-diffusion-diffusers1
StableDiffusionControlNetPipeline
Reference information is included as comments. Delete it if you don't need it.
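The script assumes that parameters such as model_id, vae, lora_model_id, lora_weight_name, textual_inversion, embed_weight_name, token, controlnet_processor_id, controlnet_preprocessor_id, load_path, controlnet_path, save_path, prompt, negative_prompt, seed, width, height, Steps, CFG_scale, strength, batch_count, guess_mode, controlnet_conditioning_scale, from_single_file, and the controlnet_image_* flags have already been defined in the preparation notebook linked above.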
from diffusers import StableDiffusionControlNetPipeline, ControlNetModel, StableDiffusionControlNetImg2ImgPipeline
#
from diffusers import UniPCMultistepScheduler
from diffusers.models import AutoencoderKL
from diffusers.utils import load_image
#import torch.utils
#from controlnet_aux import PidiNetDetector,HEDdetector, MidasDetector, MLSDdetector, OpenposeDetector, PidiNetDetector, NormalBaeDetector, LineartDetector, LineartAnimeDetector, CannyDetector, ContentShuffleDetector, ZoeDetector, MediapipeFaceDetector, SamDetector, LeresDetector
from controlnet_aux.processor import Processor
from compel import Compel, DiffusersTextualInversionManager
#from transformers import CLIPTextModel, CLIPTokenizer
from PIL import Image, ImageOps
#import cv2
#import numpy as np
from natsort import natsorted
from huggingface_hub import HfApi
from pathlib import Path
import torch
import datetime
import os
import random
import glob
#
device = "cuda"
#device = "cpu"
if device == "cpu":
    torch_dtype = torch.float32
else:
    torch_dtype = torch.float16
#
#init_img = Image.open("/content/drive/MyDrive/Images/sunflower.png")
#init_img = load_image( "https://hf.co/datasets/huggingface/documentation-images/resolve/main/diffusers/input_image_vermeer.png")
file_format = "%Y%m%d_%H%M%S"
file_list1 = glob.glob(os.path.join(load_path, "*.png"))
file_list1.extend(glob.glob(os.path.join(load_path, "*.jpg")))
file_list1.extend(glob.glob(os.path.join(load_path, "*.jpeg")))
file_list1 = natsorted(file_list1)
file_list2 = glob.glob(os.path.join(controlnet_path, "*.png"))
file_list2.extend(glob.glob(os.path.join(controlnet_path, "*.jpg")))
file_list2.extend(glob.glob(os.path.join(controlnet_path, "*.jpeg")))
file_list2 = natsorted(file_list2)
if controlnet_image_loop == False:
    file_list = file_list1
    file_listx = file_list2
else:
    file_listx = file_list1
    file_list = file_list2
if not file_list2:
    file_list = file_list1
    file_listx = file_list2
# Scheduler used for image generation
#scheduler = EulerDiscreteScheduler.from_pretrained(model_id, subfolder="scheduler")
#scheduler = DPMSolverMultistepScheduler.from_pretrained(model_id, subfolder="scheduler")
#canny = CannyDetector()
#openpose = OpenposeDetector.from_pretrained('lllyasviel/ControlNet')
#hed = HEDdetector.from_pretrained('lllyasviel/Annotators')
#hed = HEDdetector.from_pretrained('lllyasviel/ControlNet')
if controlnet_preprocessor_id != "":
    processor = Processor(controlnet_preprocessor_id)
#controlnet = ControlNetModel.from_pretrained("lllyasviel/sd-controlnet-openpose", torch_dtype=torch.float16)
#controlnet = ControlNetModel.from_pretrained("lllyasviel/sd-controlnet-scribble", torch_dtype=torch.float16)
#controlnet = ControlNetModel.from_pretrained("lllyasviel/sd-controlnet-canny", torch_dtype=torch.float16)
#
controlnet = ControlNetModel.from_pretrained(controlnet_processor_id, torch_dtype=torch_dtype)
#tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-large-patch14")
#text_encoder = CLIPTextModel.from_pretrained("openai/clip-vit-large-patch14")
#vae = vae.to(device)
#text_encoder = text_encoder.to(device)
# Create the pipeline
#pipe = StableDiffusionControlNetPipeline.from_pretrained(model_id,
#scheduler=scheduler,#
if controlnet_image_loop == True and not file_list1:
    # txt2img: only ControlNet guide images are available, so the plain ControlNet pipeline is used
    if from_single_file == True:
        vae = AutoencoderKL.from_single_file(vae)
        pipe = StableDiffusionControlNetPipeline.from_single_file(model_id,
            controlnet=controlnet,
            #
            vae=vae,
            #
            # tokenizer = tokenizer,
            # text_encoder = text_encoder,
            # custom_pipeline="lpw_stable_diffusion",
            safety_checker=None,
            torch_dtype=torch_dtype,
            device=device)
    else:
        #
        vae = AutoencoderKL.from_pretrained(vae)
        pipe = StableDiffusionControlNetPipeline.from_pretrained(model_id,
            controlnet=controlnet,
            #
            vae=vae,
            #custom_pipeline="lpw_stable_diffusion",
            safety_checker=None,
            torch_dtype=torch_dtype)
else:
    # img2img: an input image plus a ControlNet guide image, so the img2img variant is required
    # (it accepts the image, control_image, and strength arguments used further below)
    if from_single_file == True:
        vae = AutoencoderKL.from_single_file(vae)
        pipe = StableDiffusionControlNetImg2ImgPipeline.from_single_file(model_id,
            controlnet=controlnet,
            #
            vae=vae,
            # custom_pipeline="lpw_stable_diffusion",
            safety_checker=None,
            torch_dtype=torch_dtype)
    else:
        vae = AutoencoderKL.from_pretrained(vae)
        pipe = StableDiffusionControlNetImg2ImgPipeline.from_pretrained(model_id,
            controlnet=controlnet,
            #
            vae=vae,
            # custom_pipeline="lpw_stable_diffusion",
            safety_checker=None,
            torch_dtype=torch_dtype)
pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)
#
if lora_model_id != "":
    pipe.load_lora_weights(lora_model_id, weight_name=lora_weight_name)
    pipe.enable_model_cpu_offload()
if textual_inversion != "":
    if device != "cpu":
        if embed_weight_name != "":
            pipe.load_textual_inversion(textual_inversion, weight_name=embed_weight_name, token=token)
        else:
            pipe.load_textual_inversion(textual_inversion, token=token)
#
#pipe.load_textual_inversion("embed/EasyNegative", weight_name="EasyNegative.safetensors", token="EasyNegative")
#pipe.load_textual_inversion("./charturnerv2.pt", token="charturnerv2")
#
pipe.to(device)
# Disable the NSFW restriction
#if pipe.safety_checker is not None:
# pipe.safety_checker = lambda images, **kwargs: (images, False)
pipe.safety_checker = lambda images, **kwargs: (images, [False] * len(images))
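# Seed handling: "-1" (or None) picks one random base seed and increments it per image,
# "-2" draws a new random seed for every image, and a comma-separated list such as
# "111,222,333" assigns one seed per batch index (missing entries fall back to the first seed + index).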
seeds = []
if seed is None or seed == "-1" or seed == "-2":
    init_Seed = random.randint(0, 2147483647)
else:
    seedresult = ''.join(seed)
    seeds = [int(x.strip()) for x in seedresult.split(',')]
    init_Seed = seeds[0]
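# resize_image: fit the image inside new_width x new_height while keeping its aspect ratio,
# centering it on a transparent RGBA canvas of exactly that size.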
def resize_image(image, new_width, new_height):
    width_ratio = new_width / image.width
    height_ratio = new_height / image.height
    ratio = min(width_ratio, height_ratio)
    new_size = (int(image.width * ratio), int(image.height * ratio))
    resized_image = image.resize(new_size, Image.LANCZOS)  # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the equivalent filter
    new_image = Image.new("RGBA", (new_width, new_height), (0, 0, 0, 0))
    x = (new_width - new_size[0]) // 2
    y = (new_height - new_size[1]) // 2
    new_image.paste(resized_image, (x, y))
    return new_image
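# add_padding: extend the canvas with transparent borders so the image matches the target aspect ratio.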
def add_padding(image, target_aspect_ratio):
    original_width, original_height = image.size
    if original_width / original_height > target_aspect_ratio:
        new_height = original_width / target_aspect_ratio
        new_width = original_width
    else:
        new_width = original_height * target_aspect_ratio
        new_height = original_height
    new_image = Image.new("RGBA", (int(new_width), int(new_height)), (0, 0, 0, 0))
    x_offset = (int(new_width) - original_width) // 2
    y_offset = (int(new_height) - original_height) // 2
    new_image.paste(image, (x_offset, y_offset))
    return new_image
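# concat_tensor: merge the per-chunk embeddings (split along the batch dimension) into one long sequence along the token dimension.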
def concat_tensor(t):
    t_list = torch.split(t, 1, dim=0)
    t = torch.cat(t_list, dim=1)
    return t
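# detokenize: turn a list of CLIP tokens back into readable text, restoring the spaces marked by '</w>',
# and remove the recovered chunk from the remaining prompt text.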
def detokenize(chunk, actual_prompt):
    chunk[-1] = chunk[-1].replace('</w>', '')
    chanked_prompt = ''.join(chunk).strip()
    while '</w>' in chanked_prompt:
        if actual_prompt[chanked_prompt.find('</w>')] == ' ':
            chanked_prompt = chanked_prompt.replace('</w>', ' ', 1)
        else:
            chanked_prompt = chanked_prompt.replace('</w>', '', 1)
    actual_prompt = actual_prompt.replace(chanked_prompt, '')
    return chanked_prompt.strip(), actual_prompt.strip()
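# tokenize_line: split a long prompt into chunks of at most (model_max_length - 2) tokens,
# preferring to break at commas, periods, or colons, so prompts longer than 77 tokens can be encoded.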
def tokenize_line(line, tokenizer):  # split into chunks
    actual_prompt = line.lower().strip()
    actual_tokens = tokenizer.tokenize(actual_prompt)
    max_tokens = tokenizer.model_max_length - 2
    separators = {
        'comma': tokenizer.tokenize(',')[0],
        'dot': tokenizer.tokenize('.')[0],
        'colon': tokenizer.tokenize(':')[0]
    }
    chunks = []
    chunk = []
    for item in actual_tokens:
        chunk.append(item)
        if len(chunk) == max_tokens:
            if chunk[-1] not in list(separators.values()):
                for i in range(max_tokens - 1, -1, -1):
                    if chunk[i] in list(separators.values()):
                        actual_chunk, actual_prompt = detokenize(chunk[:i+1], actual_prompt)
                        chunks.append(actual_chunk)
                        chunk = chunk[i+1:]
                        break
                else:
                    actual_chunk, actual_prompt = detokenize(chunk, actual_prompt)
                    chunks.append(actual_chunk)
                    chunk = []
            else:
                actual_chunk, actual_prompt = detokenize(chunk, actual_prompt)
                chunks.append(actual_chunk)
                chunk = []
    if chunk:
        actual_chunk, _ = detokenize(chunk, actual_prompt)
        chunks.append(actual_chunk)
    return chunks
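# On the GPU path, Compel encodes each chunk separately; the chunk embeddings are then concatenated
# and the positive/negative tensors are padded to the same length, which is how prompts over 77 tokens are used.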
if device == "cpu":
    #
    pass
else:
    textual_inversion_manager = DiffusersTextualInversionManager(pipe)
    #
    compel = Compel(tokenizer=pipe.tokenizer, text_encoder=pipe.text_encoder,
        #
        textual_inversion_manager=textual_inversion_manager,
        truncate_long_prompts=False,
        device=device)
    #positive_embeds = compel.build_conditioning_tensor(prompt)
    #negative_embeds = compel.build_conditioning_tensor(negative_prompt)
    #positive_embeds = compel([prompt])
    #negative_embeds = compel([negative_prompt])
    positive_embeds = compel(tokenize_line(prompt, pipe.tokenizer))
    negative_embeds = compel(tokenize_line(negative_prompt, pipe.tokenizer))
    [positive_embeds, negative_embeds] = compel.pad_conditioning_tensors_to_same_length([concat_tensor(positive_embeds), concat_tensor(negative_embeds)])
    ##[positive_embeds, negative_prompt] = compel.pad_conditioning_tensors_to_same_length([positive_embeds, negative_prompt])
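# Main loop: iterate over the image files, preprocess and save the ControlNet guide image,
# then generate batch_count images per file with either the txt2img or the img2img pipeline.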
idx2 = 0
for list_path in file_list:
    print(f'idx2: {idx2}.')
    idx2 += 1
    if controlnet_image_loop == True:
        if controlnet_first_image == True:
            if idx2 > 1:
                break
    if controlnet_image_loop == False:
        img_path = list_path
        infile_name = os.path.basename(img_path)
        initfile_path = os.path.join(controlnet_path, infile_name)
        if not os.path.exists(initfile_path):
            initfile_path = img_path
        if controlnet_first_image == True:
            if file_listx:
                initfile_path = file_listx[0]
    else:
        if file_listx:
            img_path = file_listx[0]
        else:
            img_path = ""
        initfile_path = list_path
    if file_list1:
        open_img = Image.open(img_path)
        open_img = resize_image(open_img, width, height)
    # open_img = Image.open(img_path).convert("RGB")
    # canny_image = canny(open_img)
    # openpose_image = openpose(open_img)
    # scribble_image = hed(open_img, scribble=True)
    init_img = Image.open(initfile_path)
    if controlnet_image_loop == True and not file_list1:
        pass
    else:
        init_img = resize_image(init_img, width, height)
    if controlnet_preprocessor_id != "":
        init_img = processor(init_img)
    if controlnet_image_resize == True:
        init_img = add_padding(init_img, width/height)
    controlnet_save_path = f"/content/output/controlnet"
    controlnet_image_name = os.path.basename(initfile_path)
    controlnet_image_name_no_extension = os.path.splitext(controlnet_image_name)[0]
    controlnet_image_name_extension = os.path.splitext(controlnet_image_name)[1]
    controlnet_image_name = controlnet_image_name_no_extension + f".png"
    controlnet_save_pathname = os.path.join(controlnet_save_path, controlnet_image_name)
    #
    init_img.save(controlnet_save_pathname)
    image_name = os.path.basename(img_path)
    image_name_no_extension = os.path.splitext(image_name)[0]
    image_name_extension = os.path.splitext(image_name)[1]
    if controlnet_image_loop == True and not file_list1:
        # Get the current time in Japan Standard Time
        jst_dattetime = datetime.datetime.now(datetime.timezone(datetime.timedelta(hours=9)))
        image_name_no_extension = jst_dattetime.strftime(file_format)
    idx = 0
    while idx < int(batch_count):
        #generator
        if seed == "-2":
            mSeed = random.randint(0, 2147483647)
        else:
            if len(seeds) > idx:
                if seeds[idx] != "":
                    mSeed = seeds[idx]
                else:
                    mSeed = init_Seed + idx
            else:
                mSeed = init_Seed + idx
        generator = torch.Generator(device=device).manual_seed(mSeed)
        #images = []
        if controlnet_image_loop == True and not file_list1:
            if device == "cpu":
                image = pipe(
                    #
                    prompt=prompt,
                    image=init_img,
                    #
                    negative_prompt=negative_prompt,
                    width=width, height=height, generator=generator,
                    controlnet_conditioning_scale=controlnet_conditioning_scale,
                    guidance_scale=CFG_scale, num_inference_steps=Steps,
                    #max_embeddings_multiples=2,
                    guess_mode=guess_mode
                ).images[0]
            else:
                image = pipe(
                    prompt_embeds=positive_embeds,
                    negative_prompt_embeds=negative_embeds,
                    # prompt=prompt,
                    image=init_img,
                    # negative_prompt=negative_prompt,
                    width=width, height=height, generator=generator,
                    controlnet_conditioning_scale=controlnet_conditioning_scale,
                    guidance_scale=CFG_scale, num_inference_steps=Steps,
                    #max_embeddings_multiples=2,
                    guess_mode=guess_mode
                ).images[0]
        else:
            image = pipe(
                prompt_embeds=positive_embeds,
                negative_prompt_embeds=negative_embeds,
                # prompt=prompt,
                image=open_img,
                control_image=init_img,
                # negative_prompt=negative_prompt,
                width=width, height=height, generator=generator,
                strength=strength,
                controlnet_conditioning_scale=controlnet_conditioning_scale,
                guidance_scale=CFG_scale, num_inference_steps=Steps,
                #max_embeddings_multiples=2,
                guess_mode=guess_mode
            ).images[0]
        # Build the output file name
        #outfile_name = (jst_dattetime.strftime(file_format) + "_" + str(mSeed) + "-" + str(idx))
        outfile_name = (image_name_no_extension + "_" + controlnet_image_name_no_extension + "_" + str(mSeed) + "-" + str(idx))
        image_name = outfile_name + f".png"
        # Save the image
        save_pathname = os.path.join(save_path, image_name)
        image.save(save_pathname)
        idx += 1