对Stable Diffusion模型进行高效调优。通过Lora-Tuner模块,在训练时只需训练少规模的参数,即可高效地定制专属于你的场景的"Stable Diffusion"模型!
本模型基础的Diffusion Model采用Stable-Diffusion-v1-5预训练模型,训练模块(Lora-Tuner模块)的参数量占总模型的约0.07%。
基于 ModelScope 框架,通过调用预定义的 Pipeline 可实现快速调用。
# Quick start: build a ready-to-use pipeline for the
# efficient-diffusion-tuning task, backed by the pretrained
# Stable-Diffusion + LoRA tuner model, and run one prompt through it.
from modelscope.pipelines import pipeline

task_name = 'efficient-diffusion-tuning'
pretrained_model = 'damo/multi-modal_efficient-diffusion-tuning-lora'
sd_lora_pipeline = pipeline(task_name, pretrained_model)

# Text-to-image generation from a single text prompt.
inputs = {'prompt': 'pale golden rod circle with old lace background'}
result = sd_lora_pipeline(inputs)
print(f'Output: {result}.')
以下过程基于fill50k数据集,实现了SD-Lora模型的训练及验证过程。
# Train and evaluate the SD-LoRA model on the fill50k dataset.
import tempfile
from modelscope.msdatasets import MsDataset
from modelscope.trainers import build_trainer
from modelscope.utils.constant import DownloadMode

# Model ID of the pretrained SD + LoRA tuner.
model_id = 'damo/multi-modal_efficient-diffusion-tuning-lora'

# Load the training split.
train_dataset = MsDataset.load(
    'damo/controlnet_dataset_condition_fill50k',
    split='train',
    download_mode=DownloadMode.FORCE_REDOWNLOAD)

# Load the validation split.
eval_dataset = MsDataset.load(
    'damo/controlnet_dataset_condition_fill50k',
    split='validation',
    download_mode=DownloadMode.FORCE_REDOWNLOAD)

# Work directory for checkpoints and logs.
# BUG FIX: `tempfile.TemporaryDirectory().name` returns the path of a
# directory that is removed as soon as the wrapper object is
# garbage-collected, so the trainer could end up writing to a deleted
# directory. `mkdtemp()` creates a directory that persists for the run.
tmp_dir = tempfile.mkdtemp()
max_epochs = 150  # number of training epochs


def cfg_modify_fn(cfg):
    """Adjust the model's default training configuration.

    Args:
        cfg: configuration object loaded from the model's config file.

    Returns:
        The modified configuration object.
    """
    cfg.train.max_epochs = max_epochs          # maximum training epochs
    cfg.train.lr_scheduler.T_max = max_epochs  # LR scheduler period
    cfg.train.optimizer.lr = 1e-4              # learning rate
    cfg.model.inference = False                # run in training mode
    return cfg


# Build the trainer.
kwargs = dict(
    model=model_id,               # model id
    work_dir=tmp_dir,             # work directory
    train_dataset=train_dataset,  # training set
    eval_dataset=eval_dataset,    # validation set
    cfg_modify_fn=cfg_modify_fn   # callback that edits the training config
)
trainer = build_trainer(name='trainer', default_args=kwargs)

# Train.
trainer.train()
# Evaluate.
result = trainer.evaluate()
print('result:', result)
# Run inference with the fine-tuned weights saved by the trainer.
import cv2
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
from modelscope.models import Model

# BUG FIX: the original read `args.work_dir`, but no `args` object exists in
# this script (it would raise NameError). The trainer writes its output under
# the work directory `tmp_dir` that was passed to build_trainer.
model_id_output = f"{tmp_dir}/output"
# Alternatively, load the released checkpoint instead of the local one:
# model_id_output = 'damo/multi-modal_efficient-diffusion-tuning-control-lora'

inputs = {'prompt': 'slate gray circle with dark slate gray background'}
task = Tasks.efficient_diffusion_tuning

# Build the inference pipeline from the fine-tuned model directory.
pipe = pipeline(task=task,
                model=model_id_output)
outputs = pipe(inputs)

# Persist the first generated image to disk.
cv2.imwrite('result11.png', outputs['output_imgs'][0])
如果该模型对您有所帮助,请引用下面的相关的论文:
@inproceedings{hu2021lora,
title={{LoRA}: Low-Rank Adaptation of Large Language Models},
author={Hu, Edward and Shen, Yelong and Wallis, Phil and Allen-Zhu, Zeyuan and Li, Yuanzhi and Wang, Lu and Chen, Weizhu},
booktitle={International Conference on Learning Representations (ICLR)},
year={2021}
}
@misc{rombach2021highresolution,
title={High-Resolution Image Synthesis with Latent Diffusion Models},
author={Robin Rombach and Andreas Blattmann and Dominik Lorenz and Patrick Esser and Björn Ommer},
year={2021},
eprint={2112.10752},
archivePrefix={arXiv},
primaryClass={cs.CV}
}