GLIGEN: Open-Set Grounded Text-to-Image Generation
from modelscope.pipelines import pipeline
# Build the 'gligen' pipeline from the hub model, pinned to a fixed revision.
pipe = pipeline(
    'gligen',
    model="jianguo_wjg/GLIGEN",
    model_revision="v1.0.1",
)

# Grounding request: one caption plus the phrases and boxes to ground in it.
meta = {
    "prompt": "a teddy bear sitting next to a bird",
    # Presumably phrases and locations are paired by position — confirm
    # against the model card.
    "phrases": ['a teddy bear', 'a bird'],
    # NOTE(review): boxes look like normalized [x_min, y_min, x_max, y_max]
    # coordinates in the 0..1 range — confirm.
    "locations": [
        [0.0, 0.09, 0.33, 0.76],
        [0.55, 0.11, 1.0, 0.8],
    ],
    # NOTE(review): meaning of the three weights is not visible here —
    # check the upstream GLIGEN inference documentation before changing them.
    "alpha_type": [0.3, 0.0, 0.7],
    "save_folder_name": "generation_box_text",
}

# The pipeline result is indexable; take the first item and write it to disk.
outputs = pipe(meta)
sample = outputs[0]
sample.save("out.png")
The model is licensed under the MIT license.
@article{li2023gligen,
title={GLIGEN: Open-Set Grounded Text-to-Image Generation},
author={Li, Yuheng and Liu, Haotian and Wu, Qingyang and Mu, Fangzhou and Yang, Jianwei and Gao, Jianfeng and Li, Chunyuan and Lee, Yong Jae},
journal={CVPR},
year={2023}
}
git clone https://www.modelscope.cn/jianguo_wjg/GLIGEN.git