# Copyright (c) Alibaba, Inc. and its affiliates.
from modelscope.outputs import OutputKeys
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
import cv2

# Demo: run text-driven segmentation on a sample image and save the mask.
input_location = 'https://clip-multimodal.oss-cn-beijing.aliyuncs.com/xingguang/maas/data/text_driven_seg_demo.jpg'

# The pipeline is called with a dict holding the image location and the
# text prompt that names what should be segmented.
test_input = {
    'image': input_location,
    'text': 'bear',
}

model_id = 'damo/cv_vitl16_segmentation_text-driven-seg'
shop_seg = pipeline(Tasks.text_driven_segmentation, model=model_id)
result = shop_seg(test_input)

# result[OutputKeys.MASKS] is the segmentation map; the other keys of the
# result are not used here.
output_path = 'demo_textdrivenseg.jpg'

# cv2.imwrite reports failure by returning False instead of raising, so the
# return value has to be checked or a failed write goes unnoticed.
if not cv2.imwrite(output_path, result[OutputKeys.MASKS]):
    raise OSError(f'failed to write segmentation mask to {output_path}')
训练数据为约 20 万条粗标注的通用分割数据。
该模型在 FSS-1000 数据集的测试集上的 zero-shot IoU 为 85.14。
@inproceedings{
li2022languagedriven,
title={Language-driven Semantic Segmentation},
author={Boyi Li and Kilian Q Weinberger and Serge Belongie and Vladlen Koltun and Rene Ranftl},
booktitle={International Conference on Learning Representations},
year={2022},
url={https://openreview.net/forum?id=RriDjddCLN}
}