MDQE (Mining Discriminative Query Embeddings) (CVPR 2023) is a video instance segmentation algorithm that is particularly effective at segmenting heavily occluded objects in videos.
For details, please refer to the paper.
# Clone the original repository
git clone https://github.com/MinghanLi/MDQE_CVPR2023.git
cd MDQE_CVPR2023
# Install modelscope
pip install modelscope
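The demo script below additionally depends on detectron2, OpenCV, and tqdm, which are not pulled in by modelscope. One common setup (an assumption here; it presumes PyTorch is already installed and that a source install of detectron2 matches your CUDA/PyTorch versions) is:
# Install detectron2 from source
pip install 'git+https://github.com/facebookresearch/detectron2.git'
# Install the remaining demo dependencies
pip install opencv-python tqdm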
from modelscope.pipelines import pipeline
from modelscope.models import Model
from modelscope.outputs import OutputKeys
from PIL import Image
import os
import argparse
import glob
import multiprocessing as mp
import time
import cv2
import tqdm
import numpy as np
from detectron2.config import get_cfg
from detectron2.data.detection_utils import read_image
from detectron2.utils.logger import setup_logger
from detectron2.utils.file_io import PathManager
from predictor import VisualizationDemo
from mdqe import add_mdqe_config
def setup_cfg(args):
    # load config from file and command-line arguments
    cfg = get_cfg()
    add_mdqe_config(cfg)
    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.MODEL.WEIGHTS = args.checkpoint
    cfg.freeze()
    return cfg
def get_parser():
    parser = argparse.ArgumentParser(description="Detectron2 demo for builtin models")
    parser.add_argument(
        "--config-file",
        default="configs/R50_ovis_360.yaml",
        metavar="FILE",
        help="path to config file",
    )
    parser.add_argument(
        "--input",
        nargs="+",
        default=["test_imgs_sub"],
        help="A list of space-separated input video directories, each containing the frame images of one video",
    )
    parser.add_argument(
        "--output",
        default="test_out",
        help="A file or directory to save output visualizations.",
    )
    parser.add_argument(
        "--checkpoint",
        default="mdqe_r50_ovis_bs16_360p_f4.pth",
        help="Path to the checkpoint .pth file",
    )
    parser.add_argument(
        "--save-frames",
        default=True,
        help="Save frame-level image outputs.",
    )
    parser.add_argument(
        "--opts",
        help="Modify config options using the command-line 'KEY VALUE' pairs",
        default=[],
        nargs=argparse.REMAINDER,
    )
    return parser
args = get_parser().parse_args()
cfg = setup_cfg(args)
# Build the inference pipeline; the first argument is the ModelScope task name
# ('video-instance-segmentation' is assumed for this model card), not the model id
inference = pipeline('video-instance-segmentation', model='marquezx/cv_mdqe_video-instance-segmentation', model_revision='v1.0.0')
if len(args.input) == 1:
    args.input = glob.glob(os.path.expanduser(args.input[0]))
    assert args.input, "The input path(s) was not found"
if not os.path.isdir(args.output):
    PathManager.mkdirs(args.output)
for vid_path in tqdm.tqdm(args.input, disable=not args.output):
    vid_file = os.path.basename(vid_path)
    out_vid_path = os.path.join(args.output, vid_file)
    if args.save_frames and not os.path.isdir(out_vid_path):
        PathManager.mkdirs(out_vid_path)

    # read every frame of the video in sorted file-name order
    vid_frame_paths = sorted(PathManager.ls(vid_path))
    vid_frames = []
    for img_file in vid_frame_paths:
        img_path = os.path.join(vid_path, img_file)
        # use PIL, to be consistent with evaluation
        img = read_image(img_path, format="BGR")
        vid_frames.append(img)
    vid_frames = np.array(vid_frames)

    # run MDQE inference on the whole clip at once
    output = inference(vid_frames)
    predictions, visualized_output = output

    # optionally save the per-frame visualizations
    if args.save_frames:
        for img_file, _vis_output in zip(vid_frame_paths, visualized_output):
            out_filename = os.path.join(out_vid_path, img_file)
            _vis_output.save(out_filename)

    # stitch the visualized frames into an mp4 video at 10 FPS
    H, W = visualized_output[0].height, visualized_output[0].width
    fourcc = cv2.VideoWriter_fourcc(*"mp4v")
    out = cv2.VideoWriter(out_vid_path + ".mp4", fourcc, 10.0, (W, H), True)
    for _vis_output in visualized_output:
        frame = _vis_output.get_image()[:, :, ::-1]  # RGB -> BGR for OpenCV
        out.write(frame)
    out.release()
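Each input path is treated as one video, given as a directory of frame images that are processed in sorted file-name order. Assuming the script above is saved as demo.py (the file name is an assumption), a typical invocation is:
# run MDQE inference on every video directory matching the glob
python demo.py --input "test_imgs_sub/*" --output test_out --checkpoint mdqe_r50_ovis_bs16_360p_f4.pth
When --save-frames is enabled, per-frame visualizations are written to test_out/<video_name>/, and a stitched <video_name>.mp4 is saved alongside them.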
If you find our work helpful for your research, please consider citing the following BibTeX entry:
@InProceedings{Li_2023_CVPR,
    author    = {Li, Minghan and Li, Shuai and Xiang, Wangmeng and Zhang, Lei},
    title     = {MDQE: Mining Discriminative Query Embeddings To Segment Occluded Instances on Challenging Videos},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2023},
    pages     = {10524-10533}
}