MDQE视频实例分割
CVPR23_MDQE视频实例分割模型
  • 模型资讯
  • 模型资料

模型描述 (Model Description)

MDQE (Mining Discriminative Query Embeddings) (CVPR2023) 是一种视频实例分割算法,对视频中遮挡严重的物体分割尤其有效。

详情可参考论文

📃Paper

模型流程图
Mask图

运行环境 (Operating environment)

Dependencies and Installation

# git clone the original repository
git clone https://github.com/MinghanLi/MDQE_CVPR2023.git
cd MDQE_CVPR2023
  • Installation guide link
# Install modelscope
pip install modelscope

代码范例 (Code example)

from modelscope.pipelines import pipeline
from modelscope.models import Model
from modelscope.outputs import OutputKeys
from PIL import Image
import os

import argparse
import glob
import multiprocessing as mp
import os
import time
import cv2
import tqdm
import numpy as np

from detectron2.config import get_cfg
from detectron2.data.detection_utils import read_image
from detectron2.utils.logger import setup_logger
from detectron2.utils.file_io import PathManager

from predictor import VisualizationDemo
from mdqe import add_mdqe_config

def setup_cfg(args):
    """Build and return a frozen detectron2 config for MDQE inference.

    Merges, in order: the MDQE defaults, the YAML file from
    ``args.config_file``, any ``KEY VALUE`` overrides in ``args.opts``,
    and finally points MODEL.WEIGHTS at ``args.checkpoint``.
    """
    config = get_cfg()
    add_mdqe_config(config)  # register MDQE-specific config keys
    config.merge_from_file(args.config_file)
    config.merge_from_list(args.opts)
    config.MODEL.WEIGHTS = args.checkpoint
    # Freeze so downstream code cannot mutate the config accidentally.
    config.freeze()
    return config


def _str2bool(value):
    # argparse gives us a string; "--save-frames False" must not be truthy.
    return str(value).lower() not in ("false", "0", "no", "n", "off")


def get_parser():
    """Build the CLI argument parser for the MDQE demo script.

    Returns:
        argparse.ArgumentParser: parser exposing --config-file, --input,
        --output, --checkpoint, --save-frames and trailing config --opts.
    """
    parser = argparse.ArgumentParser(description="Detectron2 demo for builtin models")
    parser.add_argument(
        "--config-file",
        default="configs/R50_ovis_360.yaml",
        metavar="FILE",
        help="path to config file",
    )
    parser.add_argument(
        "--input",
        nargs="+",
        default=["test_imgs_sub"],
        help="A list of space separated input images",
    )
    parser.add_argument(
        "--output",
        default="test_out",
        help="A file or directory to save output visualizations.",
    )
    parser.add_argument(
        "--checkpoint",
        default="mdqe_r50_ovis_bs16_360p_f4.pth",
        help="Path to the checkpoint pth",
    )
    parser.add_argument(
        "--save-frames",
        # BUG FIX: without type=, any CLI value (even "False") was a truthy
        # string, so frame saving could never be disabled. The converter keeps
        # the default (True) backward-compatible.
        type=_str2bool,
        default=True,
        help="Save frame level image outputs.",
    )
    parser.add_argument(
        "--opts",
        help="Modify config options using the command-line 'KEY VALUE' pairs",
        default=[],
        nargs=argparse.REMAINDER,
    )
    return parser


# ---- Script entry: run MDQE video-instance-segmentation over each input ----
args = get_parser().parse_args()

cfg = setup_cfg(args)

# ModelScope pipeline wrapping the MDQE model (downloads weights on first use).
inference = pipeline('marquezx/cv_mdqe_video-instance-segmentation', model='marquezx/cv_mdqe_video-instance-segmentation', model_revision='v1.0.0')

# A single input argument may be a glob pattern; expand it to video dirs.
if len(args.input) == 1:
    args.input = glob.glob(os.path.expanduser(args.input[0]))
    # BUG FIX: was `assert`, which is silently stripped under `python -O`.
    if not args.input:
        raise FileNotFoundError("The input path(s) was not found")

if not os.path.isdir(args.output):
    PathManager.mkdirs(args.output)

for vid_path in tqdm.tqdm(args.input, disable=not args.output):
    # os.path.basename is portable, unlike splitting on "/".
    vid_file = os.path.basename(vid_path)
    out_vid_path = os.path.join(args.output, vid_file)
    if args.save_frames and not os.path.isdir(out_vid_path):
        PathManager.mkdirs(out_vid_path)

    # Read every frame in sorted order; BGR to stay consistent with evaluation.
    vid_frame_paths = sorted(PathManager.ls(vid_path))
    vid_frames = np.array([
        read_image(os.path.join(vid_path, img_file), format="BGR")
        for img_file in vid_frame_paths
    ])
    predictions, visualized_output = inference(vid_frames)

    if args.save_frames:
        for img_file, _vis_output in zip(vid_frame_paths, visualized_output):
            _vis_output.save(os.path.join(out_vid_path, img_file))

    # Guard against an empty video: indexing visualized_output[0] would crash.
    if not visualized_output:
        continue

    H, W = visualized_output[0].height, visualized_output[0].width

    # BUG FIX: the original also opened cv2.VideoCapture(-1) — a *camera*
    # capture device — which was never read from and only released. Writing
    # the mp4 needs nothing but a VideoWriter.
    fourcc = cv2.VideoWriter_fourcc(*"mp4v")
    out = cv2.VideoWriter(out_vid_path + ".mp4", fourcc, 10.0, (W, H), True)
    try:
        for _vis_output in visualized_output:
            # Visualizer images are RGB; OpenCV expects BGR.
            out.write(_vis_output.get_image()[:, :, ::-1])
    finally:
        # Release even if a frame write raises, so the container is finalized.
        out.release()

Citation

If you find our work helpful for your research, please consider citing it with the following BibTeX entry.

@InProceedings{Li_2023_CVPR,
    author    = {Li, Minghan and Li, Shuai and Xiang, Wangmeng and Zhang, Lei},
    title     = {MDQE: Mining Discriminative Query Embeddings To Segment Occluded Instances on Challenging Videos},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2023},
    pages     = {10524-10533}
}