Source code for dvt.input
# -*- coding: utf-8 -*-
"""Input objects
"""
import glob
import itertools
import cv2
import numpy as np
from .core import VisualInput, FrameBatch
from .utils import _expand_path
[docs]class FrameInput(VisualInput):
"""An input object for extracting batches of images from an input video.
Once initialized, subsequent calls to the next_batch method should be
called to cycle through batches of frames. The continue_read flag will be
turn false when all of data from the source has been returned within a
batch. Note that this does not include the look-ahead region. The final
batch will include padding by zeros (black) in this region.
Attributes:
bsize (int): Number of frames in a batch.
fcount (int): Frame counter for the first frame in the current batch.
vname (str): Name of the video file.
continue_read (bool): Indicates whether there more frames to read from
the input.
start (float): Time code at the start of the current batch.
end (float): Time code at the end of the current batch.
meta (dict): A dictionary containing additional metadata about the
video file.
"""
def __init__(self, **kwargs):
"""Construct a new input from a video file.
Args:
input_path (str): Path to the video file. Can be any file readable
by the OpenCV function VideoCapture.
bsize (int): Number of frames to include in a batch. Defaults to
256.
"""
self.input_path = _expand_path(kwargs["input_path"])[0]
self.bsize = kwargs.get("bsize", 256)
self.meta = None
[docs] def open_input(self):
"""Open connection to the video file.
"""
# start settings to
self.fcount = 0
self.continue_read = True
self.start = 0
self.end = 0
self._video_cap = cv2.VideoCapture(self.input_path)
self.meta = self._metadata()
self._img = np.zeros(
(self.bsize * 2, self.meta["height"], self.meta["width"], 3),
dtype=np.uint8,
)
self._fill_bandwidth() # fill the buffer with the first batch
self._continue = True # is there any more input left in the video
[docs] def next_batch(self):
"""Move forward one batch and return the current FrameBatch object.
Returns:
A FrameBatch object that contains the next set of frames.
"""
assert self.continue_read, "No more input to read."
# shift window over by one bandwidth
self._img[:self.bsize, :, :, :] = self._img[self.bsize:, :, :, :]
# fill up the bandwidth; with zeros at and of video input
if self._continue:
self._fill_bandwidth()
else:
self.continue_read = self._continue
self._img[self.bsize:, :, :, :] = 0
# update counters
frame_start = self.fcount
self.start = self.end
self.end = self._video_cap.get(cv2.CAP_PROP_POS_MSEC)
self.fcount = self.fcount + self.bsize
# get frame names
fnames = list(range(int(frame_start), int(frame_start + self.bsize)))
# return batch of frames.
return FrameBatch(
img=self._img,
start=self.start,
end=self.end,
continue_read=self.continue_read,
fnames=fnames,
bnum=(frame_start // self.bsize),
)
def _metadata(self):
"""Fill metadata attribute using metadata from the video source.
"""
path, bname, filename, file_extension = _expand_path(self.input_path)
return {
"type": "video",
"fps": self._video_cap.get(cv2.CAP_PROP_FPS),
"frames": int(self._video_cap.get(cv2.CAP_PROP_FRAME_COUNT)),
"height": int(self._video_cap.get(cv2.CAP_PROP_FRAME_HEIGHT)),
"width": int(self._video_cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
"input_path": path,
"input_bname": bname,
"input_filename": filename,
"input_file_extension": file_extension,
}
def _fill_bandwidth(self):
"""Read in the next set of frames from disk and store results.
This should not be called directly, but only through the next_batch
method. Otherwise the internal counters will become inconsistent.
"""
for idx in range(self.bsize):
self._continue, frame = self._video_cap.read()
if self._continue:
rgb_id = cv2.COLOR_BGR2RGB
self._img[idx + self.bsize, :, :, :] = cv2.cvtColor(
frame, rgb_id
)
else:
self._img[idx + self.bsize, :, :, :] = 0
[docs]class ImageInput(VisualInput):
"""An input object for create batches of images from input images.
Once initialized, subsequent calls to the next_batch method should be
called to cycle through batches of frames. The continue_read flag will be
turn false when all of data from the sources has been returned. Note that
the batch will always be of size 1 and include a look-ahead region of all
black pixels. This is needed because not all images will be the same size.
Attributes:
bsize (int): Number of frames in a batch. Always 1.
vname (str): Name of the video file.
continue_read (bool): Indicates whether there more frames to read from
the input.
fcount (int): Pointer to the next image to return.
meta (dict): A dictionary containing additional metadata about the
input images.
"""
def __init__(self, **kwargs):
"""Construct a new input from a set of paths.
Args:
input_paths (str or list): Paths the images. Will use glob
expansion on the elements.
"""
self.continue_read = True
self.fcount = 0
# find input paths
input_paths = kwargs.get("input_paths")
if not isinstance(input_paths, list):
input_paths = [input_paths]
input_paths = [glob.glob(x, recursive=True) for x in input_paths]
self.paths = list(itertools.chain.from_iterable(input_paths))
# create metadata
self.meta = {"type": "image", "paths": self.paths}
[docs] def next_batch(self):
"""Move forward one batch and return the current FrameBatch object.
Returns:
A FrameBatch object that contains the next set of frames.
"""
assert self.continue_read, "No more input to read."
this_index = self.fcount
# read the next image and create buffer
img = cv2.imread(self.paths[this_index])
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
img = np.stack([img, np.zeros_like(img)])
# is this the last image?
self.fcount += 1
if self.fcount >= len(self.paths):
self.continue_read = False
# return batch of frames.
return FrameBatch(
img=img,
start=float(this_index),
end=float(this_index),
continue_read=self.continue_read,
fnames=[self.paths[this_index]],
bnum=this_index
)