I tried applying video super-resolution (EDVR) to other data, but I'm getting very weak results: the output barely differs from the input in quality. Examples below (left is the output, right is the zoomed-in input).
I tried both the EDVR_REDS_SR_L and the EDVR_Vimeo90K_SR_L models, with varying input sizes, and got similar results. Is this to be expected? Given that the REDS4 dataset is also mostly street scenes, I would expect at least comparable performance here.
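To put a number on "barely differs", one quick check is to compare the EDVR output against a plain bicubic 4x upscale of the same input frame. A minimal sketch (the two file names below are placeholders for one of my frames):

import cv2
import numpy as np

def psnr(a, b):
    # PSNR in dB between two uint8 images of the same shape
    mse = np.mean((a.astype(np.float64) - b.astype(np.float64)) ** 2)
    return float('inf') if mse == 0 else 10 * np.log10(255.0 ** 2 / mse)

out = cv2.imread('results/streetscenes/frame_0001.png')  # EDVR output (placeholder path)
lq = cv2.imread('datasets/streetscenes/frame_0001.png')  # matching LQ input (placeholder path)
bicubic = cv2.resize(lq, (out.shape[1], out.shape[0]), interpolation=cv2.INTER_CUBIC)
print('PSNR(EDVR output vs bicubic upscale): {:.2f} dB'.format(psnr(out, bicubic)))

A very high PSNR here would mean the network output is close to what plain bicubic interpolation already gives.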
The code I'm using is below (adapted from test_Vid4_REDS4_with_GT.py and moved to the root folder of the repo). I did test it on the REDS4 dataset first, with no issues.
'''
Test Vid4 (SR) and REDS4 (SR-clean, SR-blur, deblur-clean, deblur-compression) datasets
'''
import sys
sys.path.insert(0, 'codes')
import os
import os.path as osp
import glob
import logging
import numpy as np
import cv2
import torch
import utils.util as util
import data.util as data_util
import models.archs.EDVR_arch as EDVR_arch
#################
# configurations
#################
device = torch.device('cuda')
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
data_mode = 'sharp_bicubic' # Vid4 | sharp_bicubic | blur_bicubic | blur | blur_comp
# Vid4: SR
# REDS4: sharp_bicubic (SR-clean), blur_bicubic (SR-blur);
# blur (deblur-clean), blur_comp (deblur-compression).
stage = 1  # 1 or 2; two-stage strategy is used for the REDS dataset (stage 2 not included in this trimmed script)
flip_test = False
############################################################################
#### model
model_path = 'experiments/pretrained_models/EDVR_REDS_SR_L.pth'
N_in = 5 # use N_in images to restore one HR image
predeblur, HR_in = False, False
back_RBs = 40
model = EDVR_arch.EDVR(128, N_in, 8, 5, back_RBs, predeblur=predeblur, HR_in=HR_in)  # nf=128, nframes=N_in, groups=8, front_RBs=5
test_dataset_folder = 'datasets/streetscenes'
#### evaluation (kept from the original script; unused here without GT)
crop_border = 0
border_frame = N_in // 2  # number of border frames when evaluating
# temporal padding mode
if data_mode == 'Vid4' or data_mode == 'sharp_bicubic':
    padding = 'new_info'
else:
    padding = 'replicate'
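# note: 'new_info' fills out-of-range neighbour indices with extra frames from
# the valid side (new information) instead of repeating the edge frame the way
# 'replicate' does (see data.util.index_generation)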
save_imgs = True
save_folder = 'results/streetscenes'
util.mkdirs(save_folder)
util.setup_logger('base', save_folder, 'test', level=logging.INFO, screen=True, tofile=True)
logger = logging.getLogger('base')
#### log info
logger.info('Data: {} - {}'.format(data_mode, test_dataset_folder))
logger.info('Padding mode: {}'.format(padding))
logger.info('Model path: {}'.format(model_path))
logger.info('Save images: {}'.format(save_imgs))
logger.info('Flip test: {}'.format(flip_test))
#### set up the models
model.load_state_dict(torch.load(model_path), strict=True)
model.eval()
model = model.to(device)
img_path_l = sorted(glob.glob(osp.join(test_dataset_folder, '*')))  # all frame paths, in order
max_idx = len(img_path_l)
if save_imgs:
    util.mkdirs(save_folder)
#### read LQ images (this adapted script has no GT)
imgs_LQ = data_util.read_img_seq(test_dataset_folder)  # (T, C, H, W) tensor of all frames
# process each image
for img_idx, img_path in enumerate(img_path_l):
    print(img_idx, img_path)
    img_name = osp.splitext(osp.basename(img_path))[0]
    # indices of the N_in neighbouring frames centred on the current one
    select_idx = data_util.index_generation(img_idx, max_idx, N_in, padding=padding)
    print('select_idx:', select_idx)
    imgs_in = imgs_LQ.index_select(0, torch.LongTensor(select_idx)).unsqueeze(0).to(device)  # (1, N, C, H, W)
    output = util.single_forward(model, imgs_in)
    output = util.tensor2img(output.squeeze(0))  # CHW float tensor -> HWC uint8 image
    if save_imgs:
        cv2.imwrite(osp.join(save_folder, '{}.png'.format(img_name)), output)
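# sanity check I added: the *_SR_L models are 4x SR models, so the saved output
# should be 4x the LQ input in each dimension; imgs_in/output still hold the
# values from the last loop iteration at this point
print('LQ frame (HxW): {}x{}'.format(imgs_in.shape[-2], imgs_in.shape[-1]))
print('output (HxW): {}x{}'.format(output.shape[0], output.shape[1]))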