From 36c3ef44d3267a6d15c90a5372a5e8f809f66068 Mon Sep 17 00:00:00 2001 From: Ivan Skorokhodov Date: Mon, 23 Aug 2021 16:39:06 +0300 Subject: [PATCH 01/14] feat: add the downloading script --- download.py | 242 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 242 insertions(+) create mode 100644 download.py diff --git a/download.py b/download.py new file mode 100644 index 0000000..3e21b61 --- /dev/null +++ b/download.py @@ -0,0 +1,242 @@ +""" +This file downloads almost all the videos from the HDTF dataset. Some videos are discarded for the following reasons: +- they do not contain cropping information because they are somewhat noisy (hand moving, background changing, etc.) +- they are not available on youtube anymore (at all or in the specified format) + +The discarded videos constitute a small portion of the dataset, so you can try to re-download them manually on your own. + +Usage: +``` +$ python download.py --output_dir /tmp/data/hdtf --num_workers 8 +``` + +You need tqdm and youtube-dl libraries to be installed for this script to work. +""" + + +import os +import argparse +from typing import List, Dict +from multiprocessing import Pool +import subprocess +from subprocess import Popen, PIPE +from urllib import parse + +from tqdm import tqdm + + +subsets = ["RD", "WDA", "WRA"] + + +def download_hdtf(source_dir: os.PathLike, output_dir: os.PathLike, num_workers: int, **process_video_kwargs): + os.makedirs(output_dir, exist_ok=True) + os.makedirs(os.path.join(output_dir, '_videos_raw'), exist_ok=True) + + download_queue = construct_download_queue(source_dir, output_dir) + task_kwargs = [dict( + video_data=vd, + output_dir=output_dir, + **process_video_kwargs, + ) for vd in download_queue] + pool = Pool(processes=num_workers) + tqdm_kwargs = dict(total=len(task_kwargs), desc=f'Downloading videos into {output_dir} (note: without sound)') + + for _ in tqdm(pool.imap_unordered(task_proxy, task_kwargs), **tqdm_kwargs): + pass + + print('Download is finished, you can now (optionally) delete the following directories, since they are not needed anymore and occupy a lot of space:') + print(' -', os.path.join(output_dir, '_videos_raw')) + + +def construct_download_queue(source_dir: os.PathLike, output_dir: os.PathLike) -> List[Dict]: + download_queue = [] + + for subset in subsets: + video_urls = read_file_as_space_separated_data(os.path.join(source_dir, f'{subset}_video_url.txt')) + crops = read_file_as_space_separated_data(os.path.join(source_dir, f'{subset}_crop_wh.txt')) + intervals = read_file_as_space_separated_data(os.path.join(source_dir, f'{subset}_annotion_time.txt')) + resolutions = read_file_as_space_separated_data(os.path.join(source_dir, f'{subset}_resolution.txt')) + + for video_name, (video_url,) in video_urls.items(): + if not f'{video_name}.mp4' in intervals: + print(f'Entire {subset}/{video_name} does not contain any clip intervals, hence is broken. Discarding it.') + continue + + if not f'{video_name}.mp4' in resolutions or len(resolutions[f'{video_name}.mp4']) > 1: + print(f'Entire {subset}/{video_name} does not contain the resolution (or it is in a bad format), hence is broken. Discarding it.') + continue + + all_clips_intervals = [x.split('-') for x in intervals[f'{video_name}.mp4']] + clips_crops = [] + clips_intervals = [] + + for clip_idx, clip_interval in enumerate(all_clips_intervals): + clip_name = f'{video_name}_{clip_idx}.mp4' + if not clip_name in crops: + print(f'Clip {subset}/{clip_name} is not present in crops, hence is broken. Discarding it.') + continue + clips_crops.append(crops[clip_name]) + clips_intervals.append(clip_interval) + + clips_crops = [list(map(int, cs)) for cs in clips_crops] + + if len(clips_crops) == 0: + print(f'Entire {subset}/{video_name} does not contain any crops, hence is broken. Discarding it.') + continue + + assert len(clips_intervals) == len(clips_crops) + assert set([len(vi) for vi in clips_intervals]) == {2}, f"Broken time interval, {clips_intervals}" + assert set([len(vc) for vc in clips_crops]) == {4}, f"Broken crops, {clips_crops}" + assert all([vc[1] == vc[3] for vc in clips_crops]), f'Some crops are not square, {clips_crops}' + + download_queue.append({ + 'name': f'{subset}_{video_name}', + 'id': parse.parse_qs(parse.urlparse(video_url).query)['v'][0], + 'intervals': clips_intervals, + 'crops': clips_crops, + 'output_dir': output_dir, + 'resolution': resolutions[f'{video_name}.mp4'][0] + }) + + return download_queue + + +def task_proxy(kwargs): + return download_and_process_video(**kwargs) + + +def download_and_process_video(video_data: Dict, output_dir: str): + """ + Downloads the video and cuts/crops it into several ones according to the provided time intervals + """ + raw_download_path = os.path.join(output_dir, '_videos_raw', f"{video_data['name']}.mp4") + raw_download_log_file = os.path.join(output_dir, '_videos_raw', f"{video_data['name']}_download_log.txt") + download_result = download_video(video_data['id'], raw_download_path, resolution=video_data['resolution'], log_file=raw_download_log_file) + + if not download_result: + print('Failed to download', video_data) + print(f'See {raw_download_log_file} for details') + return + + # We do not know beforehand, what will be the resolution of the downloaded video + # Youtube-dl selects a (presumably) highest one + video_resolution = get_video_resolution(raw_download_path) + if not video_resolution != video_data['resolution']: + print(f"Downloaded resolution is not correct for {video_data['name']}: {video_resolution} vs {video_data['name']}. Discarding this video.") + return + + for clip_idx in range(len(video_data['intervals'])): + start, end = video_data['intervals'][clip_idx] + clip_name = f'{video_data["name"]}_{clip_idx:03d}' + clip_path = os.path.join(output_dir, clip_name + '.mp4') + crop_success = cut_and_crop_video(raw_download_path, clip_path, start, end, video_data['crops'][clip_idx]) + + if not crop_success: + print(f'Failed to cut-and-crop clip #{clip_idx}', video_data) + continue + + +def read_file_as_space_separated_data(filepath: os.PathLike) -> Dict: + """ + Reads a file as a space-separated dataframe, where the first column is the index + """ + with open(filepath, 'r') as f: + lines = f.read().splitlines() + lines = [[v.strip() for v in l.strip().split(' ')] for l in lines] + data = {l[0]: l[1:] for l in lines} + + return data + + +def download_video(video_id, download_path, resolution: int=None, video_format="mp4", log_file=None): + """ + Download video from YouTube. + :param video_id: YouTube ID of the video. + :param download_path: Where to save the video. + :param video_format: Format to download. + :param log_file: Path to a log file for youtube-dl. + :return: Tuple: path to the downloaded video and a bool indicating success. + + Copy-pasted from https://github.com/ytdl-org/youtube-dl + """ + # if os.path.isfile(download_path): return True # File already exists + + if log_file is None: + stderr = subprocess.DEVNULL + else: + stderr = open(log_file, "a") + video_selection = f"bestvideo[ext={video_format}]" + video_selection = video_selection if resolution is None else f"{video_selection}[height={resolution}]" + command = [ + "youtube-dl", + "https://youtube.com/watch?v={}".format(video_id), "--quiet", "-f", + video_selection, + "--output", download_path, + "--no-continue" + ] + return_code = subprocess.call(command, stderr=stderr) + success = return_code == 0 + + if log_file is not None: + stderr.close() + + return success and os.path.isfile(download_path) + + +def get_video_resolution(video_path: os.PathLike) -> int: + command = ' '.join([ + "ffprobe", + "-v", "error", + "-select_streams", "v:0", "-show_entries", "stream=height", "-of", "csv=p=0", + video_path + ]) + + process = Popen(command, stdout=PIPE, shell=True) + (output, err) = process.communicate() + return_code = process.wait() + success = return_code == 0 + + if not success: + print('Command failed:', command) + return -1 + + return int(output) + + +def cut_and_crop_video(raw_video_path, output_path, start, end, crop: List[int]): + # if os.path.isfile(output_path): return True # File already exists + + x, out_w, y, out_h = crop + + command = ' '.join([ + "ffmpeg", "-i", raw_video_path, + "-strict", "-2", # Some legacy arguments + "-loglevel", "quiet", # Verbosity arguments + "-qscale", "0", # Preserve the quality + "-y", # Overwrite if the file exists + "-ss", str(start), "-to", str(end), # Cut arguments + "-filter:v", f'"crop={out_w}:{out_h}:{x}:{y}"', # Crop arguments + output_path + ]) + + return_code = subprocess.call(command, shell=True) + success = return_code == 0 + + if not success: + print('Command failed:', command) + + return success + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Download HDTF dataset") + parser.add_argument('-s', '--source_dir', type=str, default='HDTF_dataset', help='Path to the directory with the dataset') + parser.add_argument('-o', '--output_dir', type=str, help='Where to save the videos?') + parser.add_argument('-w', '--num_workers', type=int, default=8, help='Number of workers for downloading') + args = parser.parse_args() + + download_hdtf( + args.source_dir, + args.output_dir, + args.num_workers, + ) From 8c402f412953c77771b5c4ae03dac6dc6bdb95e2 Mon Sep 17 00:00:00 2001 From: Ivan Skorokhodov Date: Mon, 23 Aug 2021 16:44:38 +0300 Subject: [PATCH 02/14] doc: add the downloading instructions --- README.md | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index fcd4cd7..036b12a 100644 --- a/README.md +++ b/README.md @@ -1,11 +1,11 @@ # HDTF -Flow-guided One-shot Talking Face Generation with a High-resolution Audio-visual Dataset +Flow-guided One-shot Talking Face Generation with a High-resolution Audio-visual Dataset paper supplementary ## Details of HDTF dataset **./HDTF_dataset** consists of *youtube video url*, *video resolution* (in our method, may not be the best resolution), *time stamps of talking face*, *facial region* (in the our method) and *the zoom scale* of the cropped window. -**xx_video_url.txt:** +**xx_video_url.txt:** ``` @@ -29,24 +29,31 @@ format: video name+clip index | min_width | width | min_height | height (in format: video name+clip index | window zoom scale ``` - ## Processing of HDTF dataset -When using HDTF dataset, +When using HDTF dataset, - We provide video and url in **xx_video_url.txt**. (the highest definition of videos are 1080P or 720P). Transform video into **.mp4** format and transform interlaced video to progressive video as well. - We split long original video into talking head clips with time stamps in **xx_annotion_time.txt**. Name the splitted clip as **video name_clip index.mp4**. For example, split the video *Radio11.mp4 00:30-01:00 01:30-02:30* into *Radio11_0.mp4* and *Radio11_1.mp4* . - - Our work does not always download videos with the best resolution, so we provide two cropping methods. Thanks @universome and @Feii Yin for pointing out this problem! + - Our work does not always download videos with the best resolution, so we provide two cropping methods. Thanks @universome and @Feii Yin for pointing out this problem! 1. Download the video with reference resulotion in **xx_resolution.txt** and crop the facial region with fixed window size in **xx_crop_wh.txt**. (This method is as same as ours, but the downloaded video may not be the best resolution). - 2. First, download the video with best resulotion. Then, detect the facial landmark in the splitted talking head clips and count the square window of the face, specifically, count the facial region in each frame and merge all regions into one square range. Next, enlarge the window size with **xx_crop_ratio.txt**. Finally, crop the facial region. + 2. First, download the video with best resulotion. Then, detect the facial landmark in the splitted talking head clips and count the square window of the face, specifically, count the facial region in each frame and merge all regions into one square range. Next, enlarge the window size with **xx_crop_ratio.txt**. Finally, crop the facial region. - We resize all cropped videos into **512 x 512** resolution. The HDTF dataset is available to download under a Creative Commons Attribution 4.0 International License. If you face any problems when processing HDTF, pls contact me. +## Downloading +For convenience, we added the `download.py` script which downloads, crops and resizes the dataset. You can use it via the following command: +``` +python download.py --output_dir /path/to/output/dir --num_workers 8 +``` + +Note: some videos might become unavailable if the authors will remove them or make them private. + ## Reference if you use HDTF, pls reference From 03179a666af4ba270b11e9cebc5b9ce268e22607 Mon Sep 17 00:00:00 2001 From: Eddie Offermann Date: Thu, 14 Nov 2024 11:18:38 -0800 Subject: [PATCH 03/14] Substantial refactoring. Using yt_dlp for YouTube downloads instead of youtube-dl. Added colorama library to improve readability of output. Improving multiprocessing pooling. Lots of logging improvements. Implementation of a Logger class. --- download.py | 190 +++++++++++++++++++++++++++++----------------------- 1 file changed, 108 insertions(+), 82 deletions(-) diff --git a/download.py b/download.py index 3e21b61..c9204a6 100644 --- a/download.py +++ b/download.py @@ -10,23 +10,25 @@ $ python download.py --output_dir /tmp/data/hdtf --num_workers 8 ``` -You need tqdm and youtube-dl libraries to be installed for this script to work. +You need tqdm, yt_dlp, and colorama libraries to be installed for this script to work. """ - import os import argparse +import subprocess +import pprint from typing import List, Dict from multiprocessing import Pool -import subprocess -from subprocess import Popen, PIPE from urllib import parse +import yt_dlp from tqdm import tqdm - +from colorama import init as cinit +from colorama import Fore subsets = ["RD", "WDA", "WRA"] +cinit(autoreset=True) def download_hdtf(source_dir: os.PathLike, output_dir: os.PathLike, num_workers: int, **process_video_kwargs): os.makedirs(output_dir, exist_ok=True) @@ -39,13 +41,15 @@ def download_hdtf(source_dir: os.PathLike, output_dir: os.PathLike, num_workers: **process_video_kwargs, ) for vd in download_queue] pool = Pool(processes=num_workers) - tqdm_kwargs = dict(total=len(task_kwargs), desc=f'Downloading videos into {output_dir} (note: without sound)') + tqdm_kwargs = dict(total=len(task_kwargs), desc=f'Downloading videos into {output_dir}') for _ in tqdm(pool.imap_unordered(task_proxy, task_kwargs), **tqdm_kwargs): pass + pool.close() + pool.join() - print('Download is finished, you can now (optionally) delete the following directories, since they are not needed anymore and occupy a lot of space:') - print(' -', os.path.join(output_dir, '_videos_raw')) + print(Fore.GREEN+'Download is finished, you can now (optionally) delete the following directories, since they are not needed anymore and occupy a lot of space:') + print(Fore.GREEN+' - '+os.path.join(output_dir, '_videos_raw')) def construct_download_queue(source_dir: os.PathLike, output_dir: os.PathLike) -> List[Dict]: @@ -59,29 +63,32 @@ def construct_download_queue(source_dir: os.PathLike, output_dir: os.PathLike) - for video_name, (video_url,) in video_urls.items(): if not f'{video_name}.mp4' in intervals: - print(f'Entire {subset}/{video_name} does not contain any clip intervals, hence is broken. Discarding it.') + print(f'{Fore.RED}Clip {subset}/{video_name} does not contain any clip intervals. It will be discarded.') continue if not f'{video_name}.mp4' in resolutions or len(resolutions[f'{video_name}.mp4']) > 1: - print(f'Entire {subset}/{video_name} does not contain the resolution (or it is in a bad format), hence is broken. Discarding it.') + print(f'{Fore.RED}Clip {subset}/{video_name} does not contain an appropriate resolution (or it is in a bad format). It will be discarded.') continue all_clips_intervals = [x.split('-') for x in intervals[f'{video_name}.mp4']] clips_crops = [] clips_intervals = [] + crops_keys=', '.join(crops.keys()) for clip_idx, clip_interval in enumerate(all_clips_intervals): clip_name = f'{video_name}_{clip_idx}.mp4' if not clip_name in crops: - print(f'Clip {subset}/{clip_name} is not present in crops, hence is broken. Discarding it.') + print(f'{Fore.RED}Discarding Clip: {subset}/{clip_name}. Clip is not present in crops.') continue + else: + print(f'{Fore.GREEN}Appending Clip: {subset}/{clip_name}') clips_crops.append(crops[clip_name]) clips_intervals.append(clip_interval) clips_crops = [list(map(int, cs)) for cs in clips_crops] if len(clips_crops) == 0: - print(f'Entire {subset}/{video_name} does not contain any crops, hence is broken. Discarding it.') + print(f'{Fore.RED}Discarding {subset}/{video_name}. No cropped versions found.') continue assert len(clips_intervals) == len(clips_crops) @@ -111,18 +118,13 @@ def download_and_process_video(video_data: Dict, output_dir: str): """ raw_download_path = os.path.join(output_dir, '_videos_raw', f"{video_data['name']}.mp4") raw_download_log_file = os.path.join(output_dir, '_videos_raw', f"{video_data['name']}_download_log.txt") - download_result = download_video(video_data['id'], raw_download_path, resolution=video_data['resolution'], log_file=raw_download_log_file) + print(f"{Fore.LIGHTBLUE_EX} raw_download_path: {raw_download_path}") + + download_result = download_video(video_data['id'], raw_download_path, log_file=raw_download_log_file) if not download_result: - print('Failed to download', video_data) - print(f'See {raw_download_log_file} for details') - return - - # We do not know beforehand, what will be the resolution of the downloaded video - # Youtube-dl selects a (presumably) highest one - video_resolution = get_video_resolution(raw_download_path) - if not video_resolution != video_data['resolution']: - print(f"Downloaded resolution is not correct for {video_data['name']}: {video_resolution} vs {video_data['name']}. Discarding this video.") + print(f'{Fore.RED} Failed to download {video_data["name"]}') + print(f'{Fore.RED} See {raw_download_log_file} for details') return for clip_idx in range(len(video_data['intervals'])): @@ -132,7 +134,8 @@ def download_and_process_video(video_data: Dict, output_dir: str): crop_success = cut_and_crop_video(raw_download_path, clip_path, start, end, video_data['crops'][clip_idx]) if not crop_success: - print(f'Failed to cut-and-crop clip #{clip_idx}', video_data) + print(f'{Fore.RED} Failed to cut-and-crop clip #{clip_idx}', video_data) + pprint.pprint(video_data, indent=4, sort_dicts=False) continue @@ -147,92 +150,115 @@ def read_file_as_space_separated_data(filepath: os.PathLike) -> Dict: return data - -def download_video(video_id, download_path, resolution: int=None, video_format="mp4", log_file=None): +def download_video(video_id, download_path, resolution: int = None, video_format="bestvideo+bestaudio", log_file=None): """ Download video from YouTube. :param video_id: YouTube ID of the video. :param download_path: Where to save the video. - :param video_format: Format to download. - :param log_file: Path to a log file for youtube-dl. + :param resolution: Desired resolution (not currently used in yt-dlp config). + :param video_format: Format to download (default is best video and audio). + :param log_file: Path to a log file for yt-dlp. :return: Tuple: path to the downloaded video and a bool indicating success. - - Copy-pasted from https://github.com/ytdl-org/youtube-dl """ - # if os.path.isfile(download_path): return True # File already exists - - if log_file is None: - stderr = subprocess.DEVNULL - else: - stderr = open(log_file, "a") - video_selection = f"bestvideo[ext={video_format}]" - video_selection = video_selection if resolution is None else f"{video_selection}[height={resolution}]" - command = [ - "youtube-dl", - "https://youtube.com/watch?v={}".format(video_id), "--quiet", "-f", - video_selection, - "--output", download_path, - "--no-continue" - ] - return_code = subprocess.call(command, stderr=stderr) - success = return_code == 0 - - if log_file is not None: - stderr.close() - - return success and os.path.isfile(download_path) - - -def get_video_resolution(video_path: os.PathLike) -> int: - command = ' '.join([ - "ffprobe", - "-v", "error", - "-select_streams", "v:0", "-show_entries", "stream=height", "-of", "csv=p=0", - video_path - ]) - - process = Popen(command, stdout=PIPE, shell=True) - (output, err) = process.communicate() - return_code = process.wait() - success = return_code == 0 - - if not success: - print('Command failed:', command) - return -1 - - return int(output) - + + class Logger: + """ + A simple logger for yt-dlp to write debug, warning, and error messages to a specified log file. + + Attributes: + log_path (str): Path to the log file where messages will be written. + """ + + def __init__(self, log_path): + """ + Initializes the Logger with a log file path. + + :param log_path: Path to the file where log messages should be saved. + """ + self.log_path = log_path + + def debug(self, msg): + """ + Logs a debug message. + + :param msg: The debug message to log. + """ + with open(self.log_path, "a") as f: + f.write(f"DEBUG: {msg}\n") + + def warning(self, msg): + """ + Logs a warning message. + + :param msg: The warning message to log. + """ + with open(self.log_path, "a") as f: + f.write(f"WARNING: {msg}\n") + + def error(self, msg): + """ + Logs an error message. + + :param msg: The error message to log. + """ + with open(self.log_path, "a") as f: + f.write(f"ERROR: {msg}\n") + + # Define yt-dlp options + ydl_opts = { + 'format': video_format, # Set video format to best video and audio by default + 'outtmpl': download_path, # Output path template + 'quiet': True, # Suppress verbose output + 'merge_output_format': 'mp4', # Ensure output format is MP4 + } + + # If a log file is specified, configure the logger + if log_file: + ydl_opts['logger'] = Logger(log_file) + + # Download the video using yt-dlp + try: + with yt_dlp.YoutubeDL(ydl_opts) as ydl: + ydl.download([f'https://www.youtube.com/watch?v={video_id}']) + success = True + except Exception as e: + success = False + if log_file: + with open(log_file, "a") as f: + f.write(f"ERROR: Failed to download {video_id}. Exception: {str(e)}\n") + + result = success and os.path.isfile(download_path) + return download_path, result def cut_and_crop_video(raw_video_path, output_path, start, end, crop: List[int]): # if os.path.isfile(output_path): return True # File already exists x, out_w, y, out_h = crop - command = ' '.join([ + command = [ "ffmpeg", "-i", raw_video_path, "-strict", "-2", # Some legacy arguments "-loglevel", "quiet", # Verbosity arguments "-qscale", "0", # Preserve the quality "-y", # Overwrite if the file exists - "-ss", str(start), "-to", str(end), # Cut arguments + "-ss", str(start), + "-to", str(end), "-filter:v", f'"crop={out_w}:{out_h}:{x}:{y}"', # Crop arguments output_path - ]) - - return_code = subprocess.call(command, shell=True) + ] + return_code = subprocess.call(command) success = return_code == 0 if not success: - print('Command failed:', command) + print(f'{Fore.RED} Command failed: {" ".join(command)}') return success - if __name__ == "__main__": parser = argparse.ArgumentParser(description="Download HDTF dataset") - parser.add_argument('-s', '--source_dir', type=str, default='HDTF_dataset', help='Path to the directory with the dataset') - parser.add_argument('-o', '--output_dir', type=str, help='Where to save the videos?') - parser.add_argument('-w', '--num_workers', type=int, default=8, help='Number of workers for downloading') + parser.add_argument('-s', '--source_dir', type=str, default='HDTF_dataset', help='Path to the directory with the dataset description') + parser.add_argument('-o', '--output_dir', type=str, default='dataset', help='Where to save the videos?') + parser.add_argument('-w', '--num_workers', type=int, default=1, help='Number of workers for downloading.') args = parser.parse_args() download_hdtf( From 387ba7b20be2a30033849b7fd207fa93ce74ac7c Mon Sep 17 00:00:00 2001 From: Eddie Offermann Date: Thu, 14 Nov 2024 12:00:37 -0800 Subject: [PATCH 04/14] Substantial refactoring. Using yt_dlp for YouTube downloads instead of youtube-dl. Added colorama library to improve readability of output. Improving multiprocessing pooling. Lots of logging improvements. Implementation of a Logger class. Substantial additions to have proper docstrings throughout. --- download.py | 389 ++++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 335 insertions(+), 54 deletions(-) diff --git a/download.py b/download.py index c9204a6..45f5846 100644 --- a/download.py +++ b/download.py @@ -31,6 +31,41 @@ cinit(autoreset=True) def download_hdtf(source_dir: os.PathLike, output_dir: os.PathLike, num_workers: int, **process_video_kwargs): + """ + Downloads and processes videos from the HDTF dataset in parallel using multiprocessing. + + The function manages the download process by: + - Creating the necessary output directories. + - Constructing a download queue from files in the specified source directory. + - Using a multiprocessing pool to handle downloads and subsequent processing. + - Providing progress tracking with tqdm. + + After completing the download, a message is displayed with optional cleanup instructions to delete + temporary raw video files to save space. + + Args: + source_dir (os.PathLike): The directory containing HDTF metadata files, including video URLs, + crop data, time intervals, and resolution information for each video subset. + output_dir (os.PathLike): The directory where downloaded videos and processed files will be saved. + num_workers (int): The number of parallel worker processes to use for downloading. + **process_video_kwargs: Additional keyword arguments passed to `download_and_process_video`, + allowing custom settings for processing each video. + + Workflow: + 1. Creates the primary output directory and a subdirectory `_videos_raw` for raw downloads. + 2. Calls `construct_download_queue` to prepare a list of video download tasks based on the metadata + available in `source_dir`. Each entry in the queue includes details needed for downloading and processing. + 3. Uses a multiprocessing `Pool` to execute `download_and_process_video` for each video in `download_queue`, + with progress displayed via tqdm. + 4. After completing downloads, provides a message about optional cleanup for temporary video files. + + Returns: + None + + Raises: + AssertionError: If certain data inconsistencies are detected during download queue construction, such as + missing or malformed intervals, crops, or resolution information. + """ os.makedirs(output_dir, exist_ok=True) os.makedirs(os.path.join(output_dir, '_videos_raw'), exist_ok=True) @@ -39,9 +74,10 @@ def download_hdtf(source_dir: os.PathLike, output_dir: os.PathLike, num_workers: video_data=vd, output_dir=output_dir, **process_video_kwargs, - ) for vd in download_queue] + ) for vd in download_queue] pool = Pool(processes=num_workers) - tqdm_kwargs = dict(total=len(task_kwargs), desc=f'Downloading videos into {output_dir}') + tqdm_kwargs = dict(total=len(task_kwargs), + desc=f'Downloading videos into {output_dir}') for _ in tqdm(pool.imap_unordered(task_proxy, task_kwargs), **tqdm_kwargs): pass @@ -53,32 +89,85 @@ def download_hdtf(source_dir: os.PathLike, output_dir: os.PathLike, num_workers: def construct_download_queue(source_dir: os.PathLike, output_dir: os.PathLike) -> List[Dict]: + """ + Constructs a queue of videos to be downloaded and processed based on metadata from the HDTF dataset. + + This function reads metadata files for each subset in the HDTF dataset, which provide information on: + - Video URLs. + - Time intervals indicating segments to be extracted from each video. + - Crop coordinates defining the regions of interest. + - Resolution information for each video. + + For each valid video file, an entry is created in the download queue with detailed information required + for downloading, cropping, and segmenting. + + Args: + source_dir (os.PathLike): Path to the directory containing metadata files (`*_video_url.txt`, + `*_crop_wh.txt`, `*_annotion_time.txt`, and `*_resolution.txt`) for each subset. + output_dir (os.PathLike): Path to the directory where the downloaded and processed videos will be stored. + + Returns: + List[Dict]: A list of dictionaries, each representing a video to download and process. Each dictionary + contains the following keys: + - 'name': Combined subset and video name identifier. + - 'id': YouTube video ID extracted from the video URL. + - 'intervals': List of start and end times for each clip segment. + - 'crops': List of crop coordinates for each segment. + - 'output_dir': The output directory path for this video. + - 'resolution': Desired resolution for the video. + + Workflow: + 1. Reads metadata files for each subset (e.g., "RD", "WDA", "WRA") to gather video URLs, time intervals, crops, + and resolution information. + 2. For each video: + - Ensures it has valid time intervals and resolution data. + - Verifies that all segments have corresponding crop information. + - Discards videos missing required metadata, and prints warnings about invalid or missing data. + 3. Creates a download queue entry for each valid video with the required download and processing data. + + Raises: + AssertionError: If the video segment data is inconsistent, such as: + - Missing or malformed time intervals. + - Incomplete or non-square crop data. + These assertions ensure that only well-formed entries are added to the download queue. + + Example: + >>> construct_download_queue("HDTF_dataset", "/tmp/data/hdtf") + [{'name': 'RD_sample_video', 'id': 'abc123', 'intervals': [[0, 10], [15, 25]], + 'crops': [[0, 128, 0, 128], [0, 128, 0, 128]], 'output_dir': '/tmp/data/hdtf', 'resolution': '720p'}] + """ download_queue = [] for subset in subsets: - video_urls = read_file_as_space_separated_data(os.path.join(source_dir, f'{subset}_video_url.txt')) - crops = read_file_as_space_separated_data(os.path.join(source_dir, f'{subset}_crop_wh.txt')) - intervals = read_file_as_space_separated_data(os.path.join(source_dir, f'{subset}_annotion_time.txt')) - resolutions = read_file_as_space_separated_data(os.path.join(source_dir, f'{subset}_resolution.txt')) + video_urls = read_file_as_space_separated_data( + os.path.join(source_dir, f'{subset}_video_url.txt')) + crops = read_file_as_space_separated_data( + os.path.join(source_dir, f'{subset}_crop_wh.txt')) + intervals = read_file_as_space_separated_data( + os.path.join(source_dir, f'{subset}_annotion_time.txt')) + resolutions = read_file_as_space_separated_data( + os.path.join(source_dir, f'{subset}_resolution.txt')) for video_name, (video_url,) in video_urls.items(): if not f'{video_name}.mp4' in intervals: - print(f'{Fore.RED}Clip {subset}/{video_name} does not contain any clip intervals. It will be discarded.') + print( + f'{Fore.RED}Clip {subset}/{video_name} does not contain any clip intervals. It will be discarded.') continue if not f'{video_name}.mp4' in resolutions or len(resolutions[f'{video_name}.mp4']) > 1: print(f'{Fore.RED}Clip {subset}/{video_name} does not contain an appropriate resolution (or it is in a bad format). It will be discarded.') continue - all_clips_intervals = [x.split('-') for x in intervals[f'{video_name}.mp4']] + all_clips_intervals = [x.split('-') + for x in intervals[f'{video_name}.mp4']] clips_crops = [] clips_intervals = [] - crops_keys=', '.join(crops.keys()) for clip_idx, clip_interval in enumerate(all_clips_intervals): clip_name = f'{video_name}_{clip_idx}.mp4' if not clip_name in crops: - print(f'{Fore.RED}Discarding Clip: {subset}/{clip_name}. Clip is not present in crops.') + print( + f'{Fore.RED}Discarding Clip: {subset}/{clip_name}. Clip is not present in crops.') continue else: print(f'{Fore.GREEN}Appending Clip: {subset}/{clip_name}') @@ -88,13 +177,17 @@ def construct_download_queue(source_dir: os.PathLike, output_dir: os.PathLike) - clips_crops = [list(map(int, cs)) for cs in clips_crops] if len(clips_crops) == 0: - print(f'{Fore.RED}Discarding {subset}/{video_name}. No cropped versions found.') + print( + f'{Fore.RED}Discarding {subset}/{video_name}. No cropped versions found.') continue assert len(clips_intervals) == len(clips_crops) - assert set([len(vi) for vi in clips_intervals]) == {2}, f"Broken time interval, {clips_intervals}" - assert set([len(vc) for vc in clips_crops]) == {4}, f"Broken crops, {clips_crops}" - assert all([vc[1] == vc[3] for vc in clips_crops]), f'Some crops are not square, {clips_crops}' + assert set([len(vi) for vi in clips_intervals]) == { + 2}, f"Broken time interval, {clips_intervals}" + assert set([len(vc) for vc in clips_crops]) == { + 4}, f"Broken crops, {clips_crops}" + assert all([vc[1] == vc[3] for vc in clips_crops] + ), f'Some crops are not square, {clips_crops}' download_queue.append({ 'name': f'{subset}_{video_name}', @@ -107,20 +200,100 @@ def construct_download_queue(source_dir: os.PathLike, output_dir: os.PathLike) - return download_queue - def task_proxy(kwargs): + """ + A proxy function to execute `download_and_process_video` with unpacked keyword arguments. + + This function serves as a wrapper that allows passing a dictionary of arguments (`kwargs`) + to the `download_and_process_video` function. It is primarily used in conjunction with + multiprocessing, where it enables the `Pool.imap_unordered` method to handle the video + processing tasks in parallel. + + Args: + kwargs (dict): A dictionary of arguments required by `download_and_process_video`. + This typically includes: + - 'video_data': A dictionary containing video details (ID, name, intervals, crops, etc.). + - 'output_dir': The directory path where processed clips will be saved. + + Returns: + None + + Usage: + The `task_proxy` function is designed for use with parallel processing. By passing a dictionary + of arguments instead of positional arguments, it enables compatibility with the multiprocessing + pool's mapping methods. + + Example: + >>> task_kwargs = {'video_data': {...}, 'output_dir': '/path/to/output'} + >>> task_proxy(task_kwargs) + + Notes: + This function simplifies the interface for multiprocessing tasks, allowing + `download_and_process_video` to be used directly within the parallel processing workflow + without modifying its original function signature. + """ return download_and_process_video(**kwargs) + def download_and_process_video(video_data: Dict, output_dir: str): """ - Downloads the video and cuts/crops it into several ones according to the provided time intervals + Downloads a video from YouTube and processes it by segmenting and cropping based on provided intervals and crop data. + + The function performs the following steps: + 1. Downloads the specified video to a raw file path within the `_videos_raw` subdirectory of `output_dir`. + 2. Iterates over the specified intervals and crop data to create individual video clips: + - Each clip is extracted according to its specified time interval. + - Each clip is cropped based on the coordinates provided in `video_data['crops']`. + 3. Saves each processed clip in `output_dir` with a unique name indicating the video and clip index. + + Args: + video_data (dict): A dictionary containing metadata for the video to be downloaded and processed. + Expected keys include: + - 'id': The YouTube ID of the video. + - 'name': A unique name identifier for the video. + - 'intervals': A list of time intervals (start, end) for each clip segment. + - 'crops': A list of crop coordinates (x, width, y, height) for each clip segment. + - 'resolution': The desired resolution of the video. + output_dir (str): Path to the directory where processed video clips will be saved. + + Workflow: + - Downloads the video using `download_video`, saving it as `{video_name}.mp4` in `_videos_raw`. + - For each time interval in `video_data['intervals']`: + - Extracts the segment and applies cropping according to the corresponding entry in `video_data['crops']`. + - Saves each clip in `output_dir` with a file name formatted as `{video_name}_{clip_idx:03d}.mp4`. + - Logs errors to the console if downloading or processing fails for a particular segment or crop. + + Returns: + None + + Raises: + ValueError: If the video cannot be downloaded or if any of the cropping or segmentation fails. + + Example: + >>> video_data = { + 'id': 'abc123', + 'name': 'sample_video', + 'intervals': [[0, 10], [15, 25]], + 'crops': [[0, 128, 0, 128], [10, 118, 10, 118]], + 'output_dir': '/tmp/data/hdtf', + 'resolution': '720p' + } + >>> download_and_process_video(video_data, '/tmp/data/hdtf') + + Notes: + - This function requires `ffmpeg` to be installed for segmenting and cropping video clips. + - Detailed logging is provided to indicate the status of each clip's download and processing. + - If `download_video` fails, an error message is printed to the console, and the function skips further processing. """ - raw_download_path = os.path.join(output_dir, '_videos_raw', f"{video_data['name']}.mp4") - raw_download_log_file = os.path.join(output_dir, '_videos_raw', f"{video_data['name']}_download_log.txt") + raw_download_path = os.path.join( + output_dir, '_videos_raw', f"{video_data['name']}.mp4") + raw_download_log_file = os.path.join( + output_dir, '_videos_raw', f"{video_data['name']}_download_log.txt") print(f"{Fore.LIGHTBLUE_EX} raw_download_path: {raw_download_path}") - - download_result = download_video(video_data['id'], raw_download_path, log_file=raw_download_log_file) + + download_result = download_video( + video_data['id'], raw_download_path, log_file=raw_download_log_file) if not download_result: print(f'{Fore.RED} Failed to download {video_data["name"]}') @@ -131,17 +304,43 @@ def download_and_process_video(video_data: Dict, output_dir: str): start, end = video_data['intervals'][clip_idx] clip_name = f'{video_data["name"]}_{clip_idx:03d}' clip_path = os.path.join(output_dir, clip_name + '.mp4') - crop_success = cut_and_crop_video(raw_download_path, clip_path, start, end, video_data['crops'][clip_idx]) + crop_success = cut_and_crop_video( + raw_download_path, clip_path, start, end, video_data['crops'][clip_idx]) if not crop_success: print(f'{Fore.RED} Failed to cut-and-crop clip #{clip_idx}', video_data) pprint.pprint(video_data, indent=4, sort_dicts=False) continue - def read_file_as_space_separated_data(filepath: os.PathLike) -> Dict: """ - Reads a file as a space-separated dataframe, where the first column is the index + Reads a space-separated file and returns its contents as a dictionary. + + This function reads a text file where each line contains space-separated values. + The first value in each line is treated as the key, and the remaining values are + stored as a list associated with that key. This is useful for parsing metadata + files with a consistent space-separated format. + + Args: + filepath (os.PathLike): The path to the file to be read. + + Returns: + Dict: A dictionary where each key corresponds to the first item in a line, + and each value is a list of the remaining items in that line. + + Example: + Suppose `example.txt` contains: + video1 1280 720 + video2 640 480 + >>> read_file_as_space_separated_data("example.txt") + {'video1': ['1280', '720'], 'video2': ['640', '480']} + + Notes: + - Blank lines are not supported and may cause errors. + - Each line must contain at least one space-separated value to be valid. + + Raises: + IOError: If the file cannot be opened or read. """ with open(filepath, 'r') as f: lines = f.read().splitlines() @@ -150,29 +349,60 @@ def read_file_as_space_separated_data(filepath: os.PathLike) -> Dict: return data -def download_video(video_id, download_path, resolution: int = None, video_format="bestvideo+bestaudio", log_file=None): + +def download_video(video_id, download_path, video_format="bestvideo+bestaudio", log_file=None): """ - Download video from YouTube. - :param video_id: YouTube ID of the video. - :param download_path: Where to save the video. - :param resolution: Desired resolution (not currently used in yt-dlp config). - :param video_format: Format to download (default is best video and audio). - :param log_file: Path to a log file for yt-dlp. - :return: Tuple: path to the downloaded video and a bool indicating success. + Downloads a YouTube video in the specified format and saves it to a given path. + + This function uses `yt-dlp` to download a video by its YouTube ID, selecting the highest + available quality by default. It provides options for specifying a custom format or resolution + and can log download progress and errors to a specified log file. + + Args: + video_id (str): The YouTube ID of the video to download. + download_path (str): The full path (including file name) where the downloaded video will be saved. + video_format (str, optional): The video and audio format selection for yt-dlp. Defaults to + "bestvideo+bestaudio" for highest available quality. + log_file (str, optional): Path to a file where log messages (debug, warnings, and errors) + will be recorded. If None, logging to a file is disabled. + + Returns: + Tuple[str, bool]: A tuple where: + - The first element is the path to the downloaded video file. + - The second element is a boolean indicating success (True if the file + was downloaded successfully, False otherwise). + + Workflow: + 1. Constructs `yt-dlp` options based on the provided arguments, including `format`, `outtmpl`, + and `logger` if a log file is specified. + 2. Attempts to download the video. If successful, verifies the file exists at `download_path`. + 3. Logs errors if the download fails and saves them to `log_file` if specified. + + Raises: + Exception: Any exceptions during the download are logged if `log_file` is provided, and the + function will return False for success. + + Example: + >>> download_video("abc123", "/path/to/video.mp4", log_file="/path/to/log.txt") + ("/path/to/video.mp4", True) + + Notes: + - Requires `yt-dlp` to be installed. + - Requires `ffmpeg` if merging video and audio streams is necessary. + - Custom logging is provided through a nested `Logger` class if `log_file` is specified. """ - class Logger: """ A simple logger for yt-dlp to write debug, warning, and error messages to a specified log file. - + Attributes: log_path (str): Path to the log file where messages will be written. """ - + def __init__(self, log_path): """ Initializes the Logger with a log file path. - + :param log_path: Path to the file where log messages should be saved. """ self.log_path = log_path @@ -180,7 +410,7 @@ def __init__(self, log_path): def debug(self, msg): """ Logs a debug message. - + :param msg: The debug message to log. """ with open(self.log_path, "a") as f: @@ -189,7 +419,7 @@ def debug(self, msg): def warning(self, msg): """ Logs a warning message. - + :param msg: The warning message to log. """ with open(self.log_path, "a") as f: @@ -198,7 +428,7 @@ def warning(self, msg): def error(self, msg): """ Logs an error message. - + :param msg: The error message to log. """ with open(self.log_path, "a") as f: @@ -206,16 +436,17 @@ def error(self, msg): # Define yt-dlp options ydl_opts = { - 'format': video_format, # Set video format to best video and audio by default + # Set video format to best video and audio by default + 'format': video_format, 'outtmpl': download_path, # Output path template 'quiet': True, # Suppress verbose output 'merge_output_format': 'mp4', # Ensure output format is MP4 } - + # If a log file is specified, configure the logger if log_file: ydl_opts['logger'] = Logger(log_file) - + # Download the video using yt-dlp try: with yt_dlp.YoutubeDL(ydl_opts) as ydl: @@ -225,25 +456,71 @@ def error(self, msg): success = False if log_file: with open(log_file, "a") as f: - f.write(f"ERROR: Failed to download {video_id}. Exception: {str(e)}\n") - + f.write( + f"ERROR: Failed to download {video_id}. Exception: {str(e)}\n") + result = success and os.path.isfile(download_path) return download_path, result def cut_and_crop_video(raw_video_path, output_path, start, end, crop: List[int]): - # if os.path.isfile(output_path): return True # File already exists - + """ + Cuts and crops a video segment from a specified start to end time and saves it to the output path. + + This function uses `ffmpeg` to: + 1. Extract a segment of the video from `start` to `end` time. + 2. Apply a crop filter to the segment based on the provided crop coordinates. + 3. Save the processed clip to `output_path` with the original quality preserved. + + Args: + raw_video_path (str): Path to the source video file to be processed. + output_path (str): Path where the processed video clip will be saved, including the file name. + start (float or int): Start time in seconds for the video segment to be cut. + end (float or int): End time in seconds for the video segment to be cut. + crop (List[int]): A list specifying crop parameters [x, width, y, height], where: + - x (int): The x-coordinate of the top-left corner of the crop area. + - width (int): The width of the crop area. + - y (int): The y-coordinate of the top-left corner of the crop area. + - height (int): The height of the crop area. + + Returns: + bool: True if the cutting and cropping were successful, False otherwise. + + Workflow: + 1. Constructs an `ffmpeg` command to cut the video from `start` to `end` and apply the specified crop filter. + 2. Executes the command with `subprocess.call` to process the video. + 3. Checks the return code to confirm successful execution. Prints a message if the process fails. + + Raises: + ValueError: If `crop` does not contain exactly four values, or if any component is invalid. + FileNotFoundError: If `ffmpeg` is not installed or accessible from the system PATH. + + Example: + >>> cut_and_crop_video( + raw_video_path="/path/to/source.mp4", + output_path="/path/to/clip.mp4", + start=10, + end=20, + crop=[50, 200, 30, 200] + ) + True + + Notes: + - Requires `ffmpeg` to be installed and accessible from the command line. + - If `output_path` already exists, it will be overwritten. + - `-qscale 0` is used to preserve the video quality. + - The crop filter uses the format `crop=width:height:x:y`, where `x` and `y` specify the top-left corner. + """ x, out_w, y, out_h = crop command = [ "ffmpeg", "-i", raw_video_path, - "-strict", "-2", # Some legacy arguments - "-loglevel", "quiet", # Verbosity arguments - "-qscale", "0", # Preserve the quality - "-y", # Overwrite if the file exists - "-ss", str(start), + "-strict", "-2", # Some legacy arguments + "-loglevel", "quiet", # Verbosity arguments + "-qscale", "0", # Preserve the quality + "-y", # Overwrite if the file exists + "-ss", str(start), "-to", str(end), - "-filter:v", f'"crop={out_w}:{out_h}:{x}:{y}"', # Crop arguments + "-filter:v", f'"crop={out_w}:{out_h}:{x}:{y}"', # Crop arguments output_path ] return_code = subprocess.call(command) @@ -254,11 +531,15 @@ def cut_and_crop_video(raw_video_path, output_path, start, end, crop: List[int]) return success + if __name__ == "__main__": parser = argparse.ArgumentParser(description="Download HDTF dataset") - parser.add_argument('-s', '--source_dir', type=str, default='HDTF_dataset', help='Path to the directory with the dataset description') - parser.add_argument('-o', '--output_dir', type=str, default='dataset', help='Where to save the videos?') - parser.add_argument('-w', '--num_workers', type=int, default=1, help='Number of workers for downloading.') + parser.add_argument('-s', '--source_dir', type=str, default='HDTF_dataset', + help='Path to the directory with the dataset description') + parser.add_argument('-o', '--output_dir', type=str, + default='dataset', help='Where to save the videos?') + parser.add_argument('-w', '--num_workers', type=int, + default=1, help='Number of workers for downloading.') args = parser.parse_args() download_hdtf( From 9b69a8923c7efa96955c0cf0008697ad75d37c3d Mon Sep 17 00:00:00 2001 From: Eddie Offermann Date: Thu, 14 Nov 2024 12:07:36 -0800 Subject: [PATCH 05/14] Minor fix to syntax - the crop filter portion shouldn't have been quoted. --- download.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/download.py b/download.py index 45f5846..ef75ac1 100644 --- a/download.py +++ b/download.py @@ -308,7 +308,7 @@ def download_and_process_video(video_data: Dict, output_dir: str): raw_download_path, clip_path, start, end, video_data['crops'][clip_idx]) if not crop_success: - print(f'{Fore.RED} Failed to cut-and-crop clip #{clip_idx}', video_data) + print(f'{Fore.RED} Failed to cut-and-crop clip #{clip_idx}') pprint.pprint(video_data, indent=4, sort_dicts=False) continue @@ -520,7 +520,7 @@ def cut_and_crop_video(raw_video_path, output_path, start, end, crop: List[int]) "-y", # Overwrite if the file exists "-ss", str(start), "-to", str(end), - "-filter:v", f'"crop={out_w}:{out_h}:{x}:{y}"', # Crop arguments + "-filter:v", f"crop={out_w}:{out_h}:{x}:{y}", # Crop arguments output_path ] return_code = subprocess.call(command) From ccfc2d03e2676c20168806f5fe047e920e9a2939 Mon Sep 17 00:00:00 2001 From: Eddie Offermann Date: Thu, 14 Nov 2024 13:05:32 -0800 Subject: [PATCH 06/14] Tweaks to formatting for consistency. --- download.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/download.py b/download.py index ef75ac1..8d80d32 100644 --- a/download.py +++ b/download.py @@ -536,10 +536,10 @@ def cut_and_crop_video(raw_video_path, output_path, start, end, crop: List[int]) parser = argparse.ArgumentParser(description="Download HDTF dataset") parser.add_argument('-s', '--source_dir', type=str, default='HDTF_dataset', help='Path to the directory with the dataset description') - parser.add_argument('-o', '--output_dir', type=str, - default='dataset', help='Where to save the videos?') - parser.add_argument('-w', '--num_workers', type=int, - default=1, help='Number of workers for downloading.') + parser.add_argument('-o', '--output_dir', type=str, default='download', + help='Where to save the videos?') + parser.add_argument('-w', '--num_workers', type=int, default=1, + help='Number of workers for downloading.') args = parser.parse_args() download_hdtf( From 3fd7a9343cace7c01486ee0655f583db955f25ef Mon Sep 17 00:00:00 2001 From: Eddie Offermann Date: Thu, 14 Nov 2024 13:14:01 -0800 Subject: [PATCH 07/14] Adding the download directory and main log file to .gitignore --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..46c31ca --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +download/ +output_logfile.txt From 572449048daa7817930ac5d6226ed9413aa00ca5 Mon Sep 17 00:00:00 2001 From: Ivan Skorokhodov Date: Mon, 23 Aug 2021 16:39:06 +0300 Subject: [PATCH 08/14] feat: add the downloading script --- download.py | 242 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 242 insertions(+) create mode 100644 download.py diff --git a/download.py b/download.py new file mode 100644 index 0000000..3e21b61 --- /dev/null +++ b/download.py @@ -0,0 +1,242 @@ +""" +This file downloads almost all the videos from the HDTF dataset. Some videos are discarded for the following reasons: +- they do not contain cropping information because they are somewhat noisy (hand moving, background changing, etc.) +- they are not available on youtube anymore (at all or in the specified format) + +The discarded videos constitute a small portion of the dataset, so you can try to re-download them manually on your own. + +Usage: +``` +$ python download.py --output_dir /tmp/data/hdtf --num_workers 8 +``` + +You need tqdm and youtube-dl libraries to be installed for this script to work. +""" + + +import os +import argparse +from typing import List, Dict +from multiprocessing import Pool +import subprocess +from subprocess import Popen, PIPE +from urllib import parse + +from tqdm import tqdm + + +subsets = ["RD", "WDA", "WRA"] + + +def download_hdtf(source_dir: os.PathLike, output_dir: os.PathLike, num_workers: int, **process_video_kwargs): + os.makedirs(output_dir, exist_ok=True) + os.makedirs(os.path.join(output_dir, '_videos_raw'), exist_ok=True) + + download_queue = construct_download_queue(source_dir, output_dir) + task_kwargs = [dict( + video_data=vd, + output_dir=output_dir, + **process_video_kwargs, + ) for vd in download_queue] + pool = Pool(processes=num_workers) + tqdm_kwargs = dict(total=len(task_kwargs), desc=f'Downloading videos into {output_dir} (note: without sound)') + + for _ in tqdm(pool.imap_unordered(task_proxy, task_kwargs), **tqdm_kwargs): + pass + + print('Download is finished, you can now (optionally) delete the following directories, since they are not needed anymore and occupy a lot of space:') + print(' -', os.path.join(output_dir, '_videos_raw')) + + +def construct_download_queue(source_dir: os.PathLike, output_dir: os.PathLike) -> List[Dict]: + download_queue = [] + + for subset in subsets: + video_urls = read_file_as_space_separated_data(os.path.join(source_dir, f'{subset}_video_url.txt')) + crops = read_file_as_space_separated_data(os.path.join(source_dir, f'{subset}_crop_wh.txt')) + intervals = read_file_as_space_separated_data(os.path.join(source_dir, f'{subset}_annotion_time.txt')) + resolutions = read_file_as_space_separated_data(os.path.join(source_dir, f'{subset}_resolution.txt')) + + for video_name, (video_url,) in video_urls.items(): + if not f'{video_name}.mp4' in intervals: + print(f'Entire {subset}/{video_name} does not contain any clip intervals, hence is broken. Discarding it.') + continue + + if not f'{video_name}.mp4' in resolutions or len(resolutions[f'{video_name}.mp4']) > 1: + print(f'Entire {subset}/{video_name} does not contain the resolution (or it is in a bad format), hence is broken. Discarding it.') + continue + + all_clips_intervals = [x.split('-') for x in intervals[f'{video_name}.mp4']] + clips_crops = [] + clips_intervals = [] + + for clip_idx, clip_interval in enumerate(all_clips_intervals): + clip_name = f'{video_name}_{clip_idx}.mp4' + if not clip_name in crops: + print(f'Clip {subset}/{clip_name} is not present in crops, hence is broken. Discarding it.') + continue + clips_crops.append(crops[clip_name]) + clips_intervals.append(clip_interval) + + clips_crops = [list(map(int, cs)) for cs in clips_crops] + + if len(clips_crops) == 0: + print(f'Entire {subset}/{video_name} does not contain any crops, hence is broken. Discarding it.') + continue + + assert len(clips_intervals) == len(clips_crops) + assert set([len(vi) for vi in clips_intervals]) == {2}, f"Broken time interval, {clips_intervals}" + assert set([len(vc) for vc in clips_crops]) == {4}, f"Broken crops, {clips_crops}" + assert all([vc[1] == vc[3] for vc in clips_crops]), f'Some crops are not square, {clips_crops}' + + download_queue.append({ + 'name': f'{subset}_{video_name}', + 'id': parse.parse_qs(parse.urlparse(video_url).query)['v'][0], + 'intervals': clips_intervals, + 'crops': clips_crops, + 'output_dir': output_dir, + 'resolution': resolutions[f'{video_name}.mp4'][0] + }) + + return download_queue + + +def task_proxy(kwargs): + return download_and_process_video(**kwargs) + + +def download_and_process_video(video_data: Dict, output_dir: str): + """ + Downloads the video and cuts/crops it into several ones according to the provided time intervals + """ + raw_download_path = os.path.join(output_dir, '_videos_raw', f"{video_data['name']}.mp4") + raw_download_log_file = os.path.join(output_dir, '_videos_raw', f"{video_data['name']}_download_log.txt") + download_result = download_video(video_data['id'], raw_download_path, resolution=video_data['resolution'], log_file=raw_download_log_file) + + if not download_result: + print('Failed to download', video_data) + print(f'See {raw_download_log_file} for details') + return + + # We do not know beforehand, what will be the resolution of the downloaded video + # Youtube-dl selects a (presumably) highest one + video_resolution = get_video_resolution(raw_download_path) + if not video_resolution != video_data['resolution']: + print(f"Downloaded resolution is not correct for {video_data['name']}: {video_resolution} vs {video_data['name']}. Discarding this video.") + return + + for clip_idx in range(len(video_data['intervals'])): + start, end = video_data['intervals'][clip_idx] + clip_name = f'{video_data["name"]}_{clip_idx:03d}' + clip_path = os.path.join(output_dir, clip_name + '.mp4') + crop_success = cut_and_crop_video(raw_download_path, clip_path, start, end, video_data['crops'][clip_idx]) + + if not crop_success: + print(f'Failed to cut-and-crop clip #{clip_idx}', video_data) + continue + + +def read_file_as_space_separated_data(filepath: os.PathLike) -> Dict: + """ + Reads a file as a space-separated dataframe, where the first column is the index + """ + with open(filepath, 'r') as f: + lines = f.read().splitlines() + lines = [[v.strip() for v in l.strip().split(' ')] for l in lines] + data = {l[0]: l[1:] for l in lines} + + return data + + +def download_video(video_id, download_path, resolution: int=None, video_format="mp4", log_file=None): + """ + Download video from YouTube. + :param video_id: YouTube ID of the video. + :param download_path: Where to save the video. + :param video_format: Format to download. + :param log_file: Path to a log file for youtube-dl. + :return: Tuple: path to the downloaded video and a bool indicating success. + + Copy-pasted from https://github.com/ytdl-org/youtube-dl + """ + # if os.path.isfile(download_path): return True # File already exists + + if log_file is None: + stderr = subprocess.DEVNULL + else: + stderr = open(log_file, "a") + video_selection = f"bestvideo[ext={video_format}]" + video_selection = video_selection if resolution is None else f"{video_selection}[height={resolution}]" + command = [ + "youtube-dl", + "https://youtube.com/watch?v={}".format(video_id), "--quiet", "-f", + video_selection, + "--output", download_path, + "--no-continue" + ] + return_code = subprocess.call(command, stderr=stderr) + success = return_code == 0 + + if log_file is not None: + stderr.close() + + return success and os.path.isfile(download_path) + + +def get_video_resolution(video_path: os.PathLike) -> int: + command = ' '.join([ + "ffprobe", + "-v", "error", + "-select_streams", "v:0", "-show_entries", "stream=height", "-of", "csv=p=0", + video_path + ]) + + process = Popen(command, stdout=PIPE, shell=True) + (output, err) = process.communicate() + return_code = process.wait() + success = return_code == 0 + + if not success: + print('Command failed:', command) + return -1 + + return int(output) + + +def cut_and_crop_video(raw_video_path, output_path, start, end, crop: List[int]): + # if os.path.isfile(output_path): return True # File already exists + + x, out_w, y, out_h = crop + + command = ' '.join([ + "ffmpeg", "-i", raw_video_path, + "-strict", "-2", # Some legacy arguments + "-loglevel", "quiet", # Verbosity arguments + "-qscale", "0", # Preserve the quality + "-y", # Overwrite if the file exists + "-ss", str(start), "-to", str(end), # Cut arguments + "-filter:v", f'"crop={out_w}:{out_h}:{x}:{y}"', # Crop arguments + output_path + ]) + + return_code = subprocess.call(command, shell=True) + success = return_code == 0 + + if not success: + print('Command failed:', command) + + return success + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Download HDTF dataset") + parser.add_argument('-s', '--source_dir', type=str, default='HDTF_dataset', help='Path to the directory with the dataset') + parser.add_argument('-o', '--output_dir', type=str, help='Where to save the videos?') + parser.add_argument('-w', '--num_workers', type=int, default=8, help='Number of workers for downloading') + args = parser.parse_args() + + download_hdtf( + args.source_dir, + args.output_dir, + args.num_workers, + ) From c41d90d738c614bbfd6ac9a113b01575b537c074 Mon Sep 17 00:00:00 2001 From: Eddie Offermann Date: Fri, 15 Nov 2024 11:16:39 -0800 Subject: [PATCH 09/14] Updated README --- README.md | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index e2e2437..b25cf81 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,7 @@ # HDTF +<<<<<<< HEAD Flow-guided One-shot Talking Face Generation with a High-resolution Audio-visual Dataset paper supplementary [demo video](https://www.youtube.com/watch?v=uJdBgWYBTww) @@ -8,6 +9,14 @@ Flow-guided One-shot Talking Face Generation with a High-resolution Audio-visual **./HDTF_dataset** consists of *youtube video url*, *video resolution* (in our method, may not be the best resolution), *time stamps of talking face*, *facial region* (in the our method) and *the zoom scale* of the cropped window. **xx_video_url.txt:** +======= +Flow-guided One-shot Talking Face Generation with a High-resolution Audio-visual Dataset +paper supplementary + +## Details of HDTF dataset +**./HDTF_dataset** consists of *youtube video url*, *video resolution* (in our method, may not be the best resolution), *time stamps of talking face*, *facial region* (in the our method) and *the zoom scale* of the cropped window. +**xx_video_url.txt:** +>>>>>>> 8c402f4 (doc: add the downloading instructions) ``` @@ -31,18 +40,17 @@ format: video name+clip index | min_width | width | min_height | height (in format: video name+clip index | window zoom scale ``` - ## Processing of HDTF dataset -When using HDTF dataset, +When using HDTF dataset, - We provide video and url in **xx_video_url.txt**. (the highest definition of videos are 1080P or 720P). Transform video into **.mp4** format and transform interlaced video to progressive video as well. - We split long original video into talking head clips with time stamps in **xx_annotion_time.txt**. Name the splitted clip as **video name_clip index.mp4**. For example, split the video *Radio11.mp4 00:30-01:00 01:30-02:30* into *Radio11_0.mp4* and *Radio11_1.mp4* . - - Our work does not always download videos with the best resolution, so we provide two cropping methods. Thanks @universome and @Feii Yin for pointing out this problem! + - Our work does not always download videos with the best resolution, so we provide two cropping methods. Thanks @universome and @Feii Yin for pointing out this problem! 1. Download the video with reference resulotion in **xx_resolution.txt** and crop the facial region with fixed window size in **xx_crop_wh.txt**. (This method is as same as ours, but the downloaded video may not be the best resolution). - 2. First, download the video with best resulotion. Then, detect the facial landmark in the splitted talking head clips and count the square window of the face, specifically, count the facial region in each frame and merge all regions into one square range. Next, enlarge the window size with **xx_crop_ratio.txt**. Finally, crop the facial region. + 2. First, download the video with best resulotion. Then, detect the facial landmark in the splitted talking head clips and count the square window of the face, specifically, count the facial region in each frame and merge all regions into one square range. Next, enlarge the window size with **xx_crop_ratio.txt**. Finally, crop the facial region. - We resize all cropped videos into **512 x 512** resolution. @@ -62,6 +70,14 @@ The code is in **./code_animation2video**, pls visit [here](https://github.com/M #### code of reproducing other works coming soon...... +## Downloading +For convenience, we added the `download.py` script which downloads, crops and resizes the dataset. You can use it via the following command: +``` +python download.py --output_dir /path/to/output/dir --num_workers 8 +``` + +Note: some videos might become unavailable if the authors will remove them or make them private. + ## Reference if you use HDTF, pls reference From 15b819372a57855389993887527027dcad01294d Mon Sep 17 00:00:00 2001 From: Eddie Offermann Date: Thu, 14 Nov 2024 11:18:38 -0800 Subject: [PATCH 10/14] Substantial refactoring. Using yt_dlp for YouTube downloads instead of youtube-dl. Added colorama library to improve readability of output. Improving multiprocessing pooling. Lots of logging improvements. Implementation of a Logger class. --- download.py | 190 +++++++++++++++++++++++++++++----------------------- 1 file changed, 108 insertions(+), 82 deletions(-) diff --git a/download.py b/download.py index 3e21b61..c9204a6 100644 --- a/download.py +++ b/download.py @@ -10,23 +10,25 @@ $ python download.py --output_dir /tmp/data/hdtf --num_workers 8 ``` -You need tqdm and youtube-dl libraries to be installed for this script to work. +You need tqdm, yt_dlp, and colorama libraries to be installed for this script to work. """ - import os import argparse +import subprocess +import pprint from typing import List, Dict from multiprocessing import Pool -import subprocess -from subprocess import Popen, PIPE from urllib import parse +import yt_dlp from tqdm import tqdm - +from colorama import init as cinit +from colorama import Fore subsets = ["RD", "WDA", "WRA"] +cinit(autoreset=True) def download_hdtf(source_dir: os.PathLike, output_dir: os.PathLike, num_workers: int, **process_video_kwargs): os.makedirs(output_dir, exist_ok=True) @@ -39,13 +41,15 @@ def download_hdtf(source_dir: os.PathLike, output_dir: os.PathLike, num_workers: **process_video_kwargs, ) for vd in download_queue] pool = Pool(processes=num_workers) - tqdm_kwargs = dict(total=len(task_kwargs), desc=f'Downloading videos into {output_dir} (note: without sound)') + tqdm_kwargs = dict(total=len(task_kwargs), desc=f'Downloading videos into {output_dir}') for _ in tqdm(pool.imap_unordered(task_proxy, task_kwargs), **tqdm_kwargs): pass + pool.close() + pool.join() - print('Download is finished, you can now (optionally) delete the following directories, since they are not needed anymore and occupy a lot of space:') - print(' -', os.path.join(output_dir, '_videos_raw')) + print(Fore.GREEN+'Download is finished, you can now (optionally) delete the following directories, since they are not needed anymore and occupy a lot of space:') + print(Fore.GREEN+' - '+os.path.join(output_dir, '_videos_raw')) def construct_download_queue(source_dir: os.PathLike, output_dir: os.PathLike) -> List[Dict]: @@ -59,29 +63,32 @@ def construct_download_queue(source_dir: os.PathLike, output_dir: os.PathLike) - for video_name, (video_url,) in video_urls.items(): if not f'{video_name}.mp4' in intervals: - print(f'Entire {subset}/{video_name} does not contain any clip intervals, hence is broken. Discarding it.') + print(f'{Fore.RED}Clip {subset}/{video_name} does not contain any clip intervals. It will be discarded.') continue if not f'{video_name}.mp4' in resolutions or len(resolutions[f'{video_name}.mp4']) > 1: - print(f'Entire {subset}/{video_name} does not contain the resolution (or it is in a bad format), hence is broken. Discarding it.') + print(f'{Fore.RED}Clip {subset}/{video_name} does not contain an appropriate resolution (or it is in a bad format). It will be discarded.') continue all_clips_intervals = [x.split('-') for x in intervals[f'{video_name}.mp4']] clips_crops = [] clips_intervals = [] + crops_keys=', '.join(crops.keys()) for clip_idx, clip_interval in enumerate(all_clips_intervals): clip_name = f'{video_name}_{clip_idx}.mp4' if not clip_name in crops: - print(f'Clip {subset}/{clip_name} is not present in crops, hence is broken. Discarding it.') + print(f'{Fore.RED}Discarding Clip: {subset}/{clip_name}. Clip is not present in crops.') continue + else: + print(f'{Fore.GREEN}Appending Clip: {subset}/{clip_name}') clips_crops.append(crops[clip_name]) clips_intervals.append(clip_interval) clips_crops = [list(map(int, cs)) for cs in clips_crops] if len(clips_crops) == 0: - print(f'Entire {subset}/{video_name} does not contain any crops, hence is broken. Discarding it.') + print(f'{Fore.RED}Discarding {subset}/{video_name}. No cropped versions found.') continue assert len(clips_intervals) == len(clips_crops) @@ -111,18 +118,13 @@ def download_and_process_video(video_data: Dict, output_dir: str): """ raw_download_path = os.path.join(output_dir, '_videos_raw', f"{video_data['name']}.mp4") raw_download_log_file = os.path.join(output_dir, '_videos_raw', f"{video_data['name']}_download_log.txt") - download_result = download_video(video_data['id'], raw_download_path, resolution=video_data['resolution'], log_file=raw_download_log_file) + print(f"{Fore.LIGHTBLUE_EX} raw_download_path: {raw_download_path}") + + download_result = download_video(video_data['id'], raw_download_path, log_file=raw_download_log_file) if not download_result: - print('Failed to download', video_data) - print(f'See {raw_download_log_file} for details') - return - - # We do not know beforehand, what will be the resolution of the downloaded video - # Youtube-dl selects a (presumably) highest one - video_resolution = get_video_resolution(raw_download_path) - if not video_resolution != video_data['resolution']: - print(f"Downloaded resolution is not correct for {video_data['name']}: {video_resolution} vs {video_data['name']}. Discarding this video.") + print(f'{Fore.RED} Failed to download {video_data["name"]}') + print(f'{Fore.RED} See {raw_download_log_file} for details') return for clip_idx in range(len(video_data['intervals'])): @@ -132,7 +134,8 @@ def download_and_process_video(video_data: Dict, output_dir: str): crop_success = cut_and_crop_video(raw_download_path, clip_path, start, end, video_data['crops'][clip_idx]) if not crop_success: - print(f'Failed to cut-and-crop clip #{clip_idx}', video_data) + print(f'{Fore.RED} Failed to cut-and-crop clip #{clip_idx}', video_data) + pprint.pprint(video_data, indent=4, sort_dicts=False) continue @@ -147,92 +150,115 @@ def read_file_as_space_separated_data(filepath: os.PathLike) -> Dict: return data - -def download_video(video_id, download_path, resolution: int=None, video_format="mp4", log_file=None): +def download_video(video_id, download_path, resolution: int = None, video_format="bestvideo+bestaudio", log_file=None): """ Download video from YouTube. :param video_id: YouTube ID of the video. :param download_path: Where to save the video. - :param video_format: Format to download. - :param log_file: Path to a log file for youtube-dl. + :param resolution: Desired resolution (not currently used in yt-dlp config). + :param video_format: Format to download (default is best video and audio). + :param log_file: Path to a log file for yt-dlp. :return: Tuple: path to the downloaded video and a bool indicating success. - - Copy-pasted from https://github.com/ytdl-org/youtube-dl """ - # if os.path.isfile(download_path): return True # File already exists - - if log_file is None: - stderr = subprocess.DEVNULL - else: - stderr = open(log_file, "a") - video_selection = f"bestvideo[ext={video_format}]" - video_selection = video_selection if resolution is None else f"{video_selection}[height={resolution}]" - command = [ - "youtube-dl", - "https://youtube.com/watch?v={}".format(video_id), "--quiet", "-f", - video_selection, - "--output", download_path, - "--no-continue" - ] - return_code = subprocess.call(command, stderr=stderr) - success = return_code == 0 - - if log_file is not None: - stderr.close() - - return success and os.path.isfile(download_path) - - -def get_video_resolution(video_path: os.PathLike) -> int: - command = ' '.join([ - "ffprobe", - "-v", "error", - "-select_streams", "v:0", "-show_entries", "stream=height", "-of", "csv=p=0", - video_path - ]) - - process = Popen(command, stdout=PIPE, shell=True) - (output, err) = process.communicate() - return_code = process.wait() - success = return_code == 0 - - if not success: - print('Command failed:', command) - return -1 - - return int(output) - + + class Logger: + """ + A simple logger for yt-dlp to write debug, warning, and error messages to a specified log file. + + Attributes: + log_path (str): Path to the log file where messages will be written. + """ + + def __init__(self, log_path): + """ + Initializes the Logger with a log file path. + + :param log_path: Path to the file where log messages should be saved. + """ + self.log_path = log_path + + def debug(self, msg): + """ + Logs a debug message. + + :param msg: The debug message to log. + """ + with open(self.log_path, "a") as f: + f.write(f"DEBUG: {msg}\n") + + def warning(self, msg): + """ + Logs a warning message. + + :param msg: The warning message to log. + """ + with open(self.log_path, "a") as f: + f.write(f"WARNING: {msg}\n") + + def error(self, msg): + """ + Logs an error message. + + :param msg: The error message to log. + """ + with open(self.log_path, "a") as f: + f.write(f"ERROR: {msg}\n") + + # Define yt-dlp options + ydl_opts = { + 'format': video_format, # Set video format to best video and audio by default + 'outtmpl': download_path, # Output path template + 'quiet': True, # Suppress verbose output + 'merge_output_format': 'mp4', # Ensure output format is MP4 + } + + # If a log file is specified, configure the logger + if log_file: + ydl_opts['logger'] = Logger(log_file) + + # Download the video using yt-dlp + try: + with yt_dlp.YoutubeDL(ydl_opts) as ydl: + ydl.download([f'https://www.youtube.com/watch?v={video_id}']) + success = True + except Exception as e: + success = False + if log_file: + with open(log_file, "a") as f: + f.write(f"ERROR: Failed to download {video_id}. Exception: {str(e)}\n") + + result = success and os.path.isfile(download_path) + return download_path, result def cut_and_crop_video(raw_video_path, output_path, start, end, crop: List[int]): # if os.path.isfile(output_path): return True # File already exists x, out_w, y, out_h = crop - command = ' '.join([ + command = [ "ffmpeg", "-i", raw_video_path, "-strict", "-2", # Some legacy arguments "-loglevel", "quiet", # Verbosity arguments "-qscale", "0", # Preserve the quality "-y", # Overwrite if the file exists - "-ss", str(start), "-to", str(end), # Cut arguments + "-ss", str(start), + "-to", str(end), "-filter:v", f'"crop={out_w}:{out_h}:{x}:{y}"', # Crop arguments output_path - ]) - - return_code = subprocess.call(command, shell=True) + ] + return_code = subprocess.call(command) success = return_code == 0 if not success: - print('Command failed:', command) + print(f'{Fore.RED} Command failed: {" ".join(command)}') return success - if __name__ == "__main__": parser = argparse.ArgumentParser(description="Download HDTF dataset") - parser.add_argument('-s', '--source_dir', type=str, default='HDTF_dataset', help='Path to the directory with the dataset') - parser.add_argument('-o', '--output_dir', type=str, help='Where to save the videos?') - parser.add_argument('-w', '--num_workers', type=int, default=8, help='Number of workers for downloading') + parser.add_argument('-s', '--source_dir', type=str, default='HDTF_dataset', help='Path to the directory with the dataset description') + parser.add_argument('-o', '--output_dir', type=str, default='dataset', help='Where to save the videos?') + parser.add_argument('-w', '--num_workers', type=int, default=1, help='Number of workers for downloading.') args = parser.parse_args() download_hdtf( From 96a979a077825c86f247e34c5b166fc0ac2507dc Mon Sep 17 00:00:00 2001 From: Eddie Offermann Date: Thu, 14 Nov 2024 12:00:37 -0800 Subject: [PATCH 11/14] Substantial refactoring. Using yt_dlp for YouTube downloads instead of youtube-dl. Added colorama library to improve readability of output. Improving multiprocessing pooling. Lots of logging improvements. Implementation of a Logger class. Substantial additions to have proper docstrings throughout. --- download.py | 389 ++++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 335 insertions(+), 54 deletions(-) diff --git a/download.py b/download.py index c9204a6..45f5846 100644 --- a/download.py +++ b/download.py @@ -31,6 +31,41 @@ cinit(autoreset=True) def download_hdtf(source_dir: os.PathLike, output_dir: os.PathLike, num_workers: int, **process_video_kwargs): + """ + Downloads and processes videos from the HDTF dataset in parallel using multiprocessing. + + The function manages the download process by: + - Creating the necessary output directories. + - Constructing a download queue from files in the specified source directory. + - Using a multiprocessing pool to handle downloads and subsequent processing. + - Providing progress tracking with tqdm. + + After completing the download, a message is displayed with optional cleanup instructions to delete + temporary raw video files to save space. + + Args: + source_dir (os.PathLike): The directory containing HDTF metadata files, including video URLs, + crop data, time intervals, and resolution information for each video subset. + output_dir (os.PathLike): The directory where downloaded videos and processed files will be saved. + num_workers (int): The number of parallel worker processes to use for downloading. + **process_video_kwargs: Additional keyword arguments passed to `download_and_process_video`, + allowing custom settings for processing each video. + + Workflow: + 1. Creates the primary output directory and a subdirectory `_videos_raw` for raw downloads. + 2. Calls `construct_download_queue` to prepare a list of video download tasks based on the metadata + available in `source_dir`. Each entry in the queue includes details needed for downloading and processing. + 3. Uses a multiprocessing `Pool` to execute `download_and_process_video` for each video in `download_queue`, + with progress displayed via tqdm. + 4. After completing downloads, provides a message about optional cleanup for temporary video files. + + Returns: + None + + Raises: + AssertionError: If certain data inconsistencies are detected during download queue construction, such as + missing or malformed intervals, crops, or resolution information. + """ os.makedirs(output_dir, exist_ok=True) os.makedirs(os.path.join(output_dir, '_videos_raw'), exist_ok=True) @@ -39,9 +74,10 @@ def download_hdtf(source_dir: os.PathLike, output_dir: os.PathLike, num_workers: video_data=vd, output_dir=output_dir, **process_video_kwargs, - ) for vd in download_queue] + ) for vd in download_queue] pool = Pool(processes=num_workers) - tqdm_kwargs = dict(total=len(task_kwargs), desc=f'Downloading videos into {output_dir}') + tqdm_kwargs = dict(total=len(task_kwargs), + desc=f'Downloading videos into {output_dir}') for _ in tqdm(pool.imap_unordered(task_proxy, task_kwargs), **tqdm_kwargs): pass @@ -53,32 +89,85 @@ def download_hdtf(source_dir: os.PathLike, output_dir: os.PathLike, num_workers: def construct_download_queue(source_dir: os.PathLike, output_dir: os.PathLike) -> List[Dict]: + """ + Constructs a queue of videos to be downloaded and processed based on metadata from the HDTF dataset. + + This function reads metadata files for each subset in the HDTF dataset, which provide information on: + - Video URLs. + - Time intervals indicating segments to be extracted from each video. + - Crop coordinates defining the regions of interest. + - Resolution information for each video. + + For each valid video file, an entry is created in the download queue with detailed information required + for downloading, cropping, and segmenting. + + Args: + source_dir (os.PathLike): Path to the directory containing metadata files (`*_video_url.txt`, + `*_crop_wh.txt`, `*_annotion_time.txt`, and `*_resolution.txt`) for each subset. + output_dir (os.PathLike): Path to the directory where the downloaded and processed videos will be stored. + + Returns: + List[Dict]: A list of dictionaries, each representing a video to download and process. Each dictionary + contains the following keys: + - 'name': Combined subset and video name identifier. + - 'id': YouTube video ID extracted from the video URL. + - 'intervals': List of start and end times for each clip segment. + - 'crops': List of crop coordinates for each segment. + - 'output_dir': The output directory path for this video. + - 'resolution': Desired resolution for the video. + + Workflow: + 1. Reads metadata files for each subset (e.g., "RD", "WDA", "WRA") to gather video URLs, time intervals, crops, + and resolution information. + 2. For each video: + - Ensures it has valid time intervals and resolution data. + - Verifies that all segments have corresponding crop information. + - Discards videos missing required metadata, and prints warnings about invalid or missing data. + 3. Creates a download queue entry for each valid video with the required download and processing data. + + Raises: + AssertionError: If the video segment data is inconsistent, such as: + - Missing or malformed time intervals. + - Incomplete or non-square crop data. + These assertions ensure that only well-formed entries are added to the download queue. + + Example: + >>> construct_download_queue("HDTF_dataset", "/tmp/data/hdtf") + [{'name': 'RD_sample_video', 'id': 'abc123', 'intervals': [[0, 10], [15, 25]], + 'crops': [[0, 128, 0, 128], [0, 128, 0, 128]], 'output_dir': '/tmp/data/hdtf', 'resolution': '720p'}] + """ download_queue = [] for subset in subsets: - video_urls = read_file_as_space_separated_data(os.path.join(source_dir, f'{subset}_video_url.txt')) - crops = read_file_as_space_separated_data(os.path.join(source_dir, f'{subset}_crop_wh.txt')) - intervals = read_file_as_space_separated_data(os.path.join(source_dir, f'{subset}_annotion_time.txt')) - resolutions = read_file_as_space_separated_data(os.path.join(source_dir, f'{subset}_resolution.txt')) + video_urls = read_file_as_space_separated_data( + os.path.join(source_dir, f'{subset}_video_url.txt')) + crops = read_file_as_space_separated_data( + os.path.join(source_dir, f'{subset}_crop_wh.txt')) + intervals = read_file_as_space_separated_data( + os.path.join(source_dir, f'{subset}_annotion_time.txt')) + resolutions = read_file_as_space_separated_data( + os.path.join(source_dir, f'{subset}_resolution.txt')) for video_name, (video_url,) in video_urls.items(): if not f'{video_name}.mp4' in intervals: - print(f'{Fore.RED}Clip {subset}/{video_name} does not contain any clip intervals. It will be discarded.') + print( + f'{Fore.RED}Clip {subset}/{video_name} does not contain any clip intervals. It will be discarded.') continue if not f'{video_name}.mp4' in resolutions or len(resolutions[f'{video_name}.mp4']) > 1: print(f'{Fore.RED}Clip {subset}/{video_name} does not contain an appropriate resolution (or it is in a bad format). It will be discarded.') continue - all_clips_intervals = [x.split('-') for x in intervals[f'{video_name}.mp4']] + all_clips_intervals = [x.split('-') + for x in intervals[f'{video_name}.mp4']] clips_crops = [] clips_intervals = [] - crops_keys=', '.join(crops.keys()) for clip_idx, clip_interval in enumerate(all_clips_intervals): clip_name = f'{video_name}_{clip_idx}.mp4' if not clip_name in crops: - print(f'{Fore.RED}Discarding Clip: {subset}/{clip_name}. Clip is not present in crops.') + print( + f'{Fore.RED}Discarding Clip: {subset}/{clip_name}. Clip is not present in crops.') continue else: print(f'{Fore.GREEN}Appending Clip: {subset}/{clip_name}') @@ -88,13 +177,17 @@ def construct_download_queue(source_dir: os.PathLike, output_dir: os.PathLike) - clips_crops = [list(map(int, cs)) for cs in clips_crops] if len(clips_crops) == 0: - print(f'{Fore.RED}Discarding {subset}/{video_name}. No cropped versions found.') + print( + f'{Fore.RED}Discarding {subset}/{video_name}. No cropped versions found.') continue assert len(clips_intervals) == len(clips_crops) - assert set([len(vi) for vi in clips_intervals]) == {2}, f"Broken time interval, {clips_intervals}" - assert set([len(vc) for vc in clips_crops]) == {4}, f"Broken crops, {clips_crops}" - assert all([vc[1] == vc[3] for vc in clips_crops]), f'Some crops are not square, {clips_crops}' + assert set([len(vi) for vi in clips_intervals]) == { + 2}, f"Broken time interval, {clips_intervals}" + assert set([len(vc) for vc in clips_crops]) == { + 4}, f"Broken crops, {clips_crops}" + assert all([vc[1] == vc[3] for vc in clips_crops] + ), f'Some crops are not square, {clips_crops}' download_queue.append({ 'name': f'{subset}_{video_name}', @@ -107,20 +200,100 @@ def construct_download_queue(source_dir: os.PathLike, output_dir: os.PathLike) - return download_queue - def task_proxy(kwargs): + """ + A proxy function to execute `download_and_process_video` with unpacked keyword arguments. + + This function serves as a wrapper that allows passing a dictionary of arguments (`kwargs`) + to the `download_and_process_video` function. It is primarily used in conjunction with + multiprocessing, where it enables the `Pool.imap_unordered` method to handle the video + processing tasks in parallel. + + Args: + kwargs (dict): A dictionary of arguments required by `download_and_process_video`. + This typically includes: + - 'video_data': A dictionary containing video details (ID, name, intervals, crops, etc.). + - 'output_dir': The directory path where processed clips will be saved. + + Returns: + None + + Usage: + The `task_proxy` function is designed for use with parallel processing. By passing a dictionary + of arguments instead of positional arguments, it enables compatibility with the multiprocessing + pool's mapping methods. + + Example: + >>> task_kwargs = {'video_data': {...}, 'output_dir': '/path/to/output'} + >>> task_proxy(task_kwargs) + + Notes: + This function simplifies the interface for multiprocessing tasks, allowing + `download_and_process_video` to be used directly within the parallel processing workflow + without modifying its original function signature. + """ return download_and_process_video(**kwargs) + def download_and_process_video(video_data: Dict, output_dir: str): """ - Downloads the video and cuts/crops it into several ones according to the provided time intervals + Downloads a video from YouTube and processes it by segmenting and cropping based on provided intervals and crop data. + + The function performs the following steps: + 1. Downloads the specified video to a raw file path within the `_videos_raw` subdirectory of `output_dir`. + 2. Iterates over the specified intervals and crop data to create individual video clips: + - Each clip is extracted according to its specified time interval. + - Each clip is cropped based on the coordinates provided in `video_data['crops']`. + 3. Saves each processed clip in `output_dir` with a unique name indicating the video and clip index. + + Args: + video_data (dict): A dictionary containing metadata for the video to be downloaded and processed. + Expected keys include: + - 'id': The YouTube ID of the video. + - 'name': A unique name identifier for the video. + - 'intervals': A list of time intervals (start, end) for each clip segment. + - 'crops': A list of crop coordinates (x, width, y, height) for each clip segment. + - 'resolution': The desired resolution of the video. + output_dir (str): Path to the directory where processed video clips will be saved. + + Workflow: + - Downloads the video using `download_video`, saving it as `{video_name}.mp4` in `_videos_raw`. + - For each time interval in `video_data['intervals']`: + - Extracts the segment and applies cropping according to the corresponding entry in `video_data['crops']`. + - Saves each clip in `output_dir` with a file name formatted as `{video_name}_{clip_idx:03d}.mp4`. + - Logs errors to the console if downloading or processing fails for a particular segment or crop. + + Returns: + None + + Raises: + ValueError: If the video cannot be downloaded or if any of the cropping or segmentation fails. + + Example: + >>> video_data = { + 'id': 'abc123', + 'name': 'sample_video', + 'intervals': [[0, 10], [15, 25]], + 'crops': [[0, 128, 0, 128], [10, 118, 10, 118]], + 'output_dir': '/tmp/data/hdtf', + 'resolution': '720p' + } + >>> download_and_process_video(video_data, '/tmp/data/hdtf') + + Notes: + - This function requires `ffmpeg` to be installed for segmenting and cropping video clips. + - Detailed logging is provided to indicate the status of each clip's download and processing. + - If `download_video` fails, an error message is printed to the console, and the function skips further processing. """ - raw_download_path = os.path.join(output_dir, '_videos_raw', f"{video_data['name']}.mp4") - raw_download_log_file = os.path.join(output_dir, '_videos_raw', f"{video_data['name']}_download_log.txt") + raw_download_path = os.path.join( + output_dir, '_videos_raw', f"{video_data['name']}.mp4") + raw_download_log_file = os.path.join( + output_dir, '_videos_raw', f"{video_data['name']}_download_log.txt") print(f"{Fore.LIGHTBLUE_EX} raw_download_path: {raw_download_path}") - - download_result = download_video(video_data['id'], raw_download_path, log_file=raw_download_log_file) + + download_result = download_video( + video_data['id'], raw_download_path, log_file=raw_download_log_file) if not download_result: print(f'{Fore.RED} Failed to download {video_data["name"]}') @@ -131,17 +304,43 @@ def download_and_process_video(video_data: Dict, output_dir: str): start, end = video_data['intervals'][clip_idx] clip_name = f'{video_data["name"]}_{clip_idx:03d}' clip_path = os.path.join(output_dir, clip_name + '.mp4') - crop_success = cut_and_crop_video(raw_download_path, clip_path, start, end, video_data['crops'][clip_idx]) + crop_success = cut_and_crop_video( + raw_download_path, clip_path, start, end, video_data['crops'][clip_idx]) if not crop_success: print(f'{Fore.RED} Failed to cut-and-crop clip #{clip_idx}', video_data) pprint.pprint(video_data, indent=4, sort_dicts=False) continue - def read_file_as_space_separated_data(filepath: os.PathLike) -> Dict: """ - Reads a file as a space-separated dataframe, where the first column is the index + Reads a space-separated file and returns its contents as a dictionary. + + This function reads a text file where each line contains space-separated values. + The first value in each line is treated as the key, and the remaining values are + stored as a list associated with that key. This is useful for parsing metadata + files with a consistent space-separated format. + + Args: + filepath (os.PathLike): The path to the file to be read. + + Returns: + Dict: A dictionary where each key corresponds to the first item in a line, + and each value is a list of the remaining items in that line. + + Example: + Suppose `example.txt` contains: + video1 1280 720 + video2 640 480 + >>> read_file_as_space_separated_data("example.txt") + {'video1': ['1280', '720'], 'video2': ['640', '480']} + + Notes: + - Blank lines are not supported and may cause errors. + - Each line must contain at least one space-separated value to be valid. + + Raises: + IOError: If the file cannot be opened or read. """ with open(filepath, 'r') as f: lines = f.read().splitlines() @@ -150,29 +349,60 @@ def read_file_as_space_separated_data(filepath: os.PathLike) -> Dict: return data -def download_video(video_id, download_path, resolution: int = None, video_format="bestvideo+bestaudio", log_file=None): + +def download_video(video_id, download_path, video_format="bestvideo+bestaudio", log_file=None): """ - Download video from YouTube. - :param video_id: YouTube ID of the video. - :param download_path: Where to save the video. - :param resolution: Desired resolution (not currently used in yt-dlp config). - :param video_format: Format to download (default is best video and audio). - :param log_file: Path to a log file for yt-dlp. - :return: Tuple: path to the downloaded video and a bool indicating success. + Downloads a YouTube video in the specified format and saves it to a given path. + + This function uses `yt-dlp` to download a video by its YouTube ID, selecting the highest + available quality by default. It provides options for specifying a custom format or resolution + and can log download progress and errors to a specified log file. + + Args: + video_id (str): The YouTube ID of the video to download. + download_path (str): The full path (including file name) where the downloaded video will be saved. + video_format (str, optional): The video and audio format selection for yt-dlp. Defaults to + "bestvideo+bestaudio" for highest available quality. + log_file (str, optional): Path to a file where log messages (debug, warnings, and errors) + will be recorded. If None, logging to a file is disabled. + + Returns: + Tuple[str, bool]: A tuple where: + - The first element is the path to the downloaded video file. + - The second element is a boolean indicating success (True if the file + was downloaded successfully, False otherwise). + + Workflow: + 1. Constructs `yt-dlp` options based on the provided arguments, including `format`, `outtmpl`, + and `logger` if a log file is specified. + 2. Attempts to download the video. If successful, verifies the file exists at `download_path`. + 3. Logs errors if the download fails and saves them to `log_file` if specified. + + Raises: + Exception: Any exceptions during the download are logged if `log_file` is provided, and the + function will return False for success. + + Example: + >>> download_video("abc123", "/path/to/video.mp4", log_file="/path/to/log.txt") + ("/path/to/video.mp4", True) + + Notes: + - Requires `yt-dlp` to be installed. + - Requires `ffmpeg` if merging video and audio streams is necessary. + - Custom logging is provided through a nested `Logger` class if `log_file` is specified. """ - class Logger: """ A simple logger for yt-dlp to write debug, warning, and error messages to a specified log file. - + Attributes: log_path (str): Path to the log file where messages will be written. """ - + def __init__(self, log_path): """ Initializes the Logger with a log file path. - + :param log_path: Path to the file where log messages should be saved. """ self.log_path = log_path @@ -180,7 +410,7 @@ def __init__(self, log_path): def debug(self, msg): """ Logs a debug message. - + :param msg: The debug message to log. """ with open(self.log_path, "a") as f: @@ -189,7 +419,7 @@ def debug(self, msg): def warning(self, msg): """ Logs a warning message. - + :param msg: The warning message to log. """ with open(self.log_path, "a") as f: @@ -198,7 +428,7 @@ def warning(self, msg): def error(self, msg): """ Logs an error message. - + :param msg: The error message to log. """ with open(self.log_path, "a") as f: @@ -206,16 +436,17 @@ def error(self, msg): # Define yt-dlp options ydl_opts = { - 'format': video_format, # Set video format to best video and audio by default + # Set video format to best video and audio by default + 'format': video_format, 'outtmpl': download_path, # Output path template 'quiet': True, # Suppress verbose output 'merge_output_format': 'mp4', # Ensure output format is MP4 } - + # If a log file is specified, configure the logger if log_file: ydl_opts['logger'] = Logger(log_file) - + # Download the video using yt-dlp try: with yt_dlp.YoutubeDL(ydl_opts) as ydl: @@ -225,25 +456,71 @@ def error(self, msg): success = False if log_file: with open(log_file, "a") as f: - f.write(f"ERROR: Failed to download {video_id}. Exception: {str(e)}\n") - + f.write( + f"ERROR: Failed to download {video_id}. Exception: {str(e)}\n") + result = success and os.path.isfile(download_path) return download_path, result def cut_and_crop_video(raw_video_path, output_path, start, end, crop: List[int]): - # if os.path.isfile(output_path): return True # File already exists - + """ + Cuts and crops a video segment from a specified start to end time and saves it to the output path. + + This function uses `ffmpeg` to: + 1. Extract a segment of the video from `start` to `end` time. + 2. Apply a crop filter to the segment based on the provided crop coordinates. + 3. Save the processed clip to `output_path` with the original quality preserved. + + Args: + raw_video_path (str): Path to the source video file to be processed. + output_path (str): Path where the processed video clip will be saved, including the file name. + start (float or int): Start time in seconds for the video segment to be cut. + end (float or int): End time in seconds for the video segment to be cut. + crop (List[int]): A list specifying crop parameters [x, width, y, height], where: + - x (int): The x-coordinate of the top-left corner of the crop area. + - width (int): The width of the crop area. + - y (int): The y-coordinate of the top-left corner of the crop area. + - height (int): The height of the crop area. + + Returns: + bool: True if the cutting and cropping were successful, False otherwise. + + Workflow: + 1. Constructs an `ffmpeg` command to cut the video from `start` to `end` and apply the specified crop filter. + 2. Executes the command with `subprocess.call` to process the video. + 3. Checks the return code to confirm successful execution. Prints a message if the process fails. + + Raises: + ValueError: If `crop` does not contain exactly four values, or if any component is invalid. + FileNotFoundError: If `ffmpeg` is not installed or accessible from the system PATH. + + Example: + >>> cut_and_crop_video( + raw_video_path="/path/to/source.mp4", + output_path="/path/to/clip.mp4", + start=10, + end=20, + crop=[50, 200, 30, 200] + ) + True + + Notes: + - Requires `ffmpeg` to be installed and accessible from the command line. + - If `output_path` already exists, it will be overwritten. + - `-qscale 0` is used to preserve the video quality. + - The crop filter uses the format `crop=width:height:x:y`, where `x` and `y` specify the top-left corner. + """ x, out_w, y, out_h = crop command = [ "ffmpeg", "-i", raw_video_path, - "-strict", "-2", # Some legacy arguments - "-loglevel", "quiet", # Verbosity arguments - "-qscale", "0", # Preserve the quality - "-y", # Overwrite if the file exists - "-ss", str(start), + "-strict", "-2", # Some legacy arguments + "-loglevel", "quiet", # Verbosity arguments + "-qscale", "0", # Preserve the quality + "-y", # Overwrite if the file exists + "-ss", str(start), "-to", str(end), - "-filter:v", f'"crop={out_w}:{out_h}:{x}:{y}"', # Crop arguments + "-filter:v", f'"crop={out_w}:{out_h}:{x}:{y}"', # Crop arguments output_path ] return_code = subprocess.call(command) @@ -254,11 +531,15 @@ def cut_and_crop_video(raw_video_path, output_path, start, end, crop: List[int]) return success + if __name__ == "__main__": parser = argparse.ArgumentParser(description="Download HDTF dataset") - parser.add_argument('-s', '--source_dir', type=str, default='HDTF_dataset', help='Path to the directory with the dataset description') - parser.add_argument('-o', '--output_dir', type=str, default='dataset', help='Where to save the videos?') - parser.add_argument('-w', '--num_workers', type=int, default=1, help='Number of workers for downloading.') + parser.add_argument('-s', '--source_dir', type=str, default='HDTF_dataset', + help='Path to the directory with the dataset description') + parser.add_argument('-o', '--output_dir', type=str, + default='dataset', help='Where to save the videos?') + parser.add_argument('-w', '--num_workers', type=int, + default=1, help='Number of workers for downloading.') args = parser.parse_args() download_hdtf( From ee682ee6139c2cbe8e8a41bbeb1b6eb678910965 Mon Sep 17 00:00:00 2001 From: Eddie Offermann Date: Thu, 14 Nov 2024 12:07:36 -0800 Subject: [PATCH 12/14] Minor fix to syntax - the crop filter portion shouldn't have been quoted. --- download.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/download.py b/download.py index 45f5846..ef75ac1 100644 --- a/download.py +++ b/download.py @@ -308,7 +308,7 @@ def download_and_process_video(video_data: Dict, output_dir: str): raw_download_path, clip_path, start, end, video_data['crops'][clip_idx]) if not crop_success: - print(f'{Fore.RED} Failed to cut-and-crop clip #{clip_idx}', video_data) + print(f'{Fore.RED} Failed to cut-and-crop clip #{clip_idx}') pprint.pprint(video_data, indent=4, sort_dicts=False) continue @@ -520,7 +520,7 @@ def cut_and_crop_video(raw_video_path, output_path, start, end, crop: List[int]) "-y", # Overwrite if the file exists "-ss", str(start), "-to", str(end), - "-filter:v", f'"crop={out_w}:{out_h}:{x}:{y}"', # Crop arguments + "-filter:v", f"crop={out_w}:{out_h}:{x}:{y}", # Crop arguments output_path ] return_code = subprocess.call(command) From 05007c887858d8eabc7f6cd8064d63514f9bc126 Mon Sep 17 00:00:00 2001 From: Eddie Offermann Date: Thu, 14 Nov 2024 13:05:32 -0800 Subject: [PATCH 13/14] Tweaks to formatting for consistency. --- download.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/download.py b/download.py index ef75ac1..8d80d32 100644 --- a/download.py +++ b/download.py @@ -536,10 +536,10 @@ def cut_and_crop_video(raw_video_path, output_path, start, end, crop: List[int]) parser = argparse.ArgumentParser(description="Download HDTF dataset") parser.add_argument('-s', '--source_dir', type=str, default='HDTF_dataset', help='Path to the directory with the dataset description') - parser.add_argument('-o', '--output_dir', type=str, - default='dataset', help='Where to save the videos?') - parser.add_argument('-w', '--num_workers', type=int, - default=1, help='Number of workers for downloading.') + parser.add_argument('-o', '--output_dir', type=str, default='download', + help='Where to save the videos?') + parser.add_argument('-w', '--num_workers', type=int, default=1, + help='Number of workers for downloading.') args = parser.parse_args() download_hdtf( From 43d0c35cd90ef60226002df1cb0b43c6097894c3 Mon Sep 17 00:00:00 2001 From: Eddie Offermann Date: Thu, 14 Nov 2024 13:14:01 -0800 Subject: [PATCH 14/14] Adding the download directory and main log file to .gitignore --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..46c31ca --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +download/ +output_logfile.txt