From 36c3ef44d3267a6d15c90a5372a5e8f809f66068 Mon Sep 17 00:00:00 2001 From: Ivan Skorokhodov Date: Mon, 23 Aug 2021 16:39:06 +0300 Subject: [PATCH 1/4] feat: add the downloading script --- download.py | 242 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 242 insertions(+) create mode 100644 download.py diff --git a/download.py b/download.py new file mode 100644 index 0000000..3e21b61 --- /dev/null +++ b/download.py @@ -0,0 +1,242 @@ +""" +This file downloads almost all the videos from the HDTF dataset. Some videos are discarded for the following reasons: +- they do not contain cropping information because they are somewhat noisy (hand moving, background changing, etc.) +- they are not available on youtube anymore (at all or in the specified format) + +The discarded videos constitute a small portion of the dataset, so you can try to re-download them manually on your own. + +Usage: +``` +$ python download.py --output_dir /tmp/data/hdtf --num_workers 8 +``` + +You need tqdm and youtube-dl libraries to be installed for this script to work. +""" + + +import os +import argparse +from typing import List, Dict +from multiprocessing import Pool +import subprocess +from subprocess import Popen, PIPE +from urllib import parse + +from tqdm import tqdm + + +subsets = ["RD", "WDA", "WRA"] + + +def download_hdtf(source_dir: os.PathLike, output_dir: os.PathLike, num_workers: int, **process_video_kwargs): + os.makedirs(output_dir, exist_ok=True) + os.makedirs(os.path.join(output_dir, '_videos_raw'), exist_ok=True) + + download_queue = construct_download_queue(source_dir, output_dir) + task_kwargs = [dict( + video_data=vd, + output_dir=output_dir, + **process_video_kwargs, + ) for vd in download_queue] + pool = Pool(processes=num_workers) + tqdm_kwargs = dict(total=len(task_kwargs), desc=f'Downloading videos into {output_dir} (note: without sound)') + + for _ in tqdm(pool.imap_unordered(task_proxy, task_kwargs), **tqdm_kwargs): + pass + + print('Download is finished, you can now (optionally) delete the following directories, since they are not needed anymore and occupy a lot of space:') + print(' -', os.path.join(output_dir, '_videos_raw')) + + +def construct_download_queue(source_dir: os.PathLike, output_dir: os.PathLike) -> List[Dict]: + download_queue = [] + + for subset in subsets: + video_urls = read_file_as_space_separated_data(os.path.join(source_dir, f'{subset}_video_url.txt')) + crops = read_file_as_space_separated_data(os.path.join(source_dir, f'{subset}_crop_wh.txt')) + intervals = read_file_as_space_separated_data(os.path.join(source_dir, f'{subset}_annotion_time.txt')) + resolutions = read_file_as_space_separated_data(os.path.join(source_dir, f'{subset}_resolution.txt')) + + for video_name, (video_url,) in video_urls.items(): + if not f'{video_name}.mp4' in intervals: + print(f'Entire {subset}/{video_name} does not contain any clip intervals, hence is broken. Discarding it.') + continue + + if not f'{video_name}.mp4' in resolutions or len(resolutions[f'{video_name}.mp4']) > 1: + print(f'Entire {subset}/{video_name} does not contain the resolution (or it is in a bad format), hence is broken. Discarding it.') + continue + + all_clips_intervals = [x.split('-') for x in intervals[f'{video_name}.mp4']] + clips_crops = [] + clips_intervals = [] + + for clip_idx, clip_interval in enumerate(all_clips_intervals): + clip_name = f'{video_name}_{clip_idx}.mp4' + if not clip_name in crops: + print(f'Clip {subset}/{clip_name} is not present in crops, hence is broken. Discarding it.') + continue + clips_crops.append(crops[clip_name]) + clips_intervals.append(clip_interval) + + clips_crops = [list(map(int, cs)) for cs in clips_crops] + + if len(clips_crops) == 0: + print(f'Entire {subset}/{video_name} does not contain any crops, hence is broken. Discarding it.') + continue + + assert len(clips_intervals) == len(clips_crops) + assert set([len(vi) for vi in clips_intervals]) == {2}, f"Broken time interval, {clips_intervals}" + assert set([len(vc) for vc in clips_crops]) == {4}, f"Broken crops, {clips_crops}" + assert all([vc[1] == vc[3] for vc in clips_crops]), f'Some crops are not square, {clips_crops}' + + download_queue.append({ + 'name': f'{subset}_{video_name}', + 'id': parse.parse_qs(parse.urlparse(video_url).query)['v'][0], + 'intervals': clips_intervals, + 'crops': clips_crops, + 'output_dir': output_dir, + 'resolution': resolutions[f'{video_name}.mp4'][0] + }) + + return download_queue + + +def task_proxy(kwargs): + return download_and_process_video(**kwargs) + + +def download_and_process_video(video_data: Dict, output_dir: str): + """ + Downloads the video and cuts/crops it into several ones according to the provided time intervals + """ + raw_download_path = os.path.join(output_dir, '_videos_raw', f"{video_data['name']}.mp4") + raw_download_log_file = os.path.join(output_dir, '_videos_raw', f"{video_data['name']}_download_log.txt") + download_result = download_video(video_data['id'], raw_download_path, resolution=video_data['resolution'], log_file=raw_download_log_file) + + if not download_result: + print('Failed to download', video_data) + print(f'See {raw_download_log_file} for details') + return + + # We do not know beforehand, what will be the resolution of the downloaded video + # Youtube-dl selects a (presumably) highest one + video_resolution = get_video_resolution(raw_download_path) + if not video_resolution != video_data['resolution']: + print(f"Downloaded resolution is not correct for {video_data['name']}: {video_resolution} vs {video_data['name']}. Discarding this video.") + return + + for clip_idx in range(len(video_data['intervals'])): + start, end = video_data['intervals'][clip_idx] + clip_name = f'{video_data["name"]}_{clip_idx:03d}' + clip_path = os.path.join(output_dir, clip_name + '.mp4') + crop_success = cut_and_crop_video(raw_download_path, clip_path, start, end, video_data['crops'][clip_idx]) + + if not crop_success: + print(f'Failed to cut-and-crop clip #{clip_idx}', video_data) + continue + + +def read_file_as_space_separated_data(filepath: os.PathLike) -> Dict: + """ + Reads a file as a space-separated dataframe, where the first column is the index + """ + with open(filepath, 'r') as f: + lines = f.read().splitlines() + lines = [[v.strip() for v in l.strip().split(' ')] for l in lines] + data = {l[0]: l[1:] for l in lines} + + return data + + +def download_video(video_id, download_path, resolution: int=None, video_format="mp4", log_file=None): + """ + Download video from YouTube. + :param video_id: YouTube ID of the video. + :param download_path: Where to save the video. + :param video_format: Format to download. + :param log_file: Path to a log file for youtube-dl. + :return: Tuple: path to the downloaded video and a bool indicating success. + + Copy-pasted from https://github.com/ytdl-org/youtube-dl + """ + # if os.path.isfile(download_path): return True # File already exists + + if log_file is None: + stderr = subprocess.DEVNULL + else: + stderr = open(log_file, "a") + video_selection = f"bestvideo[ext={video_format}]" + video_selection = video_selection if resolution is None else f"{video_selection}[height={resolution}]" + command = [ + "youtube-dl", + "https://youtube.com/watch?v={}".format(video_id), "--quiet", "-f", + video_selection, + "--output", download_path, + "--no-continue" + ] + return_code = subprocess.call(command, stderr=stderr) + success = return_code == 0 + + if log_file is not None: + stderr.close() + + return success and os.path.isfile(download_path) + + +def get_video_resolution(video_path: os.PathLike) -> int: + command = ' '.join([ + "ffprobe", + "-v", "error", + "-select_streams", "v:0", "-show_entries", "stream=height", "-of", "csv=p=0", + video_path + ]) + + process = Popen(command, stdout=PIPE, shell=True) + (output, err) = process.communicate() + return_code = process.wait() + success = return_code == 0 + + if not success: + print('Command failed:', command) + return -1 + + return int(output) + + +def cut_and_crop_video(raw_video_path, output_path, start, end, crop: List[int]): + # if os.path.isfile(output_path): return True # File already exists + + x, out_w, y, out_h = crop + + command = ' '.join([ + "ffmpeg", "-i", raw_video_path, + "-strict", "-2", # Some legacy arguments + "-loglevel", "quiet", # Verbosity arguments + "-qscale", "0", # Preserve the quality + "-y", # Overwrite if the file exists + "-ss", str(start), "-to", str(end), # Cut arguments + "-filter:v", f'"crop={out_w}:{out_h}:{x}:{y}"', # Crop arguments + output_path + ]) + + return_code = subprocess.call(command, shell=True) + success = return_code == 0 + + if not success: + print('Command failed:', command) + + return success + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Download HDTF dataset") + parser.add_argument('-s', '--source_dir', type=str, default='HDTF_dataset', help='Path to the directory with the dataset') + parser.add_argument('-o', '--output_dir', type=str, help='Where to save the videos?') + parser.add_argument('-w', '--num_workers', type=int, default=8, help='Number of workers for downloading') + args = parser.parse_args() + + download_hdtf( + args.source_dir, + args.output_dir, + args.num_workers, + ) From 8c402f412953c77771b5c4ae03dac6dc6bdb95e2 Mon Sep 17 00:00:00 2001 From: Ivan Skorokhodov Date: Mon, 23 Aug 2021 16:44:38 +0300 Subject: [PATCH 2/4] doc: add the downloading instructions --- README.md | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index fcd4cd7..036b12a 100644 --- a/README.md +++ b/README.md @@ -1,11 +1,11 @@ # HDTF -Flow-guided One-shot Talking Face Generation with a High-resolution Audio-visual Dataset +Flow-guided One-shot Talking Face Generation with a High-resolution Audio-visual Dataset paper supplementary ## Details of HDTF dataset **./HDTF_dataset** consists of *youtube video url*, *video resolution* (in our method, may not be the best resolution), *time stamps of talking face*, *facial region* (in the our method) and *the zoom scale* of the cropped window. -**xx_video_url.txt:** +**xx_video_url.txt:** ``` @@ -29,24 +29,31 @@ format: video name+clip index | min_width | width | min_height | height (in format: video name+clip index | window zoom scale ``` - ## Processing of HDTF dataset -When using HDTF dataset, +When using HDTF dataset, - We provide video and url in **xx_video_url.txt**. (the highest definition of videos are 1080P or 720P). Transform video into **.mp4** format and transform interlaced video to progressive video as well. - We split long original video into talking head clips with time stamps in **xx_annotion_time.txt**. Name the splitted clip as **video name_clip index.mp4**. For example, split the video *Radio11.mp4 00:30-01:00 01:30-02:30* into *Radio11_0.mp4* and *Radio11_1.mp4* . - - Our work does not always download videos with the best resolution, so we provide two cropping methods. Thanks @universome and @Feii Yin for pointing out this problem! + - Our work does not always download videos with the best resolution, so we provide two cropping methods. Thanks @universome and @Feii Yin for pointing out this problem! 1. Download the video with reference resulotion in **xx_resolution.txt** and crop the facial region with fixed window size in **xx_crop_wh.txt**. (This method is as same as ours, but the downloaded video may not be the best resolution). - 2. First, download the video with best resulotion. Then, detect the facial landmark in the splitted talking head clips and count the square window of the face, specifically, count the facial region in each frame and merge all regions into one square range. Next, enlarge the window size with **xx_crop_ratio.txt**. Finally, crop the facial region. + 2. First, download the video with best resulotion. Then, detect the facial landmark in the splitted talking head clips and count the square window of the face, specifically, count the facial region in each frame and merge all regions into one square range. Next, enlarge the window size with **xx_crop_ratio.txt**. Finally, crop the facial region. - We resize all cropped videos into **512 x 512** resolution. The HDTF dataset is available to download under a Creative Commons Attribution 4.0 International License. If you face any problems when processing HDTF, pls contact me. +## Downloading +For convenience, we added the `download.py` script which downloads, crops and resizes the dataset. You can use it via the following command: +``` +python download.py --output_dir /path/to/output/dir --num_workers 8 +``` + +Note: some videos might become unavailable if the authors will remove them or make them private. + ## Reference if you use HDTF, pls reference From c3cb2aaf7187d9e34e4a529176a2c0c55235dc14 Mon Sep 17 00:00:00 2001 From: nawta Date: Mon, 23 Jun 2025 05:30:25 +0900 Subject: [PATCH 3/4] modified download.py and README.md for 2025 ver --- README.md | 143 +++++++++++++++++++++++++++++++++++++++++++++++++++- download.py | 47 +++++++++++------ 2 files changed, 173 insertions(+), 17 deletions(-) diff --git a/README.md b/README.md index 036b12a..4890b94 100644 --- a/README.md +++ b/README.md @@ -3,11 +3,23 @@ Flow-guided One-shot Talking Face Generation with a High-resolution Audio-visual Dataset paper supplementary +## Overview + +The HDTF (High-resolution Talking Face) dataset provides high-quality talking face videos for research in audio-visual speech processing, lip synchronization, and talking face generation. This repository contains an improved version of the dataset downloader that automates the process of downloading, cutting, and cropping videos from YouTube. + +## Dataset Structure + +The HDTF dataset is organized into three subsets: +- **RD (Radio)**: Radio/podcast style videos +- **WDA**: Videos featuring various speakers (including political figures) +- **WRA**: Additional speaker videos + ## Details of HDTF dataset **./HDTF_dataset** consists of *youtube video url*, *video resolution* (in our method, may not be the best resolution), *time stamps of talking face*, *facial region* (in the our method) and *the zoom scale* of the cropped window. -**xx_video_url.txt:** +### Metadata Files +**xx_video_url.txt:** ``` format: video name | video youtube url ``` @@ -46,14 +58,141 @@ When using HDTF dataset, The HDTF dataset is available to download under a Creative Commons Attribution 4.0 International License. If you face any problems when processing HDTF, pls contact me. +## Installation + +```bash +# Install required dependencies +pip install tqdm yt-dlp + +# Ensure ffmpeg is installed +# Ubuntu/Debian: +sudo apt-get install ffmpeg +# macOS: +brew install ffmpeg +``` + ## Downloading -For convenience, we added the `download.py` script which downloads, crops and resizes the dataset. You can use it via the following command: +For convenience, we provide the `download.py` script which downloads, crops and resizes the dataset. You can use it via the following command: ``` python download.py --output_dir /path/to/output/dir --num_workers 8 ``` +### Command Line Arguments +- `--source_dir` or `-s`: Path to metadata directory (default: `HDTF_dataset`) +- `--output_dir` or `-o`: Where to save processed videos (required) +- `--num_workers` or `-w`: Number of parallel download workers (default: 8) + Note: some videos might become unavailable if the authors will remove them or make them private. +## Output Structure + +After running `download.py`, the output directory will contain: + +``` +output_dir/ +├── _videos_raw/ # Temporary directory (can be deleted after processing) +│ ├── {subset}_{videoname}.mp4 # Raw downloaded videos +│ └── {subset}_{videoname}_download_log.txt # Download logs +├── {subset}_{videoname}_000.mp4 # Processed clip 1 +├── {subset}_{videoname}_001.mp4 # Processed clip 2 +└── ... # More clips +``` + +### Output Characteristics +- **Video only**: No audio track included +- **Square format**: All clips are square (width = height) +- **High resolution**: Maintains original resolution (720p or 1080p) +- **Face-centered**: Cropped to center on the speaker's face +- **Systematic naming**: `{subset}_{videoname}_{clipindex:03d}.mp4` + +## Processing Pipeline + +The `download.py` script performs the following steps: + +1. **Download**: Fetches videos from YouTube at specified resolution using yt-dlp +2. **Cut**: Extracts time intervals containing talking faces based on annotation files +3. **Crop**: Applies facial region cropping to create square clips using FFmpeg +4. **Save**: Outputs individual clips with systematic naming + +## Improvements Over Original Code + +### 1. **Migration to yt-dlp** +- **Original**: Used deprecated `youtube-dl` library +- **Improved**: Updated to actively maintained `yt-dlp` with better reliability +- **Benefits**: + - Better handling of YouTube API changes + - Improved download success rates + - More robust error handling + +### 2. **Enhanced Download Robustness** +Added several flags to improve download reliability: +```python +"--retries", "3", # Retry failed downloads +"--fragment-retries", "3", # Retry failed fragments +"--no-part", # Avoid partial file issues +"--no-check-certificate", # Handle SSL certificate issues +"--merge-output-format", mp4 # Ensure consistent output format +``` + +### 3. **Improved Format Selection** +- **Original**: Basic format string that could fail +- **Improved**: Sophisticated format selection with fallbacks +```python +# Example for 720p: +"bestvideo[height=720][ext=mp4]+bestaudio[ext=m4a]/best[height=720][ext=mp4]" +``` + +### 4. **Better Error Handling** +- Added graceful handling of missing metadata files +- Skip subsets if files are not found instead of crashing +- More informative error messages for debugging +- Individual download logs for each video + +### 5. **Code Quality Improvements** +- Added comprehensive documentation and docstrings +- Better variable naming for clarity +- Type hints in function signatures +- Improved code organization and readability + +### 6. **Robustness Features** +- File existence checks before processing +- Validation of downloaded video resolution +- Proper handling of videos with multiple clips +- Clear progress indication with tqdm + +## Troubleshooting + +### Common Issues + +1. **Download Failures** + - Check internet connection + - Verify YouTube URLs are still valid + - Review individual download logs in `_videos_raw/` + - Some videos may have been removed or made private + +2. **Resolution Mismatches** + - The requested resolution may not be available + - Script will skip videos if downloaded resolution doesn't match metadata + - Consider updating metadata files if needed + +3. **Missing Videos** + - Videos without proper cropping information are skipped + - Check console output for specific reasons + - Some videos may be discarded due to quality issues + +### Manual Recovery + +For failed downloads: +1. Check the download log: `_videos_raw/{video_name}_download_log.txt` +2. Try downloading manually with yt-dlp +3. Update metadata files if video formats have changed + +## Storage Notes + +- The `_videos_raw/` directory contains full-length downloaded videos +- This directory can be safely deleted after processing to save space +- Final processed clips are much smaller than raw videos + ## Reference if you use HDTF, pls reference diff --git a/download.py b/download.py index 3e21b61..d7c63f1 100644 --- a/download.py +++ b/download.py @@ -52,7 +52,13 @@ def construct_download_queue(source_dir: os.PathLike, output_dir: os.PathLike) - download_queue = [] for subset in subsets: - video_urls = read_file_as_space_separated_data(os.path.join(source_dir, f'{subset}_video_url.txt')) + # Check if subset files exist + url_file = os.path.join(source_dir, f'{subset}_video_url.txt') + if not os.path.exists(url_file): + print(f"Skipping {subset} subset - files not found") + continue + + video_urls = read_file_as_space_separated_data(url_file) crops = read_file_as_space_separated_data(os.path.join(source_dir, f'{subset}_crop_wh.txt')) intervals = read_file_as_space_separated_data(os.path.join(source_dir, f'{subset}_annotion_time.txt')) resolutions = read_file_as_space_separated_data(os.path.join(source_dir, f'{subset}_resolution.txt')) @@ -121,8 +127,8 @@ def download_and_process_video(video_data: Dict, output_dir: str): # We do not know beforehand, what will be the resolution of the downloaded video # Youtube-dl selects a (presumably) highest one video_resolution = get_video_resolution(raw_download_path) - if not video_resolution != video_data['resolution']: - print(f"Downloaded resolution is not correct for {video_data['name']}: {video_resolution} vs {video_data['name']}. Discarding this video.") + if video_resolution != int(video_data['resolution']): + print(f"Downloaded resolution is not correct for {video_data['name']}: {video_resolution} vs {video_data['resolution']}. Discarding this video.") return for clip_idx in range(len(video_data['intervals'])): @@ -150,14 +156,13 @@ def read_file_as_space_separated_data(filepath: os.PathLike) -> Dict: def download_video(video_id, download_path, resolution: int=None, video_format="mp4", log_file=None): """ - Download video from YouTube. + Download video from YouTube using yt-dlp. :param video_id: YouTube ID of the video. :param download_path: Where to save the video. + :param resolution: Specific resolution to download. :param video_format: Format to download. - :param log_file: Path to a log file for youtube-dl. - :return: Tuple: path to the downloaded video and a bool indicating success. - - Copy-pasted from https://github.com/ytdl-org/youtube-dl + :param log_file: Path to a log file for yt-dlp. + :return: Bool indicating success. """ # if os.path.isfile(download_path): return True # File already exists @@ -165,15 +170,27 @@ def download_video(video_id, download_path, resolution: int=None, video_format=" stderr = subprocess.DEVNULL else: stderr = open(log_file, "a") - video_selection = f"bestvideo[ext={video_format}]" - video_selection = video_selection if resolution is None else f"{video_selection}[height={resolution}]" + + # yt-dlp format selection is more flexible + if resolution is None: + format_selection = f"bestvideo[ext={video_format}]+bestaudio[ext=m4a]/best[ext={video_format}]" + else: + format_selection = f"bestvideo[height={resolution}][ext={video_format}]+bestaudio[ext=m4a]/best[height={resolution}][ext={video_format}]" + command = [ - "youtube-dl", - "https://youtube.com/watch?v={}".format(video_id), "--quiet", "-f", - video_selection, - "--output", download_path, - "--no-continue" + "yt-dlp", + f"https://youtube.com/watch?v={video_id}", + "--quiet", + "-f", format_selection, + "-o", download_path, + "--no-continue", + "--no-check-certificate", # Helps with SSL issues + "--retries", "3", # Retry failed downloads + "--fragment-retries", "3", # Retry failed fragments + "--no-part", # Don't use .part files + "--merge-output-format", video_format # Ensure output format ] + return_code = subprocess.call(command, stderr=stderr) success = return_code == 0 From 5a0195f8111a5d5519253ec93c6b8f75a90ccd3d Mon Sep 17 00:00:00 2001 From: nawta Date: Mon, 23 Jun 2025 05:41:58 +0900 Subject: [PATCH 4/4] change specification --- Log_en.md | 53 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) create mode 100644 Log_en.md diff --git a/Log_en.md b/Log_en.md new file mode 100644 index 0000000..4952f80 --- /dev/null +++ b/Log_en.md @@ -0,0 +1,53 @@ +# HDTF Download Script Update Log + +## 2025-06-22 + +### Migration from youtube-dl to yt-dlp + +#### Background +- Multiple errors occurred in the `download.py` script that was using `youtube-dl` +- Due to changes in YouTube's specifications, `youtube-dl` was no longer functioning correctly + +#### Modifications + +1. **Updated Download Function** (`download_video` function) + - Changed from `youtube-dl` to `yt-dlp` + - Improved format selection: + - Old: `bestvideo[ext=mp4]` (video only) + - New: `bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]` (video+audio) + - Additional options: + - `--no-check-certificate`: Avoid SSL certificate errors + - `--retries 3`: Retry on download failure + - `--fragment-retries 3`: Retry on fragment download failure + - `--no-part`: Do not use .part files + - `--merge-output-format`: Ensure specified output format + +2. **Improved Error Handling** (`construct_download_queue` function) + - Added functionality to skip when subset files don't exist + - Improved to avoid processing unnecessary subsets during test runs + +3. **Bug Fixes** + - Fixed logic error in resolution comparison: + - Old: `if not video_resolution != video_data['resolution']:` (double negative) + - New: `if video_resolution != int(video_data['resolution']):` + - Also fixed string vs integer comparison error + +4. **Documentation Updates** + - Updated help text and comments from `youtube-dl` to `yt-dlp` + +#### Test Results +- Successfully tested with a single video +- Confirmed successful video download, cropping, and resizing + +#### Usage +```bash +# Verify yt-dlp is installed +which yt-dlp + +# Download HDTF dataset +python download.py --output_dir /data/nishida/HDTF --num_workers 8 +``` + +#### Notes +- Some videos may not be available for download as they might have been removed from YouTube +- The `_videos_raw` directory can be deleted after processing (to save disk space)