From 311285eb8b94e774521d72ee079fed45b7e4b340 Mon Sep 17 00:00:00 2001 From: Natan Bagrov Date: Thu, 27 Feb 2025 18:18:35 +0200 Subject: [PATCH] setup and misc fixes --- configs/experiment/val_vit_mae.yaml | 2 +- requirements.txt | 20 ++++++++++++++++++++ scripts/make_annot_file.py | 3 ++- setup.sh | 25 +++++++++++++++++++++++++ 4 files changed, 48 insertions(+), 2 deletions(-) create mode 100644 requirements.txt create mode 100644 setup.sh diff --git a/configs/experiment/val_vit_mae.yaml b/configs/experiment/val_vit_mae.yaml index 98d6eff..ec1e1a3 100644 --- a/configs/experiment/val_vit_mae.yaml +++ b/configs/experiment/val_vit_mae.yaml @@ -29,7 +29,7 @@ trainer: model: compile: true - pretrain: "../../checkpoints/vit_mae.pth + pretrain: "../../checkpoints/vit_mae.pth" tokenizer_cfg: _target_: src.models.tokenizer_utils.TokenizerConfig drop_policy: none diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..59c091f --- /dev/null +++ b/requirements.txt @@ -0,0 +1,20 @@ +torch +torchaudio +torchvision +xformers==0.0.25 +einops==0.7.0 +numpy==1.26.4 +omegaconf +matplotlib +opencv-python +hydra-core==1.3.2 +hydra-colorlog==1.2.0 +hydra-optuna-sweeper==1.2.0 +ipdb +pytorch-lightning==2.2.0 +pytorchvideo +rich +rootutils +timm +tqdm +wandb \ No newline at end of file diff --git a/scripts/make_annot_file.py b/scripts/make_annot_file.py index 8398ea6..4983437 100644 --- a/scripts/make_annot_file.py +++ b/scripts/make_annot_file.py @@ -17,7 +17,8 @@ def main(args): # Validate the file. num_lines = sum(1 for _ in open(args.output_file, 'r')) for line in tqdm(open(args.output_file, 'r'), colour='green', total=num_lines): - path, _ = line.strip().split() + *splits, label = line.strip().split() + path = ' '.join(splits) assert os.path.exists(path) print("Done!") diff --git a/setup.sh b/setup.sh new file mode 100644 index 0000000..6c51bad --- /dev/null +++ b/setup.sh @@ -0,0 +1,25 @@ +# conda env +conda create --name rlt python=3.10 -y +conda activate rlt + +# pip packages +cd /home/${USER}/PycharmProjects/rlt +pip install -r requirements.txt + +# decord specific +sudo apt-get install -y ffmpeg libavcodec-dev libavfilter-dev libavformat-dev libavutil-dev +pip install decord + +# download kinetics +cd /lustre/fsw/coreai_nvfm_vfm/$USER/ +git clone https://github.com/cvdfoundation/kinetics-dataset.git +cd kinetics-dataset +bash ./k400_downloader.sh +bash ./k400_extractor.sh +python arrange_by_classes.py --path /lustre/fsw/coreai_nvfm_vfm/$USER/kinetics-dataset/k400 + +# standardize to how rlt wants it to be +cd /home/${USER}/PycharmProjects/rlt +python scripts/make_annot_file.py --video_folder /lustre/fsw/coreai_nvfm_vfm/$USER/kinetics-dataset/k400/videos/train --output_file train.txt +python scripts/make_annot_file.py --video_folder /lustre/fsw/coreai_nvfm_vfm/$USER/kinetics-dataset/k400/videos/val --output_file val.txt +python scripts/make_annot_file.py --video_folder /lustre/fsw/coreai_nvfm_vfm/$USER/kinetics-dataset/k400/videos/test --output_file test.txt \ No newline at end of file