From 132faadeef5d7e2d47f694e9151aaaa062e9f2e4 Mon Sep 17 00:00:00 2001 From: "shubi (laccl01)" Date: Tue, 6 Jan 2026 20:26:18 +0200 Subject: [PATCH 1/2] Add launch configuration for downloading MECOL2 dataset, update to pymovements 0.25, and download all resources as a workaround for https://github.com/pymovements/pymovements/pull/1161#issuecomment-3715172011 --- .vscode/launch.json | 15 +++++++++++++++ environment.yml | 2 +- src/data/preprocessing/download_data.py | 3 ++- 3 files changed, 18 insertions(+), 2 deletions(-) diff --git a/.vscode/launch.json b/.vscode/launch.json index bc5c98c3..0af1a5f5 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -116,6 +116,21 @@ "PoTeC" ], }, + { + "name": "Download data", + "type": "debugpy", + "request": "launch", + "program": "src/data/preprocessing/download_data.py", + "console": "integratedTerminal", + "justMyCode": false, + "env": { + "PYDEVD_WARN_SLOW_RESOLVE_TIMEOUT": "20.0", + }, + "args": [ + "--dataset", + "MECOL2" + ], + }, { "name": "Union files", "type": "debugpy", diff --git a/environment.yml b/environment.yml index cd43cfa0..b55cc1a3 100644 --- a/environment.yml +++ b/environment.yml @@ -42,7 +42,7 @@ dependencies: - rdata=1.0.0 - pip: - -e . 
# For development purposes - - git+https://github.com/aeye-lab/pymovements.git@siqube-stack-pr + - pymovements==0.25.0 - git+https://github.com/lacclab/text-metrics.git - en_core_web_sm@https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl - da_core_news_sm@https://github.com/explosion/spacy-models/releases/download/da_core_news_sm-3.8.0/da_core_news_sm-3.8.0-py3-none-any.whl diff --git a/src/data/preprocessing/download_data.py b/src/data/preprocessing/download_data.py index 444335a6..7f9b99a7 100644 --- a/src/data/preprocessing/download_data.py +++ b/src/data/preprocessing/download_data.py @@ -72,7 +72,8 @@ def convert_rda_to_csv(root: Path, dataset_name: str) -> None: def prepare_dataset_definition(dataset_name: str): """Prepare dataset definition with gaze files disabled.""" dataset_def = pm.DatasetLibrary.get(dataset_name) - dataset_def.has_files['gaze'] = False + # dataset_def.resources = [resource for resource in dataset_def.resources if resource.content != 'gaze'] + return dataset_def From cdea83500a8d0938160f7dd6c03b285fddf7c371 Mon Sep 17 00:00:00 2001 From: SiQube Date: Thu, 8 Jan 2026 12:10:41 +0100 Subject: [PATCH 2/2] update exclude gaze --- src/data/preprocessing/download_data.py | 5 ++++- src/data/preprocessing/union_raw_files.py | 5 ++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/src/data/preprocessing/download_data.py b/src/data/preprocessing/download_data.py index 7f9b99a7..534d6d14 100644 --- a/src/data/preprocessing/download_data.py +++ b/src/data/preprocessing/download_data.py @@ -5,6 +5,7 @@ import rdata import requests from loguru import logger +from pymovements import ResourceDefinitions from tqdm import tqdm from src.configs.constants import DataSets @@ -72,7 +73,9 @@ def convert_rda_to_csv(root: Path, dataset_name: str) -> None: def prepare_dataset_definition(dataset_name: str): """Prepare dataset definition with gaze files disabled.""" dataset_def = 
pm.DatasetLibrary.get(dataset_name) - # dataset_def.resources = [resource for resource in dataset_def.resources if resource.content != 'gaze'] + dataset_def.resources = ResourceDefinitions( + [resource for resource in dataset_def.resources if resource.content != 'gaze'] + ) return dataset_def diff --git a/src/data/preprocessing/union_raw_files.py b/src/data/preprocessing/union_raw_files.py index 4dd004ea..07098e90 100644 --- a/src/data/preprocessing/union_raw_files.py +++ b/src/data/preprocessing/union_raw_files.py @@ -7,6 +7,7 @@ import polars as pl import pymovements as pm from loguru import logger +from pymovements import ResourceDefinitions from src.configs.constants import STATS_FOLDER, DataSets from src.configs.data import get_data_args @@ -130,7 +131,9 @@ def combine_dataset(dataset_name: str) -> None: base = lookup[f'data_args_{part}'].base_path logger.info(f'Processing {dataset_name}{part}...') dataset_def = pm.DatasetLibrary.get(f'{dataset_name}{part}') - dataset_def.has_files['gaze'] = False + dataset_def.resources = ResourceDefinitions( + [resource for resource in dataset_def.resources if resource.content != 'gaze'] + ) logger.info(f'Loading {dataset_name}{part} dataset...') dataset = pm.Dataset(dataset_def, f'data/{dataset_name}{part}').load() if part == 'W1':