diff --git a/.vscode/launch.json b/.vscode/launch.json index bc5c98c3..0af1a5f5 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -116,6 +116,21 @@ "PoTeC" ], }, + { + "name": "Download data", + "type": "debugpy", + "request": "launch", + "program": "src/data/preprocessing/download_data.py", + "console": "integratedTerminal", + "justMyCode": false, + "env": { + "PYDEVD_WARN_SLOW_RESOLVE_TIMEOUT": "20.0", + }, + "args": [ + "--dataset", + "MECOL2" + ], + }, { "name": "Union files", "type": "debugpy", diff --git a/environment.yml b/environment.yml index cd43cfa0..b55cc1a3 100644 --- a/environment.yml +++ b/environment.yml @@ -42,7 +42,7 @@ dependencies: - rdata=1.0.0 - pip: - -e . # For development purposes - - git+https://github.com/aeye-lab/pymovements.git@siqube-stack-pr + - pymovements==0.25.0 - git+https://github.com/lacclab/text-metrics.git - en_core_web_sm@https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl - da_core_news_sm@https://github.com/explosion/spacy-models/releases/download/da_core_news_sm-3.8.0/da_core_news_sm-3.8.0-py3-none-any.whl diff --git a/src/data/preprocessing/download_data.py b/src/data/preprocessing/download_data.py index 444335a6..534d6d14 100644 --- a/src/data/preprocessing/download_data.py +++ b/src/data/preprocessing/download_data.py @@ -5,6 +5,7 @@ import rdata import requests from loguru import logger +from pymovements import ResourceDefinitions from tqdm import tqdm from src.configs.constants import DataSets @@ -72,7 +73,10 @@ def convert_rda_to_csv(root: Path, dataset_name: str) -> None: def prepare_dataset_definition(dataset_name: str): """Prepare dataset definition with gaze files disabled.""" dataset_def = pm.DatasetLibrary.get(dataset_name) - dataset_def.has_files['gaze'] = False + dataset_def.resources = ResourceDefinitions( + [resource for resource in dataset_def.resources if resource.content != 'gaze'] + ) + return dataset_def diff --git a/src/data/preprocessing/union_raw_files.py b/src/data/preprocessing/union_raw_files.py index 4dd004ea..07098e90 100644 --- a/src/data/preprocessing/union_raw_files.py +++ b/src/data/preprocessing/union_raw_files.py @@ -7,6 +7,7 @@ import polars as pl import pymovements as pm from loguru import logger +from pymovements import ResourceDefinitions from src.configs.constants import STATS_FOLDER, DataSets from src.configs.data import get_data_args @@ -130,7 +131,9 @@ def combine_dataset(dataset_name: str) -> None: base = lookup[f'data_args_{part}'].base_path logger.info(f'Processing {dataset_name}{part}...') dataset_def = pm.DatasetLibrary.get(f'{dataset_name}{part}') - dataset_def.has_files['gaze'] = False + dataset_def.resources = ResourceDefinitions( + [resource for resource in dataset_def.resources if resource.content != 'gaze'] + ) logger.info(f'Loading {dataset_name}{part} dataset...') dataset = pm.Dataset(dataset_def, f'data/{dataset_name}{part}').load() if part == 'W1':