Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions .vscode/launch.json
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,21 @@
"PoTeC"
],
},
{
"name": "Download data",
"type": "debugpy",
"request": "launch",
"program": "src/data/preprocessing/download_data.py",
"console": "integratedTerminal",
"justMyCode": false,
"env": {
"PYDEVD_WARN_SLOW_RESOLVE_TIMEOUT": "20.0",
},
"args": [
"--dataset",
"MECOL2"
],
},
{
"name": "Union files",
"type": "debugpy",
Expand Down
2 changes: 1 addition & 1 deletion environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ dependencies:
- rdata=1.0.0
- pip:
- -e . # For development purposes
- git+https://github.com/aeye-lab/pymovements.git@siqube-stack-pr
- pymovements==0.25.0
- git+https://github.com/lacclab/text-metrics.git
- en_core_web_sm@https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl
- da_core_news_sm@https://github.com/explosion/spacy-models/releases/download/da_core_news_sm-3.8.0/da_core_news_sm-3.8.0-py3-none-any.whl
Expand Down
6 changes: 5 additions & 1 deletion src/data/preprocessing/download_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import rdata
import requests
from loguru import logger
from pymovements import ResourceDefinitions
from tqdm import tqdm

from src.configs.constants import DataSets
Expand Down Expand Up @@ -72,7 +73,10 @@ def convert_rda_to_csv(root: Path, dataset_name: str) -> None:
def prepare_dataset_definition(dataset_name: str):
"""Prepare dataset definition with gaze files disabled."""
# Fetch the definition that pymovements' DatasetLibrary holds under `dataset_name`.
dataset_def = pm.DatasetLibrary.get(dataset_name)
# NOTE(review): this text is a rendered diff hunk whose +/- markers were stripped
# (the file header reports 5 additions & 1 deletion). The `has_files` assignment
# below is presumably the deleted line, superseded by the `resources` filter that
# follows -- confirm against the repository before treating both as live code.
dataset_def.has_files['gaze'] = False
# Replace the resource collection with a new ResourceDefinitions that keeps every
# resource whose `content` is not 'gaze'; all other resources are left in place.
dataset_def.resources = ResourceDefinitions(
[resource for resource in dataset_def.resources if resource.content != 'gaze']
)

# Hand the adjusted definition back to the caller.
return dataset_def


Expand Down
5 changes: 4 additions & 1 deletion src/data/preprocessing/union_raw_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import polars as pl
import pymovements as pm
from loguru import logger
from pymovements import ResourceDefinitions

from src.configs.constants import STATS_FOLDER, DataSets
from src.configs.data import get_data_args
Expand Down Expand Up @@ -130,7 +131,9 @@ def combine_dataset(dataset_name: str) -> None:
base = lookup[f'data_args_{part}'].base_path
logger.info(f'Processing {dataset_name}{part}...')
dataset_def = pm.DatasetLibrary.get(f'{dataset_name}{part}')
dataset_def.has_files['gaze'] = False
dataset_def.resources = ResourceDefinitions(
[resource for resource in dataset_def.resources if resource.content != 'gaze']
)
logger.info(f'Loading {dataset_name}{part} dataset...')
dataset = pm.Dataset(dataset_def, f'data/{dataset_name}{part}').load()
if part == 'W1':
Expand Down
Loading