From e01a6bf9cce5bd48e74d93f3813cbeb39a47c68e Mon Sep 17 00:00:00 2001
From: Yaroslav Halchenko <debian@onerussian.com>
Date: Thu, 24 Oct 2024 14:11:57 -0400
Subject: [PATCH 1/4] Add github action to codespell master on push and PRs

---
 .github/workflows/codespell.yml | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)
 create mode 100644 .github/workflows/codespell.yml

diff --git a/.github/workflows/codespell.yml b/.github/workflows/codespell.yml
new file mode 100644
index 0000000..e21712e
--- /dev/null
+++ b/.github/workflows/codespell.yml
@@ -0,0 +1,25 @@
+# Codespell configuration is within pyproject.toml
+---
+name: Codespell
+
+on:
+  push:
+    branches: [master]
+  pull_request:
+    branches: [master]
+
+permissions:
+  contents: read
+
+jobs:
+  codespell:
+    name: Check for spelling errors
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+      - name: Annotate locations with typos
+        uses: codespell-project/codespell-problem-matcher@v1
+      - name: Codespell
+        uses: codespell-project/actions-codespell@v2

From fd9d1d2c811fbedb668b787e2b6b755a6c8d2057 Mon Sep 17 00:00:00 2001
From: Yaroslav Halchenko <debian@onerussian.com>
Date: Thu, 24 Oct 2024 14:11:57 -0400
Subject: [PATCH 2/4] Add rudimentary codespell config

---
 pyproject.toml | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/pyproject.toml b/pyproject.toml
index a50bf3c..c2f3e84 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -29,3 +29,10 @@ webdataset = "0.1.103"
 [build-system]
 requires = ["poetry-core>=1.0.0"]
 build-backend = "poetry.core.masonry.api"
+
+[tool.codespell]
+# Ref: https://github.com/codespell-project/codespell#using-a-config-file
+skip = '.git*,*.lock'
+check-hidden = true
+# ignore-regex = ''
+# ignore-words-list = ''

From e066455c4be989278f00c9cf99825e64c7d478ad Mon Sep 17 00:00:00 2001
From: Yaroslav Halchenko <debian@onerussian.com>
Date: Thu, 24 Oct 2024 14:40:20 -0400
Subject: [PATCH 3/4] [DATALAD RUNCMD] run codespell throughout fixing typos
 automagically (but ignoring overall fail due to ambiguous ones)

=== Do not change lines below ===
{
 "chain": [],
 "cmd": "codespell -w || :",
 "exit": 0,
 "extra_inputs": [],
 "inputs": [],
 "outputs": [],
 "pwd": "."
}
^^^ Do not change lines above ^^^
---
 README.md                                                       | 2 +-
 scripts/analyses/fig5_downstream_performance.py                 | 2 +-
 scripts/analyses/sfig4_5_downstream_learning_curves.py          | 2 +-
 scripts/analyses/sfig6_downstream_performance_replication.py    | 2 +-
 .../analyses/sfig7_downstream_learning_curves_replication.py    | 2 +-
 scripts/dataprep/upstream/dataprep.py                           | 2 +-
 scripts/train.py                                                | 2 +-
 src/batcher/make.py                                             | 2 +-
 src/decoder/make.py                                             | 2 +-
 tests/test_adapt.py                                             | 2 +-
 tests/test_checkpoints.py                                       | 2 +-
 11 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/README.md b/README.md
index d40fbf9..df45758 100644
--- a/README.md
+++ b/README.md
@@ -73,7 +73,7 @@ We use [WebDataset](https://github.com/webdataset/webdataset) to read data durin
 
 **Upstream:** All upstream data contain three core entries for I) the parcelated BOLD data (`bold.pyd`), II) its repetition time (`t_r.pyd`), and III) a key (`__key__`) indicating the specific subject / task / run that this .tar file corresponds to. 
 
-**Downstream:** Each .tar file of the two downstream datasets contains one sample for each trial of its experiment run. In addition to the three entries listed for the upstream data, each trial also contains information describing the associated mental state. In our downstream adapatation analyses, we utilize the `task_label.pyd` (MDTB data) and `label_across_tasks.pyd` (HCP data) entries to assign numerical labels to each mental state during training. Note that the MDTB data is indicated with its OpenNeuro identifier (`ds002105`) in the `data/downstream/` directory. 
+**Downstream:** Each .tar file of the two downstream datasets contains one sample for each trial of its experiment run. In addition to the three entries listed for the upstream data, each trial also contains information describing the associated mental state. In our downstream adaptation analyses, we utilize the `task_label.pyd` (MDTB data) and `label_across_tasks.pyd` (HCP data) entries to assign numerical labels to each mental state during training. Note that the MDTB data is indicated with its OpenNeuro identifier (`ds002105`) in the `data/downstream/` directory. 
 
 For details on the additional preprocessing applied to [fmriprep](https://fmriprep.org/en/stable/)'s derivatives for each dataset, see the scripts contained in `scripts/dataprep/`.
 
diff --git a/scripts/analyses/fig5_downstream_performance.py b/scripts/analyses/fig5_downstream_performance.py
index 210fd1a..351f388 100644
--- a/scripts/analyses/fig5_downstream_performance.py
+++ b/scripts/analyses/fig5_downstream_performance.py
@@ -123,7 +123,7 @@ def fig_downstream_performance(
 
 def get_args() -> argparse.ArgumentParser:
     parser = argparse.ArgumentParser(
-        description='figure 5 of the manuscript; downstream model adapatation performances'
+        description='figure 5 of the manuscript; downstream model adaptation performances'
     )
 
     parser.add_argument(
diff --git a/scripts/analyses/sfig4_5_downstream_learning_curves.py b/scripts/analyses/sfig4_5_downstream_learning_curves.py
index dbdfcb8..4c58e30 100644
--- a/scripts/analyses/sfig4_5_downstream_learning_curves.py
+++ b/scripts/analyses/sfig4_5_downstream_learning_curves.py
@@ -199,7 +199,7 @@ def sfig_downstream_learning_curves(
 
 def get_argparse() -> argparse.ArgumentParser:
     parser = argparse.ArgumentParser(
-        description='appendix figure 4-5 of the manuscript; downstream adapatation learning curves'
+        description='appendix figure 4-5 of the manuscript; downstream adaptation learning curves'
     )
 
     parser.add_argument(
diff --git a/scripts/analyses/sfig6_downstream_performance_replication.py b/scripts/analyses/sfig6_downstream_performance_replication.py
index e7734fd..d1e0d47 100644
--- a/scripts/analyses/sfig6_downstream_performance_replication.py
+++ b/scripts/analyses/sfig6_downstream_performance_replication.py
@@ -9,7 +9,7 @@
 
 
 def sfig_downstream_performance_replication(config: Dict=None) -> None:
-    """Script's main funtion; creates Appendix Figure 6 by wrapping
+    """Script's main function; creates Appendix Figure 6 by wrapping
     fig_downstream_performance() from scripts/analyses/fig5_downstream-performance.py"""
 
     if config is None:
diff --git a/scripts/analyses/sfig7_downstream_learning_curves_replication.py b/scripts/analyses/sfig7_downstream_learning_curves_replication.py
index 6bdcbae..c213935 100644
--- a/scripts/analyses/sfig7_downstream_learning_curves_replication.py
+++ b/scripts/analyses/sfig7_downstream_learning_curves_replication.py
@@ -9,7 +9,7 @@
 
 
 def sfig_downstream_learning_curves_replication(config: Dict=None) -> None:
-    """Script's main funtion; creates Appendix Figure 7 by wrapping
+    """Script's main function; creates Appendix Figure 7 by wrapping
     fig_downstream_performance() from scripts/analyses/fig5_downstream-performance.py"""
 
     if config is None:
diff --git a/scripts/dataprep/upstream/dataprep.py b/scripts/dataprep/upstream/dataprep.py
index bba94db..3eb1bac 100644
--- a/scripts/dataprep/upstream/dataprep.py
+++ b/scripts/dataprep/upstream/dataprep.py
@@ -192,7 +192,7 @@ def get_args() -> argparse.ArgumentParser:
         default=-1,
         type=float,
         help='repetition time / TR of BOLD data (in seconds); '
-             'will be infered from data files, if not set (or set to -1).'
+             'will be inferred from data files, if not set (or set to -1).'
     )
     parser.add_argument(
         '--check-fmriprep-bug',
diff --git a/scripts/train.py b/scripts/train.py
index 7968c97..90d18d3 100644
--- a/scripts/train.py
+++ b/scripts/train.py
@@ -394,7 +394,7 @@ def make_model(model_config: Dict=None):
 def get_config(args: argparse.Namespace=None) -> Dict:
     """
     Make config from command line arguments (as created by get_args()).
-    Performs additional formating of args required for calling train().
+    Performs additional formatting of args required for calling train().
     """
 
     if args is None:
diff --git a/src/batcher/make.py b/src/batcher/make.py
index 72a801e..64a4c43 100644
--- a/src/batcher/make.py
+++ b/src/batcher/make.py
@@ -45,7 +45,7 @@ def make_batcher(
         run files.
     sample_random_seq: bool
         If True, the sequences are sampled randomly from
-        the data run files, given the spefied
+        the data run files, given the specified
         sequence length (seq_min and seq_max) and the
         specified gap consecutive sequences (bert_seq_gap_min,
         bert_seq_gap_max) for BERT-style training.
diff --git a/src/decoder/make.py b/src/decoder/make.py
index de451c6..afa0ff5 100644
--- a/src/decoder/make.py
+++ b/src/decoder/make.py
@@ -112,4 +112,4 @@ def make_decoder(
         return LinearBaseline(**kwargs)
     
     else:
-        raise ValueError(f'{architecture}-architecture unkown.')
\ No newline at end of file
+        raise ValueError(f'{architecture}-architecture unknown.')
\ No newline at end of file
diff --git a/tests/test_adapt.py b/tests/test_adapt.py
index bb07839..b44291b 100644
--- a/tests/test_adapt.py
+++ b/tests/test_adapt.py
@@ -60,7 +60,7 @@ def test_adapt_decoding() -> None:
                     'training_style': pre_training_style,
                 }
             )
-        # adapt mdoel
+        # adapt model
         adapt_trainer = run_train_process(
             config={
                     **ADAPT_CONFIG,
diff --git a/tests/test_checkpoints.py b/tests/test_checkpoints.py
index 563993b..fae9708 100644
--- a/tests/test_checkpoints.py
+++ b/tests/test_checkpoints.py
@@ -115,7 +115,7 @@ def test_checkpoint_forward_pass() -> None:
         model.eval()
         loaded_model.eval()
         batch_prepped = model.embedder.prep_batch(batch)
-        # test embdder forward pass
+        # test embedder forward pass
         inputs_embeds = model.embedder(batch=batch_prepped)
         inputs_embeds_loaded = loaded_model.embedder(batch=batch_prepped)
         assert torch.equal(

From a83f28be9e819a3d8ca2b8e6091a65f82246330e Mon Sep 17 00:00:00 2001
From: Yaroslav Halchenko <debian@onerussian.com>
Date: Thu, 24 Oct 2024 14:40:46 -0400
Subject: [PATCH 4/4] [DATALAD RUNCMD] Do interactive fixing of some ambiguous
 typos

=== Do not change lines below ===
{
 "chain": [],
 "cmd": "codespell -w -i 3 -C 2",
 "exit": 0,
 "extra_inputs": [],
 "inputs": [],
 "outputs": [],
 "pwd": "."
}
^^^ Do not change lines above ^^^
---
 scripts/dataprep/downstream/mdtb_dataprep.py | 2 +-
 src/decoder/make.py                          | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/scripts/dataprep/downstream/mdtb_dataprep.py b/scripts/dataprep/downstream/mdtb_dataprep.py
index 5d686b5..079eb5c 100644
--- a/scripts/dataprep/downstream/mdtb_dataprep.py
+++ b/scripts/dataprep/downstream/mdtb_dataprep.py
@@ -287,7 +287,7 @@ def get_args() -> argparse.Namespace:
         metavar='DIR',
         default='../data/downstream/',
         type=str,
-        help='path where .tar files for fMRI runs wil be stored '
+        help='path where .tar files for fMRI runs will be stored '
              '(default: ../data/downstream)'
     )
     parser.add_argument(
diff --git a/src/decoder/make.py b/src/decoder/make.py
index afa0ff5..2756474 100644
--- a/src/decoder/make.py
+++ b/src/decoder/make.py
@@ -68,7 +68,7 @@ def make_decoder(
         (as generated by src.embedder.prep_batch).
     decode(outputs: Dict):
         Make decoding prediction, given outputs generated by
-        caling forward().    
+        calling forward().    
     switch_decoding_mode(is_decoding_mode: bool):
         Switch model to decoding mode (is_decoding_mode=True).
         Relevant for adaptation of pre-trained models