diff --git a/.gitignore b/.gitignore index eef8120..fd167d1 100644 --- a/.gitignore +++ b/.gitignore @@ -209,3 +209,4 @@ docs/archive/ # codex .codex .worktrees/ +config_experiment.json diff --git a/config.json b/config.json index fb6e54b..d6df05d 100644 --- a/config.json +++ b/config.json @@ -531,8 +531,8 @@ "threads": 8 }, "ont_amplicon_params": { - "model_type": "errhmm", - "model_file": "reference/pbsim3/ERRHMM-ONT.model", + "model_type": "qshmm", + "model_file": "reference/pbsim3/QSHMM-ONT-HQ.model", "threads": 8, "accuracy_mean": 0.95 }, @@ -540,7 +540,10 @@ "forward_primer": "GGAGAAAAGGAGACTTCGGCTACCCAG", "reverse_primer": "GCCGTTGTGCACCAGAGTAGAAGCTGA", "primer_source": "Wenzel et al. 2018 (PMID: 29520014)", - "expected_product_range": [500, 15000], + "expected_product_range": [ + 500, + 15000 + ], "pcr_bias": { "preset": "default" } @@ -634,7 +637,11 @@ "consensus_motif": "RCHLGPGHQAGPGLHR", "identity_threshold": 0.8, "expected_repeat_count": 10, - "key_residues": ["R", "C", "H"], + "key_residues": [ + "R", + "C", + "H" + ], "weights": { "repeat": 0.6, "composition": 0.4 diff --git a/docs/about/changelog.md b/docs/about/changelog.md index 158fb53..e4ad1ae 100644 --- a/docs/about/changelog.md +++ b/docs/about/changelog.md @@ -14,7 +14,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - CLI routes `--model-file` overrides to `ont_amplicon_params` for ONT and `pacbio_params` for PacBio ### Added -- `ont_amplicon_params` config section with ONT-specific defaults (ERRHMM-ONT.model) +- `ont_amplicon_params` config section with ONT-specific defaults (QSHMM-ONT-HQ.model) - `OntAmpliconConfig` TypedDict for type-safe ONT parameter access - Config schema validation and path resolution for `ont_amplicon_params` - Test for ONT platform using `ont_amplicon_params` (not `pacbio_params`) diff --git a/docs/guides/amplicon-simulation.md b/docs/guides/amplicon-simulation.md index ac00193..57cbc9e 100644 --- 
a/docs/guides/amplicon-simulation.md +++ b/docs/guides/amplicon-simulation.md @@ -44,7 +44,7 @@ muconeup --config config.json reads amplicon \ # 2b. ONT amplicon reads muconeup --config config.json reads amplicon --platform ont \ output/sample.001.simulated.fa \ - --model-file /path/to/ERRHMM-ONT-HQ.model \ + --model-file /path/to/QSHMM-ONT-HQ.model \ --coverage 500 --seed 42 ``` @@ -101,7 +101,7 @@ muconeup --config config.json reads amplicon sample.fa \ # Oxford Nanopore muconeup --config config.json reads amplicon --platform ont sample.fa \ - --model-file /path/to/ERRHMM-ONT-HQ.model + --model-file /path/to/QSHMM-ONT-HQ.model ``` Stages 1-4 (extraction, PCR bias, template generation) are shared. The platforms differ in read generation and alignment: @@ -112,7 +112,7 @@ Stages 1-4 (extraction, PCR bias, template generation) are shared. The platforms | pbsim3 pass_num | 3+ (multi-pass CLR) | 1 (single-pass) | | Consensus | CCS (multi-pass -> HiFi) | None (skip) | | minimap2 preset | `map-hifi` | `map-ont` | -| Error model | `QSHMM-SEQUEL.model` etc. | `ERRHMM-ONT.model` etc. | +| Error model | `QSHMM-SEQUEL.model` etc. | `QSHMM-ONT-HQ.model` etc. 
| | Output suffix | `*_amplicon_hifi.bam` | `*_amplicon_ont.bam` | | Tool dependencies | pbsim3, ccs, samtools, minimap2 | pbsim3, samtools, minimap2 | @@ -125,8 +125,8 @@ The ONT amplicon pipeline reads from the `ont_amplicon_params` config section (s ```json "ont_amplicon_params": { - "model_type": "errhmm", - "model_file": "reference/pbsim3/ERRHMM-ONT.model", + "model_type": "qshmm", + "model_file": "reference/pbsim3/QSHMM-ONT-HQ.model", "threads": 8, "accuracy_mean": 0.95 } @@ -134,14 +134,14 @@ The ONT amplicon pipeline reads from the `ont_amplicon_params` config section (s | Parameter | Type | Default | Description | |-----------|------|---------|-------------| -| `model_type` | string | `"errhmm"` | pbsim3 model type (`errhmm` or `qshmm`) | -| `model_file` | string | `ERRHMM-ONT.model` | pbsim3 ONT error model file | +| `model_type` | string | `"qshmm"` | pbsim3 model type (`errhmm` or `qshmm`) | +| `model_file` | string | `QSHMM-ONT-HQ.model` | pbsim3 ONT error model file | | `threads` | int | `8` | Number of threads | | `accuracy_mean` | float | `0.95` | Mean read accuracy | Available ONT models in `reference/pbsim3/`: - `ERRHMM-ONT.model` -- standard ONT error profile - `QSHMM-ONT.model` -- quality score ONT model - `QSHMM-ONT-HQ.model` -- high-quality ONT model (e.g., Kit14/Dorado) diff --git a/docs/reference/configuration.md b/docs/reference/configuration.md index 8bdb674..ca657ac 100644 --- a/docs/reference/configuration.md +++ b/docs/reference/configuration.md @@ -607,8 +607,8 @@ Parameters for ONT amplicon read simulation (pbsim3 single-pass mode). Separate ```json { "ont_amplicon_params": { - "model_type": "errhmm", - "model_file": "reference/pbsim3/ERRHMM-ONT.model", + "model_type": "qshmm", + "model_file": "reference/pbsim3/QSHMM-ONT-HQ.model", "threads": 8, "accuracy_mean": 0.95 } @@ -619,13 +619,13 @@ Parameters for ONT amplicon read simulation (pbsim3 single-pass mode). 
Separate | Field | Type | Description | Default | |-------|------|-------------|---------| -| `model_type` | string | "qshmm" or "errhmm" | "errhmm" | -| `model_file` | string | pbsim3 ONT error model file | ERRHMM-ONT.model | +| `model_type` | string | "qshmm" or "errhmm" | "qshmm" | +| `model_file` | string | pbsim3 ONT error model file | QSHMM-ONT-HQ.model | | `threads` | number | Parallel threads (minimum 1) | 8 | | `seed` | integer | Random seed (null = random) | null | | `accuracy_mean` | number | Mean read accuracy (0.0-1.0) | 0.95 | Available ONT models: `ERRHMM-ONT.model`, `QSHMM-ONT.model`, `QSHMM-ONT-HQ.model`. --- @@ -784,8 +784,8 @@ See the [Amplicon Simulation Guide](../guides/amplicon-simulation.md) for detail }, "ont_amplicon_params": { - "model_type": "errhmm", - "model_file": "/path/to/pbsim3/ERRHMM-ONT.model", + "model_type": "qshmm", + "model_file": "/path/to/pbsim3/QSHMM-ONT-HQ.model", "threads": 8, "accuracy_mean": 0.95 } diff --git a/muc_one_up/cli/commands/reads.py b/muc_one_up/cli/commands/reads.py index 64e6698..e1b6af9 100644 --- a/muc_one_up/cli/commands/reads.py +++ b/muc_one_up/cli/commands/reads.py @@ -146,7 +146,7 @@ def _apply_ont_amplicon_params( Mutates config in place. Uses ont_amplicon_params section (separate from pacbio_params) so the ONT pipeline gets an ONT-specific model. - Falls back to sensible defaults (ERRHMM-ONT.model) when values are + Falls back to sensible defaults (QSHMM-ONT-HQ.model) when values are missing or explicitly null. 
""" if "ont_amplicon_params" not in config: @@ -161,12 +161,12 @@ def _apply_ont_amplicon_params( params["seed"] = seed logging.info("Using random seed: %d (results will be reproducible)", seed) - # Default to ERRHMM-ONT.model if missing or explicitly null + # Default to QSHMM-ONT-HQ.model if missing or explicitly null if not params.get("model_file"): - params["model_file"] = "reference/pbsim3/ERRHMM-ONT.model" + params["model_file"] = "reference/pbsim3/QSHMM-ONT-HQ.model" logging.info("Using default ONT model: %s", params["model_file"]) if not params.get("model_type"): - params["model_type"] = "errhmm" + params["model_type"] = "qshmm" def _apply_pacbio_params( @@ -560,7 +560,7 @@ def amplicon( # ONT amplicon simulation muconeup --config X reads amplicon --platform ont sample.fa \\ - --model-file /models/ERRHMM-ONT.model + --model-file /models/QSHMM-ONT-HQ.model # High coverage with stochastic PCR bias muconeup --config X reads amplicon sample.fa \\ diff --git a/muc_one_up/read_simulator/ont_amplicon_pipeline.py b/muc_one_up/read_simulator/ont_amplicon_pipeline.py index 8da8214..ea078bb 100644 --- a/muc_one_up/read_simulator/ont_amplicon_pipeline.py +++ b/muc_one_up/read_simulator/ont_amplicon_pipeline.py @@ -83,8 +83,8 @@ def simulate_ont_amplicon_pipeline( # ONT-specific params — uses ont_amplicon_params (NOT pacbio_params) # Treat None as missing (possible from partial configs with explicit nulls) - model_type = ont_params.get("model_type") or "errhmm" - model_file = ont_params.get("model_file") or "reference/pbsim3/ERRHMM-ONT.model" + model_type = ont_params.get("model_type") or "qshmm" + model_file = ont_params.get("model_file") or "reference/pbsim3/QSHMM-ONT-HQ.model" threads = ont_params.get("threads") or 8 seed = ont_params.get("seed") # None is valid here (random seed) accuracy_mean = ont_params.get("accuracy_mean") or 0.95 @@ -94,7 +94,7 @@ def simulate_ont_amplicon_pipeline( if "ONT" not in model_name and "NANOPORE" not in model_name: logging.warning( 
"ONT amplicon mode is using model file '%s' which does not appear " - "to be an ONT model. The default is ERRHMM-ONT.model. " + "to be an ONT model. The default is QSHMM-ONT-HQ.model. " "Check ont_amplicon_params.model_file in config or use --model-file.", model_file, ) diff --git a/tests/read_simulator/test_ont_amplicon_pipeline.py b/tests/read_simulator/test_ont_amplicon_pipeline.py index 11b685e..d798195 100644 --- a/tests/read_simulator/test_ont_amplicon_pipeline.py +++ b/tests/read_simulator/test_ont_amplicon_pipeline.py @@ -19,7 +19,7 @@ def muc1_primers(): @pytest.fixture def ont_amplicon_config(tmp_path, muc1_primers): - model_file = tmp_path / "ERRHMM-ONT-HQ.model" + model_file = tmp_path / "QSHMM-ONT-HQ.model" model_file.write_text("model") return { "tools": { @@ -33,7 +33,7 @@ def ont_amplicon_config(tmp_path, muc1_primers): "pcr_bias": {"preset": "default"}, }, "ont_amplicon_params": { - "model_type": "errhmm", + "model_type": "qshmm", "model_file": str(model_file), "threads": 4, },