From ad1cdcbc14564d8dfefc6ca772c8bcb1f7905238 Mon Sep 17 00:00:00 2001 From: Thang Pham Date: Tue, 26 May 2026 10:40:16 -0500 Subject: [PATCH 1/3] Add PACMOF2 module for charge prediction on CIF files Wraps the pacmof2 library to allow users to predict partial atomic charges on single CIF files or directories of CIFs. Adds CLI command (matkit pacmof2 predict), optional dependency, and typed result contract. --- pyproject.toml | 3 +- src/matkit/__init__.py | 23 +++++++ src/matkit/cli.py | 112 +++++++++++++++++++++++++++++++++ src/matkit/pacmof2/__init__.py | 8 +++ src/matkit/pacmof2/pacmof2.py | 82 ++++++++++++++++++++++++ src/matkit/types.py | 9 +++ 6 files changed, 236 insertions(+), 1 deletion(-) create mode 100644 src/matkit/pacmof2/__init__.py create mode 100644 src/matkit/pacmof2/pacmof2.py diff --git a/pyproject.toml b/pyproject.toml index c6de015..0455557 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -19,7 +19,8 @@ dependencies = [ rdkit = ["rdkit"] mlip = ["mace-torch"] plot = ["matplotlib>=3.5"] -all = ["rdkit", "mace-torch", "matplotlib>=3.5"] +pacmof2 = ["pacmof2"] +all = ["rdkit", "mace-torch", "matplotlib>=3.5", "pacmof2"] dev = ["pytest>=7.0", "ruff>=0.4"] [project.scripts] diff --git a/src/matkit/__init__.py b/src/matkit/__init__.py index 93cc370..fe43258 100644 --- a/src/matkit/__init__.py +++ b/src/matkit/__init__.py @@ -1,3 +1,26 @@ """MatKit: A modular Python toolkit for molecular simulations.""" __version__ = "0.1.0" + +_SUBMODULES = { + "graspa", + "graspa_sycl", + "raspa2", + "raspa3", + "zeopp", + "mlip", + "utils", + "io", + "plot", + "tobacco", + "orca", + "pacmof2", +} + + +def __getattr__(name): + if name in _SUBMODULES: + import importlib + + return importlib.import_module(f"matkit.{name}") + raise AttributeError(f"module 'matkit' has no attribute {name!r}") diff --git a/src/matkit/cli.py b/src/matkit/cli.py index 7edbe6d..963cdfd 100644 --- a/src/matkit/cli.py +++ b/src/matkit/cli.py @@ -1053,6 +1053,118 @@ def uma_opt_batch_cmd( click.echo(f"Error: {e}", err=True) +# ========================================== +# PACMOF2 COMMANDS +# ========================================== +@main.group("pacmof2") +def pacmof2_cli(): + """Commands for PACMOF2 charge prediction.""" + pass + + +@pacmof2_cli.command("predict") +@click.option( + "--cif", + default=None, + type=click.Path(exists=True), + help="Path to a single CIF file.", +) +@click.option( + "--cif-dir", + default=None, + type=click.Path(exists=True, file_okay=False), + help="Directory containing CIF files.", +) +@click.option( + "--outdir", + required=True, + type=click.Path(), + help="Output directory for CIFs with predicted charges.", +) +@click.option( + "--identifier", + default="_pacmof", + help="Suffix for output filenames (default: _pacmof).", +) +@click.option( + "--net-charge", + default="0", + help="Net charge: integer for single MOF, or path to " + "JSON file mapping filenames to charges for batch.", +) +@click.option( + "--adjust-method", + default="mean", + type=click.Choice(["mean", "magnitude"]), + help="Charge adjustment method.", +) +def pacmof2_predict( + cif, + cif_dir, + outdir, + identifier, + net_charge, + adjust_method, +): + """Predict partial atomic charges for CIF structures. + + Provide either --cif for a single file or --cif-dir for + a directory of CIF files. + + \b + Examples: + matkit pacmof2 predict --cif-dir cifs/ --outdir charged/ + matkit pacmof2 predict --cif structure.cif --outdir charged/ + matkit pacmof2 predict --cif-dir cifs/ --outdir charged/ \\ + --net-charge charges.json + """ + if cif and cif_dir: + click.echo( + "Error: specify --cif or --cif-dir, not both.", + err=True, + ) + return + if not cif and not cif_dir: + click.echo( + "Error: provide --cif or --cif-dir.", err=True + ) + return + + cif_path = cif_dir if cif_dir else cif + + # Parse net_charge: try int/float first, then JSON path + try: + nc = int(net_charge) + except ValueError: + try: + nc = float(net_charge) + except ValueError: + nc = net_charge # treat as JSON file path + + try: + from matkit.pacmof2 import run_charge_prediction + + result = run_charge_prediction( + cif_path=cif_path, + output_dir=outdir, + identifier=identifier, + net_charge=nc, + adjust_charge_method=adjust_method, + ) + click.echo( + f"Predicted charges for {result['num_structures']} " + f"structure(s). Output: {result['output_dir']}" + ) + except ImportError: + click.echo( + "Error: pacmof2 is required. " + "Install with: pip install matkit[pacmof2]", + err=True, + ) + except Exception as e: + click.echo(f"Error: {e}", err=True) + + # ========================================== # ZEOPP COMMANDS # ========================================== diff --git a/src/matkit/pacmof2/__init__.py b/src/matkit/pacmof2/__init__.py new file mode 100644 index 0000000..ad49612 --- /dev/null +++ b/src/matkit/pacmof2/__init__.py @@ -0,0 +1,8 @@ +__all__ = [] + +try: + from matkit.pacmof2.pacmof2 import run_charge_prediction + + __all__ += ["run_charge_prediction"] +except ImportError: + pass diff --git a/src/matkit/pacmof2/pacmof2.py b/src/matkit/pacmof2/pacmof2.py new file mode 100644 index 0000000..f91654c --- /dev/null +++ b/src/matkit/pacmof2/pacmof2.py @@ -0,0 +1,82 @@ +"""PACMOF2 charge prediction wrapper for matkit.""" + +from __future__ import annotations + +import json +from pathlib import Path +from typing import Union + +from pacmof2 import get_charges + +from matkit.types import PACMOF2Result + + +def run_charge_prediction( + cif_path: str, + output_dir: str, + identifier: str = "_pacmof", + net_charge: Union[int, float, dict] = 0, + adjust_charge_method: str = "mean", +) -> PACMOF2Result: + """Run PACMOF2 charge prediction on CIF file(s). + + Args: + cif_path: Path to a single CIF file or a directory + containing CIF files. + output_dir: Directory where output CIF files with + predicted charges will be written. + identifier: Suffix appended to output filenames + (default: "_pacmof"). + net_charge: Net charge for ionic MOFs. Use 0 for + neutral MOFs (default), an int/float for a single + structure, or a dict mapping CIF filenames to + charges for batch ionic processing. + adjust_charge_method: Method to enforce net charge + constraint. Either "mean" (default) or + "magnitude". + + Returns: + PACMOF2Result dict with keys: success, output_dir, + num_structures, error. + """ + cif_path = Path(cif_path) + output_dir = Path(output_dir) + + if not cif_path.exists(): + raise FileNotFoundError(f"CIF path not found: {cif_path}") + + multiple_cifs = cif_path.is_dir() + + if multiple_cifs: + n = len(list(cif_path.glob("*.cif"))) + if n == 0: + raise FileNotFoundError( + f"No .cif files found in {cif_path}" + ) + else: + n = 1 + + # If net_charge is a string path, load the JSON file + if isinstance(net_charge, str): + nc_path = Path(net_charge) + if nc_path.is_file(): + with open(nc_path) as f: + net_charge = json.load(f) + + output_dir.mkdir(parents=True, exist_ok=True) + + get_charges( + path_to_cif=str(cif_path), + output_path=str(output_dir), + identifier=identifier, + multiple_cifs=multiple_cifs, + adjust_charge_method=adjust_charge_method, + net_charge=net_charge, + ) + + return PACMOF2Result( + success=True, + output_dir=str(output_dir), + num_structures=n, + error=None, + ) diff --git a/src/matkit/types.py b/src/matkit/types.py index 2df5b3d..b463428 100644 --- a/src/matkit/types.py +++ b/src/matkit/types.py @@ -51,6 +51,15 @@ class ZeoppResult(TypedDict): error: Optional[str] +class PACMOF2Result(TypedDict): + """Return type for ``matkit.pacmof2.run_charge_prediction``.""" + + success: bool + output_dir: str + num_structures: int + error: Optional[str] + + class UMABatchResult(TypedDict): """Record type for ``matkit.mlip.run_opt_uma_batch`` results.""" From 6f8ef44222febe6cc2372eac05f4729de5d5cd14 Mon Sep 17 00:00:00 2001 From: Thang Pham Date: Tue, 26 May 2026 10:45:36 -0500 Subject: [PATCH 2/3] Run ruff format on cli.py and pacmof2.py --- src/matkit/cli.py | 4 +--- src/matkit/pacmof2/pacmof2.py | 4 +--- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/src/matkit/cli.py b/src/matkit/cli.py index 963cdfd..929df79 100644 --- a/src/matkit/cli.py +++ b/src/matkit/cli.py @@ -1125,9 +1125,7 @@ def pacmof2_predict( ) return if not cif and not cif_dir: - click.echo( - "Error: provide --cif or --cif-dir.", err=True - ) + click.echo("Error: provide --cif or --cif-dir.", err=True) return cif_path = cif_dir if cif_dir else cif diff --git a/src/matkit/pacmof2/pacmof2.py b/src/matkit/pacmof2/pacmof2.py index f91654c..f700cf0 100644 --- a/src/matkit/pacmof2/pacmof2.py +++ b/src/matkit/pacmof2/pacmof2.py @@ -50,9 +50,7 @@ def run_charge_prediction( if multiple_cifs: n = len(list(cif_path.glob("*.cif"))) if n == 0: - raise FileNotFoundError( - f"No .cif files found in {cif_path}" - ) + raise FileNotFoundError(f"No .cif files found in {cif_path}") else: n = 1 From bec8fd61200c31997eaf19ca819bbdc85fb2039d Mon Sep 17 00:00:00 2001 From: Thang Pham Date: Tue, 26 May 2026 11:01:27 -0500 Subject: [PATCH 3/3] Fix gRASPA template to use __COMPONENTS__ placeholder for adsorbate substitution The template had a hardcoded component block with literal "ADSORBATE" as the MoleculeName, which was never replaced by setup_simulation(). Replace it with the __COMPONENTS__ placeholder that generate_component_blocks() already handles. --- src/matkit/graspa/files/template/simulation.input | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/src/matkit/graspa/files/template/simulation.input b/src/matkit/graspa/files/template/simulation.input index 3b676d4..7aa77a9 100644 --- a/src/matkit/graspa/files/template/simulation.input +++ b/src/matkit/graspa/files/template/simulation.input @@ -27,11 +27,4 @@ CutOffVDW CUTOFF CutOffCoulomb CUTOFF EwaldPrecision 1e-6 -Component 0 MoleculeName ADSORBATE - IdealGasRosenbluthWeight 1.0 - FugacityCoefficient PR-EOS - TranslationProbability 1.0 - RotationProbability 1.0 - ReinsertionProbability 1.0 - SwapProbability 2.0 - CreateNumberOfMolecules 0 +__COMPONENTS__