From 53f430dc4775227aecdab3b14176b501866a9dd8 Mon Sep 17 00:00:00 2001 From: LIANG Bochun Date: Tue, 23 Jan 2024 11:25:57 +0800 Subject: [PATCH 01/21] Add LigpargenRunner --- mdgo/forcefield/__init__.py | 5 +- mdgo/forcefield/ligpargen.py | 99 ++++++++++++++++++++++++++++++++++++ 2 files changed, 102 insertions(+), 2 deletions(-) create mode 100644 mdgo/forcefield/ligpargen.py diff --git a/mdgo/forcefield/__init__.py b/mdgo/forcefield/__init__.py index db3f5534..4a69e29e 100644 --- a/mdgo/forcefield/__init__.py +++ b/mdgo/forcefield/__init__.py @@ -8,10 +8,10 @@ """ __author__ = "Tingzheng Hou, Ryan Kingsbury" -__version__ = "0.3.0" +__version__ = "0.3.1" __maintainer__ = "Tingzheng Hou, Ryan Kingsbury" __email__ = "tingzheng_hou@berkeley.edu" -__date__ = "Jul 19, 2022" +__date__ = "Dec 19, 2023" from .aqueous import IonLJData, Aqueous @@ -19,3 +19,4 @@ from .crawler import FFcrawler from .maestro import MaestroRunner from .pubchem import PubChemRunner +from .ligpargen import LigpargenRunner diff --git a/mdgo/forcefield/ligpargen.py b/mdgo/forcefield/ligpargen.py new file mode 100644 index 00000000..b1379ba9 --- /dev/null +++ b/mdgo/forcefield/ligpargen.py @@ -0,0 +1,99 @@ + +""" +This module implements a core class LigpargenRunner for generating +LAMMPS/GROMACS data files from molecule structure using LigParGen 2.1 +and BOSS 5.0. +""" + +import subprocess +import os +from pymatgen.io.lammps.data import LammpsData +from mdgo.util.dict_utils import lmp_mass_to_name + +class LigpargenRunner: + + def __init__( + self, + structure_name: str, + structure_dir: str, + working_dir: str= "boss_files", + out: str = "lmp", + charge: int = 0, + opt: int = 0, + xyz: bool = False, + ): + """Base constructor.""" + self.structure = structure_dir + "/" + structure_name + self.name = os.path.splitext(structure_name)[0] + self.structure_format = os.path.splitext(structure_name)[1][1:] + print("Input format:", self.structure_format) + self.structure_dir = structure_dir + self.work = working_dir + self.out = out + self.charge = charge + self.opt = opt + self.xyz = xyz + + + def data_from_structure(self, wait: float = 30): + + try: + cmd = f"ligpargen -i {self.structure} -n {self.name} -p {self.work} -c {self.charge} -o {self.opt}" + subprocess.run(cmd, shell=True) + except subprocess.CalledProcessError as e: + raise ValueError(f"LigParGen failed with errorcode {e.returncode} and stderr: {e.stderr}") from e + + if self.out == "lmp": + lmp_name = f"{self.name}.lmp" + lmp_file = f"{self.structure_dir}/{self.name}.lmp" + cp_lmp_data = f"cp {self.work}/{self.name}.lammps.lmp {lmp_file}" + subprocess.run(cp_lmp_data, shell=True) + + if self.xyz: + lmp_file = f"{self.structure_dir}/{self.name}.lmp" + data_obj = LammpsData.from_file(lmp_file) + element_id_dict = lmp_mass_to_name(data_obj.masses) + coords = data_obj.atoms[["type", "x", "y", "z"]] + lines = [] + lines.append(str(len(coords.index))) + lines.append("") + for _, r in coords.iterrows(): + element_name = element_id_dict.get(int(r["type"])) + assert element_name is not None + line = element_name + " " + " ".join(str(r[loc]) for loc in ["x", "y", "z"]) + lines.append(line) + + with open(os.path.join(self.structure_dir, lmp_name + ".xyz"), "w") as xyz_file: + xyz_file.write("\n".join(lines)) + print(".xyz file saved.") + + def data_from_smiles(self, wait: float = 30): + try: + cmd = f"ligpargen -s {self.name} -n {self.name} -p {self.work} -c {self.charge} -o {self.opt}" + subprocess.run(cmd, shell=True) + except subprocess.CalledProcessError as e: + raise ValueError(f"LigParGen failed with errorcode {e.returncode} and stderr: {e.stderr}") from e + + if self.out == "lmp": + lmp_name = f"{self.name}.lmp" + lmp_file = f"{self.structure_dir}/{self.name}.lmp" + cp_lmp_data = f"cp {self.work}/{self.name}.lammps.lmp {lmp_file}" + subprocess.run(cp_lmp_data, shell=True) + + if self.xyz: + lmp_file = f"{self.structure_dir}/{self.name}.lmp" + data_obj = LammpsData.from_file(lmp_file) + element_id_dict = lmp_mass_to_name(data_obj.masses) + coords = data_obj.atoms[["type", "x", "y", "z"]] + lines = [] + lines.append(str(len(coords.index))) + lines.append("") + for _, r in coords.iterrows(): + element_name = element_id_dict.get(int(r["type"])) + assert element_name is not None + line = element_name + " " + " ".join(str(r[loc]) for loc in ["x", "y", "z"]) + lines.append(line) + + with open(os.path.join(self.structure_dir, lmp_name + ".xyz"), "w") as xyz_file: + xyz_file.write("\n".join(lines)) + print(".xyz file saved.") From c3e7f3ed6b472560acb8c5173d75eb09b371853d Mon Sep 17 00:00:00 2001 From: Tingzheng Hou <25351437+htz1992213@users.noreply.github.com> Date: Tue, 23 Jan 2024 15:38:35 +0800 Subject: [PATCH 02/21] move ffcrawler class to ligpargen.py --- mdgo/forcefield/__init__.py | 3 +- mdgo/forcefield/crawler.py | 218 +---------------------------------- mdgo/forcefield/ligpargen.py | 191 ++++++++++++++++++++++++++++++ tests/test_forcefield.py | 2 +- 4 files changed, 194 insertions(+), 220 deletions(-) diff --git a/mdgo/forcefield/__init__.py b/mdgo/forcefield/__init__.py index 4a69e29e..8ae75669 100644 --- a/mdgo/forcefield/__init__.py +++ b/mdgo/forcefield/__init__.py @@ -16,7 +16,6 @@ from .aqueous import IonLJData, Aqueous from .charge import ChargeWriter -from .crawler import FFcrawler from .maestro import MaestroRunner from .pubchem import PubChemRunner -from .ligpargen import LigpargenRunner +from .ligpargen import LigpargenRunner, FFcrawler diff --git a/mdgo/forcefield/crawler.py b/mdgo/forcefield/crawler.py index 6047a768..44674ba8 100644 --- a/mdgo/forcefield/crawler.py +++ b/mdgo/forcefield/crawler.py @@ -3,221 +3,5 @@ # Distributed under the terms of the MIT License. """ -This module implements two core class FFcrawler for generating -LAMMPS/GROMACS data files from molecule structure using -the LigParGen web server. - -For using the FFcrawler class: - - * Download the ChromeDriver executable that - matches your Chrome version via https://chromedriver.chromium.org/downloads +This module was used to implement a core class FFcrawler, and has been moved to ligpargen module. """ - -import os -import shutil -import time -from typing import Optional - - -from pymatgen.io.lammps.data import LammpsData -from selenium import webdriver -from selenium.common.exceptions import ( - TimeoutException, - WebDriverException, -) -from selenium.webdriver.common.by import By -from selenium.webdriver.support import expected_conditions as EC -from selenium.webdriver.support.ui import WebDriverWait - -from mdgo.util.dict_utils import lmp_mass_to_name - - -class FFcrawler: - """ - Web scrapper that can automatically upload structure to the LigParGen - server and download LAMMPS/GROMACS data file. - - Args: - write_dir: Directory for writing output. - chromedriver_dir: Directory to the ChromeDriver executable. - headless: Whether to run Chrome in headless (silent) mode. - Default to True. - xyz: Whether to write the structure in the LigParGen - generated data file as .xyz. Default to False. This is useful - because the order and the name of the atoms could be - different from the initial input.) - gromacs: Whether to save GROMACS format data files. - Default to False. - - Examples: - - >>> lpg = FFcrawler('/path/to/work/dir', '/path/to/chromedriver') - >>> lpg.data_from_pdb("/path/to/pdb") - """ - - def __init__( - self, - write_dir: str, - chromedriver_dir: Optional[str] = None, - headless: bool = True, - xyz: bool = False, - gromacs: bool = False, - ): - """Base constructor.""" - self.write_dir = write_dir - self.xyz = xyz - self.gromacs = gromacs - self.preferences = { - "download.default_directory": write_dir, - "safebrowsing.enabled": "false", - "profile.managed_default_content_settings.images": 2, - } - self.options = webdriver.ChromeOptions() - self.server = webdriver.ChromeService(chromedriver_dir) - self.options.add_argument( - 'user-agent="Mozilla/5.0 ' - "(Macintosh; Intel Mac OS X 10_14_6) " - "AppleWebKit/537.36 (KHTML, like Gecko) " - 'Chrome/88.0.4324.146 Safari/537.36"' - ) - self.options.add_argument("--window-size=1920,1080") - self.options.add_argument("ignore-certificate-errors") - if headless: - self.options.add_argument("--headless") - self.options.add_experimental_option("prefs", self.preferences) - self.options.add_experimental_option("excludeSwitches", ["enable-automation"]) - if chromedriver_dir is None: - self.web = webdriver.Chrome(options=self.options) - else: - self.web = webdriver.Chrome(service=self.server, options=self.options) - self.wait = WebDriverWait(self.web, 10) - self.web.get("http://traken.chem.yale.edu/ligpargen/") - time.sleep(1) - print("LigParGen server connected.") - - def quit(self): - """ - Method for quiting ChromeDriver. - - """ - self.web.quit() - - def data_from_pdb(self, pdb_dir: str): - """ - Use the LigParGen server to generate a LAMMPS data file from a pdb file. - Write out a LAMMPS data file. - - Args: - pdb_dir: The path to the input pdb structure file. - """ - self.web.get("http://traken.chem.yale.edu/ligpargen/") - upload_xpath = '//*[@id="exampleMOLFile"]' - time.sleep(1) - self.wait.until(EC.presence_of_element_located((By.XPATH, upload_xpath))) - upload = self.web.find_element(By.XPATH, upload_xpath) - try: - upload.send_keys(pdb_dir) - submit = self.web.find_element(By.XPATH, "/html/body/div[2]/div/div[2]/form/button[1]") - submit.click() - pdb_filename = os.path.basename(pdb_dir) - self.download_data(os.path.splitext(pdb_filename)[0] + ".lmp") - except TimeoutException: - print("Timeout! Web server no response for 10s, file download failed!") - except WebDriverException as e: - print(e) - finally: - self.quit() - - def data_from_smiles(self, smiles_code): - """ - Use the LigParGen server to generate a LAMMPS data file from a SMILES code. - Write out a LAMMPS data file. - - Args: - smiles_code: The SMILES code for the LigParGen input. - """ - self.web.get("http://traken.chem.yale.edu/ligpargen/") - time.sleep(1) - smile = self.web.find_element(By.XPATH, '//*[@id="smiles"]') - smile.send_keys(smiles_code) - submit = self.web.find_element(By.XPATH, "/html/body/div[2]/div/div[2]/form/button[1]") - submit.click() - try: - self.download_data(smiles_code + ".lmp") - except TimeoutException: - print("Timeout! Web server no response for 10s, file download failed!") - finally: - self.quit() - - def download_data(self, lmp_name: str): - """ - Helper function that download and write out the LAMMPS data file. - - Args: - lmp_name: Name of the LAMMPS data file. - """ - print("Structure info uploaded. Rendering force field...") - lmp_xpath = "/html/body/div[2]/div[2]/div[1]/div/div[14]/form/input[1]" - self.wait.until(EC.presence_of_element_located((By.XPATH, lmp_xpath))) - jmol = self.web.find_element(By.XPATH, "/html/body/div[2]/div[2]/div[2]") - self.web.execute_script("arguments[0].remove();", jmol) - self.wait.until(EC.element_to_be_clickable((By.XPATH, lmp_xpath))) - data_lmp = self.web.find_element(By.XPATH, lmp_xpath) - num_file = len([f for f in os.listdir(self.write_dir) if os.path.splitext(f)[1] == ".lmp"]) + 1 - data_lmp.click() - while True: - files = sorted( - [ - os.path.join(self.write_dir, f) - for f in os.listdir(self.write_dir) - if os.path.splitext(f)[1] == ".lmp" - ], - key=os.path.getmtime, - ) - # wait for file to finish download - if len(files) < num_file: - time.sleep(1) - print("waiting for download to be initiated") - else: - newest = files[-1] - if ".crdownload" in newest: - time.sleep(1) - print("waiting for download to complete") - else: - break - print("Force field file downloaded.") - lmp_file = newest - if self.xyz: - data_obj = LammpsData.from_file(lmp_file) - element_id_dict = lmp_mass_to_name(data_obj.masses) - coords = data_obj.atoms[["type", "x", "y", "z"]] - lines = [] - lines.append(str(len(coords.index))) - lines.append("") - for _, r in coords.iterrows(): - element_name = element_id_dict.get(int(r["type"])) - assert element_name is not None - line = element_name + " " + " ".join(str(r[loc]) for loc in ["x", "y", "z"]) - lines.append(line) - - with open(os.path.join(self.write_dir, lmp_name + ".xyz"), "w") as xyz_file: - xyz_file.write("\n".join(lines)) - print(".xyz file saved.") - if self.gromacs: - data_gro = self.web.find_element(By.XPATH, "/html/body/div[2]/div[2]/div[1]/div/div[8]/form/input[1]") - data_itp = self.web.find_element(By.XPATH, "/html/body/div[2]/div[2]/div[1]/div/div[9]/form/input[1]") - data_gro.click() - data_itp.click() - time.sleep(1) - gro_file = max( - [self.write_dir + "/" + f for f in os.listdir(self.write_dir) if os.path.splitext(f)[1] == ".gro"], - key=os.path.getctime, - ) - itp_file = max( - [self.write_dir + "/" + f for f in os.listdir(self.write_dir) if os.path.splitext(f)[1] == ".itp"], - key=os.path.getctime, - ) - shutil.move(gro_file, os.path.join(self.write_dir, lmp_name[:-4] + ".gro")) - shutil.move(itp_file, os.path.join(self.write_dir, lmp_name[:-4] + ".itp")) - shutil.move(lmp_file, os.path.join(self.write_dir, lmp_name)) - print("Force field file saved.") diff --git a/mdgo/forcefield/ligpargen.py b/mdgo/forcefield/ligpargen.py index b1379ba9..ec9431e3 100644 --- a/mdgo/forcefield/ligpargen.py +++ b/mdgo/forcefield/ligpargen.py @@ -97,3 +97,194 @@ def data_from_smiles(self, wait: float = 30): with open(os.path.join(self.structure_dir, lmp_name + ".xyz"), "w") as xyz_file: xyz_file.write("\n".join(lines)) print(".xyz file saved.") + + +class FFcrawler: + """ + Web scrapper that can automatically upload structure to the LigParGen + server and download LAMMPS/GROMACS data file. + + Args: + write_dir: Directory for writing output. + chromedriver_dir: Directory to the ChromeDriver executable. + headless: Whether to run Chrome in headless (silent) mode. + Default to True. + xyz: Whether to write the structure in the LigParGen + generated data file as .xyz. Default to False. This is useful + because the order and the name of the atoms could be + different from the initial input.) + gromacs: Whether to save GROMACS format data files. + Default to False. + + Examples: + + >>> lpg = FFcrawler('/path/to/work/dir', '/path/to/chromedriver') + >>> lpg.data_from_pdb("/path/to/pdb") + """ + + def __init__( + self, + write_dir: str, + chromedriver_dir: Optional[str] = None, + headless: bool = True, + xyz: bool = False, + gromacs: bool = False, + ): + """Base constructor.""" + self.write_dir = write_dir + self.xyz = xyz + self.gromacs = gromacs + self.preferences = { + "download.default_directory": write_dir, + "safebrowsing.enabled": "false", + "profile.managed_default_content_settings.images": 2, + } + self.options = webdriver.ChromeOptions() + self.server = webdriver.ChromeService(chromedriver_dir) + self.options.add_argument( + 'user-agent="Mozilla/5.0 ' + "(Macintosh; Intel Mac OS X 10_14_6) " + "AppleWebKit/537.36 (KHTML, like Gecko) " + 'Chrome/88.0.4324.146 Safari/537.36"' + ) + self.options.add_argument("--window-size=1920,1080") + self.options.add_argument("ignore-certificate-errors") + if headless: + self.options.add_argument("--headless") + self.options.add_experimental_option("prefs", self.preferences) + self.options.add_experimental_option("excludeSwitches", ["enable-automation"]) + if chromedriver_dir is None: + self.web = webdriver.Chrome(options=self.options) + else: + self.web = webdriver.Chrome(service=self.server, options=self.options) + self.wait = WebDriverWait(self.web, 10) + self.web.get("http://traken.chem.yale.edu/ligpargen/") + time.sleep(1) + print("LigParGen server connected.") + + def quit(self): + """ + Method for quiting ChromeDriver. + + """ + self.web.quit() + + def data_from_pdb(self, pdb_dir: str): + """ + Use the LigParGen server to generate a LAMMPS data file from a pdb file. + Write out a LAMMPS data file. + + Args: + pdb_dir: The path to the input pdb structure file. + """ + self.web.get("http://traken.chem.yale.edu/ligpargen/") + upload_xpath = '//*[@id="exampleMOLFile"]' + time.sleep(1) + self.wait.until(EC.presence_of_element_located((By.XPATH, upload_xpath))) + upload = self.web.find_element(By.XPATH, upload_xpath) + try: + upload.send_keys(pdb_dir) + submit = self.web.find_element(By.XPATH, "/html/body/div[2]/div/div[2]/form/button[1]") + submit.click() + pdb_filename = os.path.basename(pdb_dir) + self.download_data(os.path.splitext(pdb_filename)[0] + ".lmp") + except TimeoutException: + print("Timeout! Web server no response for 10s, file download failed!") + except WebDriverException as e: + print(e) + finally: + self.quit() + + def data_from_smiles(self, smiles_code): + """ + Use the LigParGen server to generate a LAMMPS data file from a SMILES code. + Write out a LAMMPS data file. + + Args: + smiles_code: The SMILES code for the LigParGen input. + """ + self.web.get("http://traken.chem.yale.edu/ligpargen/") + time.sleep(1) + smile = self.web.find_element(By.XPATH, '//*[@id="smiles"]') + smile.send_keys(smiles_code) + submit = self.web.find_element(By.XPATH, "/html/body/div[2]/div/div[2]/form/button[1]") + submit.click() + try: + self.download_data(smiles_code + ".lmp") + except TimeoutException: + print("Timeout! Web server no response for 10s, file download failed!") + finally: + self.quit() + + def download_data(self, lmp_name: str): + """ + Helper function that download and write out the LAMMPS data file. + + Args: + lmp_name: Name of the LAMMPS data file. + """ + print("Structure info uploaded. Rendering force field...") + lmp_xpath = "/html/body/div[2]/div[2]/div[1]/div/div[14]/form/input[1]" + self.wait.until(EC.presence_of_element_located((By.XPATH, lmp_xpath))) + jmol = self.web.find_element(By.XPATH, "/html/body/div[2]/div[2]/div[2]") + self.web.execute_script("arguments[0].remove();", jmol) + self.wait.until(EC.element_to_be_clickable((By.XPATH, lmp_xpath))) + data_lmp = self.web.find_element(By.XPATH, lmp_xpath) + num_file = len([f for f in os.listdir(self.write_dir) if os.path.splitext(f)[1] == ".lmp"]) + 1 + data_lmp.click() + while True: + files = sorted( + [ + os.path.join(self.write_dir, f) + for f in os.listdir(self.write_dir) + if os.path.splitext(f)[1] == ".lmp" + ], + key=os.path.getmtime, + ) + # wait for file to finish download + if len(files) < num_file: + time.sleep(1) + print("waiting for download to be initiated") + else: + newest = files[-1] + if ".crdownload" in newest: + time.sleep(1) + print("waiting for download to complete") + else: + break + print("Force field file downloaded.") + lmp_file = newest + if self.xyz: + data_obj = LammpsData.from_file(lmp_file) + element_id_dict = lmp_mass_to_name(data_obj.masses) + coords = data_obj.atoms[["type", "x", "y", "z"]] + lines = [] + lines.append(str(len(coords.index))) + lines.append("") + for _, r in coords.iterrows(): + element_name = element_id_dict.get(int(r["type"])) + assert element_name is not None + line = element_name + " " + " ".join(str(r[loc]) for loc in ["x", "y", "z"]) + lines.append(line) + + with open(os.path.join(self.write_dir, lmp_name + ".xyz"), "w") as xyz_file: + xyz_file.write("\n".join(lines)) + print(".xyz file saved.") + if self.gromacs: + data_gro = self.web.find_element(By.XPATH, "/html/body/div[2]/div[2]/div[1]/div/div[8]/form/input[1]") + data_itp = self.web.find_element(By.XPATH, "/html/body/div[2]/div[2]/div[1]/div/div[9]/form/input[1]") + data_gro.click() + data_itp.click() + time.sleep(1) + gro_file = max( + [self.write_dir + "/" + f for f in os.listdir(self.write_dir) if os.path.splitext(f)[1] == ".gro"], + key=os.path.getctime, + ) + itp_file = max( + [self.write_dir + "/" + f for f in os.listdir(self.write_dir) if os.path.splitext(f)[1] == ".itp"], + key=os.path.getctime, + ) + shutil.move(gro_file, os.path.join(self.write_dir, lmp_name[:-4] + ".gro")) + shutil.move(itp_file, os.path.join(self.write_dir, lmp_name[:-4] + ".itp")) + shutil.move(lmp_file, os.path.join(self.write_dir, lmp_name)) + print("Force field file saved.") diff --git a/tests/test_forcefield.py b/tests/test_forcefield.py index 464d7d5f..71618768 100644 --- a/tests/test_forcefield.py +++ b/tests/test_forcefield.py @@ -7,7 +7,7 @@ import numpy as np import pytest -from mdgo.forcefield.crawler import * +from mdgo.forcefield.ligpargen import * from mdgo.forcefield.aqueous import * test_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "test_files") From f7de040734ad8aad46ad2bea52c7c2426c2ff89e Mon Sep 17 00:00:00 2001 From: Tingzheng Hou <25351437+htz1992213@users.noreply.github.com> Date: Tue, 23 Jan 2024 15:39:27 +0800 Subject: [PATCH 03/21] fix lints --- mdgo/forcefield/ligpargen.py | 43 +++++++++++++++++++++++++++--------- 1 file changed, 33 insertions(+), 10 deletions(-) diff --git a/mdgo/forcefield/ligpargen.py b/mdgo/forcefield/ligpargen.py index ec9431e3..c53eeebc 100644 --- a/mdgo/forcefield/ligpargen.py +++ b/mdgo/forcefield/ligpargen.py @@ -1,14 +1,38 @@ +# coding: utf-8 +# Copyright (c) Tingzheng Hou. +# Distributed under the terms of the MIT License. """ -This module implements a core class LigpargenRunner for generating -LAMMPS/GROMACS data files from molecule structure using LigParGen 2.1 -and BOSS 5.0. +This module implements two core class LigpargenRunner and FFcrawler for generating +LAMMPS/GROMACS data files from molecule structure using LigParGen 2.1 + BOSS 5.0 or +the LigParGen web server. + +For using the FFcrawler class: + + * Download the ChromeDriver executable that + matches your Chrome version via https://chromedriver.chromium.org/downloads """ -import subprocess import os +import subprocess +import shutil +import time +from typing import Optional + + from pymatgen.io.lammps.data import LammpsData +from selenium import webdriver +from selenium.common.exceptions import ( + TimeoutException, + WebDriverException, +) +from selenium.webdriver.common.by import By +from selenium.webdriver.support import expected_conditions as EC +from selenium.webdriver.support.ui import WebDriverWait + from mdgo.util.dict_utils import lmp_mass_to_name +from ligpargen.ligpargen import LigParGen + class LigpargenRunner: @@ -23,9 +47,8 @@ def __init__( xyz: bool = False, ): """Base constructor.""" - self.structure = structure_dir + "/" + structure_name - self.name = os.path.splitext(structure_name)[0] - self.structure_format = os.path.splitext(structure_name)[1][1:] + self.structure = os.path.join(structure_dir, structure_name) + self.name, self.structure_format = os.path.splitext(structure_name) print("Input format:", self.structure_format) self.structure_dir = structure_dir self.work = working_dir @@ -54,7 +77,7 @@ def data_from_structure(self, wait: float = 30): data_obj = LammpsData.from_file(lmp_file) element_id_dict = lmp_mass_to_name(data_obj.masses) coords = data_obj.atoms[["type", "x", "y", "z"]] - lines = [] + lines = list() lines.append(str(len(coords.index))) lines.append("") for _, r in coords.iterrows(): @@ -67,7 +90,7 @@ def data_from_structure(self, wait: float = 30): xyz_file.write("\n".join(lines)) print(".xyz file saved.") - def data_from_smiles(self, wait: float = 30): + def data_from_smiles(self): try: cmd = f"ligpargen -s {self.name} -n {self.name} -p {self.work} -c {self.charge} -o {self.opt}" subprocess.run(cmd, shell=True) @@ -85,7 +108,7 @@ def data_from_smiles(self, wait: float = 30): data_obj = LammpsData.from_file(lmp_file) element_id_dict = lmp_mass_to_name(data_obj.masses) coords = data_obj.atoms[["type", "x", "y", "z"]] - lines = [] + lines = list() lines.append(str(len(coords.index))) lines.append("") for _, r in coords.iterrows(): From 15a8f57434bbb862e7cf9788ce2e4b43ae319834 Mon Sep 17 00:00:00 2001 From: Tingzheng Hou <25351437+htz1992213@users.noreply.github.com> Date: Tue, 23 Jan 2024 15:39:36 +0800 Subject: [PATCH 04/21] lint --- mdgo/forcefield/ligpargen.py | 1 - 1 file changed, 1 deletion(-) diff --git a/mdgo/forcefield/ligpargen.py b/mdgo/forcefield/ligpargen.py index c53eeebc..eea0f5a4 100644 --- a/mdgo/forcefield/ligpargen.py +++ b/mdgo/forcefield/ligpargen.py @@ -56,7 +56,6 @@ def __init__( self.charge = charge self.opt = opt self.xyz = xyz - def data_from_structure(self, wait: float = 30): From a5bece8994e864327ae60253bb796ad28ed41e2c Mon Sep 17 00:00:00 2001 From: Tingzheng Hou <25351437+htz1992213@users.noreply.github.com> Date: Tue, 23 Jan 2024 15:59:20 +0800 Subject: [PATCH 05/21] lint --- mdgo/forcefield/ligpargen.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mdgo/forcefield/ligpargen.py b/mdgo/forcefield/ligpargen.py index eea0f5a4..0cfbfa18 100644 --- a/mdgo/forcefield/ligpargen.py +++ b/mdgo/forcefield/ligpargen.py @@ -57,7 +57,7 @@ def __init__( self.opt = opt self.xyz = xyz - def data_from_structure(self, wait: float = 30): + def data_from_structure(self): try: cmd = f"ligpargen -i {self.structure} -n {self.name} -p {self.work} -c {self.charge} -o {self.opt}" From 704dead7aa8051be977791d64cd30c3a3c42f8fa Mon Sep 17 00:00:00 2001 From: Tingzheng Hou <25351437+htz1992213@users.noreply.github.com> Date: Tue, 23 Jan 2024 16:49:59 +0800 Subject: [PATCH 06/21] rewrite LigpargenRunner --- mdgo/forcefield/ligpargen.py | 76 ++++++++++++------------------------ 1 file changed, 26 insertions(+), 50 deletions(-) diff --git a/mdgo/forcefield/ligpargen.py b/mdgo/forcefield/ligpargen.py index 0cfbfa18..39898c72 100644 --- a/mdgo/forcefield/ligpargen.py +++ b/mdgo/forcefield/ligpargen.py @@ -35,20 +35,21 @@ class LigpargenRunner: - def __init__( self, structure_name: str, structure_dir: str, - working_dir: str= "boss_files", + working_dir: str = "boss_files", out: str = "lmp", charge: int = 0, opt: int = 0, - xyz: bool = False, + xyz: bool = False, ): """Base constructor.""" self.structure = os.path.join(structure_dir, structure_name) self.name, self.structure_format = os.path.splitext(structure_name) + if self.structure_format == "": + self.structure_format = "SMILES" print("Input format:", self.structure_format) self.structure_dir = structure_dir self.work = working_dir @@ -57,53 +58,28 @@ def __init__( self.opt = opt self.xyz = xyz - def data_from_structure(self): - - try: - cmd = f"ligpargen -i {self.structure} -n {self.name} -p {self.work} -c {self.charge} -o {self.opt}" - subprocess.run(cmd, shell=True) - except subprocess.CalledProcessError as e: - raise ValueError(f"LigParGen failed with errorcode {e.returncode} and stderr: {e.stderr}") from e - - if self.out == "lmp": - lmp_name = f"{self.name}.lmp" - lmp_file = f"{self.structure_dir}/{self.name}.lmp" - cp_lmp_data = f"cp {self.work}/{self.name}.lammps.lmp {lmp_file}" - subprocess.run(cp_lmp_data, shell=True) - - if self.xyz: - lmp_file = f"{self.structure_dir}/{self.name}.lmp" - data_obj = LammpsData.from_file(lmp_file) - element_id_dict = lmp_mass_to_name(data_obj.masses) - coords = data_obj.atoms[["type", "x", "y", "z"]] - lines = list() - lines.append(str(len(coords.index))) - lines.append("") - for _, r in coords.iterrows(): - element_name = element_id_dict.get(int(r["type"])) - assert element_name is not None - line = element_name + " " + " ".join(str(r[loc]) for loc in ["x", "y", "z"]) - lines.append(line) - - with open(os.path.join(self.structure_dir, lmp_name + ".xyz"), "w") as xyz_file: - xyz_file.write("\n".join(lines)) - print(".xyz file saved.") - - def data_from_smiles(self): - try: - cmd = f"ligpargen -s {self.name} -n {self.name} -p {self.work} -c {self.charge} -o {self.opt}" - subprocess.run(cmd, shell=True) - except subprocess.CalledProcessError as e: - raise ValueError(f"LigParGen failed with errorcode {e.returncode} and stderr: {e.stderr}") from e - - if self.out == "lmp": - lmp_name = f"{self.name}.lmp" - lmp_file = f"{self.structure_dir}/{self.name}.lmp" - cp_lmp_data = f"cp {self.work}/{self.name}.lammps.lmp {lmp_file}" - subprocess.run(cp_lmp_data, shell=True) + def run(self): + if self.structure_format == "SMILES": + molecule_a = LigParGen( + smile=self.structure, + charge=self.charge, + numberOfOptimizations=self.opt, + molname=self.name, + workdir=self.work, + ) + else: + molecule_a = LigParGen( + ifile=self.structure, + charge=self.charge, + numberOfOptimizations=self.opt, + molname=self.name, + workdir=self.work, + ) + molecule_a.writeAllOuputs() + print("LigParGen finished succesfully!") if self.xyz: - lmp_file = f"{self.structure_dir}/{self.name}.lmp" + lmp_file = os.path.join(self.structure_dir, self.name + ".lmp") data_obj = LammpsData.from_file(lmp_file) element_id_dict = lmp_mass_to_name(data_obj.masses) coords = data_obj.atoms[["type", "x", "y", "z"]] @@ -116,7 +92,7 @@ def data_from_smiles(self): line = element_name + " " + " ".join(str(r[loc]) for loc in ["x", "y", "z"]) lines.append(line) - with open(os.path.join(self.structure_dir, lmp_name + ".xyz"), "w") as xyz_file: + with open(os.path.join(self.structure_dir, self.name + ".xyz"), "w") as xyz_file: xyz_file.write("\n".join(lines)) print(".xyz file saved.") @@ -280,7 +256,7 @@ def download_data(self, lmp_name: str): data_obj = LammpsData.from_file(lmp_file) element_id_dict = lmp_mass_to_name(data_obj.masses) coords = data_obj.atoms[["type", "x", "y", "z"]] - lines = [] + lines = list() lines.append(str(len(coords.index))) lines.append("") for _, r in coords.iterrows(): From 60fd96e40283afb03a7c5540545b5e3f43bb0903 Mon Sep 17 00:00:00 2001 From: Tingzheng Hou <25351437+htz1992213@users.noreply.github.com> Date: Tue, 23 Jan 2024 17:19:23 +0800 Subject: [PATCH 07/21] fix smile input --- mdgo/forcefield/ligpargen.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mdgo/forcefield/ligpargen.py b/mdgo/forcefield/ligpargen.py index 39898c72..d1aae688 100644 --- a/mdgo/forcefield/ligpargen.py +++ b/mdgo/forcefield/ligpargen.py @@ -61,7 +61,7 @@ def __init__( def run(self): if self.structure_format == "SMILES": molecule_a = LigParGen( - smile=self.structure, + smile=self.name, charge=self.charge, numberOfOptimizations=self.opt, molname=self.name, From 9201787a552f93cef729bc2f9d2abc4b26dd660e Mon Sep 17 00:00:00 2001 From: Tingzheng Hou <25351437+htz1992213@users.noreply.github.com> Date: Tue, 23 Jan 2024 17:21:28 +0800 Subject: [PATCH 08/21] drop output format designation --- mdgo/forcefield/ligpargen.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/mdgo/forcefield/ligpargen.py b/mdgo/forcefield/ligpargen.py index d1aae688..8b0474f1 100644 --- a/mdgo/forcefield/ligpargen.py +++ b/mdgo/forcefield/ligpargen.py @@ -40,7 +40,6 @@ def __init__( structure_name: str, structure_dir: str, working_dir: str = "boss_files", - out: str = "lmp", charge: int = 0, opt: int = 0, xyz: bool = False, @@ -53,7 +52,6 @@ def __init__( print("Input format:", self.structure_format) self.structure_dir = structure_dir self.work = working_dir - self.out = out self.charge = charge self.opt = opt self.xyz = xyz From 88403e4c85595d49917d0659bbbae3e0ea1ea9c4 Mon Sep 17 00:00:00 2001 From: Tingzheng Hou <25351437+htz1992213@users.noreply.github.com> Date: Tue, 23 Jan 2024 17:25:08 +0800 Subject: [PATCH 09/21] refactor ligpargen to mdgoligpargen --- mdgo/forcefield/__init__.py | 2 +- mdgo/forcefield/{ligpargen.py => mdgoligpargen.py} | 1 - tests/test_forcefield.py | 2 +- 3 files changed, 2 insertions(+), 3 deletions(-) rename mdgo/forcefield/{ligpargen.py => mdgoligpargen.py} (99%) diff --git a/mdgo/forcefield/__init__.py b/mdgo/forcefield/__init__.py index 8ae75669..8b04775c 100644 --- a/mdgo/forcefield/__init__.py +++ b/mdgo/forcefield/__init__.py @@ -18,4 +18,4 @@ from .charge import ChargeWriter from .maestro import MaestroRunner from .pubchem import PubChemRunner -from .ligpargen import LigpargenRunner, FFcrawler +from .mdgoligpargen import LigpargenRunner, FFcrawler diff --git a/mdgo/forcefield/ligpargen.py b/mdgo/forcefield/mdgoligpargen.py similarity index 99% rename from mdgo/forcefield/ligpargen.py rename to mdgo/forcefield/mdgoligpargen.py index 8b0474f1..33b43f60 100644 --- a/mdgo/forcefield/ligpargen.py +++ b/mdgo/forcefield/mdgoligpargen.py @@ -14,7 +14,6 @@ """ import os -import subprocess import shutil import time from typing import Optional diff --git a/tests/test_forcefield.py b/tests/test_forcefield.py index 71618768..709b0fea 100644 --- a/tests/test_forcefield.py +++ b/tests/test_forcefield.py @@ -7,7 +7,7 @@ import numpy as np import pytest -from mdgo.forcefield.ligpargen import * +from mdgo.forcefield.mdgoligpargen import * from mdgo.forcefield.aqueous import * test_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "test_files") From 8a081536831170335f949d6553aca4f79c4fb2e9 Mon Sep 17 00:00:00 2001 From: Tingzheng Hou <25351437+htz1992213@users.noreply.github.com> Date: Tue, 23 Jan 2024 17:48:05 +0800 Subject: [PATCH 10/21] handle ligpargen not found --- tests/test_forcefield.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/test_forcefield.py b/tests/test_forcefield.py index 709b0fea..eaa563c7 100644 --- a/tests/test_forcefield.py +++ b/tests/test_forcefield.py @@ -8,7 +8,10 @@ import pytest from mdgo.forcefield.mdgoligpargen import * -from mdgo.forcefield.aqueous import * +try: + from mdgo.forcefield.aqueous import * +except ModuleNotFoundError: + pass test_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "test_files") From 14555d9d73406875dbea0ba360621e0d261206e9 Mon Sep 17 00:00:00 2001 From: LIANG Bochun Date: Thu, 1 Feb 2024 21:51:12 +0800 Subject: [PATCH 11/21] Update mdgoligpargen.py Add comments on parameters of the class LigpargenRunner, and add the operation of copying lmp file from 'working_dir' to 'write_dir'. --- mdgo/forcefield/mdgoligpargen.py | 40 +++++++++++++++++++++++++++----- 1 file changed, 34 insertions(+), 6 deletions(-) diff --git a/mdgo/forcefield/mdgoligpargen.py b/mdgo/forcefield/mdgoligpargen.py index 33b43f60..34bb0b8c 100644 --- a/mdgo/forcefield/mdgoligpargen.py +++ b/mdgo/forcefield/mdgoligpargen.py @@ -16,6 +16,7 @@ import os import shutil import time +import subprocess from typing import Optional @@ -34,28 +35,50 @@ class LigpargenRunner: + """ + LigpargenRunner make use of LigParGen2.1 and BOSS5.0 to generate LAMMPS + data file and xyz file from structure file. + + Args: + structure_name: Name of the input structure file including file format. + structure_dir: Directory of the structure file and output file. + working_dir: Files generated from BOSS software. Arguement of LigParGen. + Default to "boss_files". + charge: Molecule net charge. Arguement of LigParGen. Default to 0. + opt: Number of optimizations. Arguement of LIgParGen. Default to 0. + xyz: Whether to write the structure in the LigParGen generated data file + as .xyz. Default to False. This is useful because the order and the + name of the atoms could be different from the initial input.) + + Examples: + + >>> lpg = LigpargenRunner('sturcture_name', 'path/to/structure/') + >>> lpg.run() + """ + def __init__( self, structure_name: str, - structure_dir: str, + write_dir: str, working_dir: str = "boss_files", charge: int = 0, opt: int = 0, xyz: bool = False, ): """Base constructor.""" - self.structure = os.path.join(structure_dir, structure_name) + self.name, self.structure_format = os.path.splitext(structure_name) if self.structure_format == "": self.structure_format = "SMILES" print("Input format:", self.structure_format) - self.structure_dir = structure_dir + self.structure_name = structure_name + self.write_dir = write_dir self.work = working_dir self.charge = charge self.opt = opt self.xyz = xyz - def run(self): + def run(self, structure_dir: str): if self.structure_format == "SMILES": molecule_a = LigParGen( smile=self.name, @@ -65,6 +88,7 @@ def run(self): workdir=self.work, ) else: + self.structure = os.path.join(structure_dir, self.structure_name) molecule_a = LigParGen( ifile=self.structure, charge=self.charge, @@ -73,10 +97,14 @@ def run(self): workdir=self.work, ) molecule_a.writeAllOuputs() + lmp_name = f"{self.name}.lmp" + lmp_file = os.path.join(self.write_dir, lmp_name) + copy_file = os.path.join(self.work, f"{self.name}.lammps.lmp") + shutil.copyfile(copy_file, lmp_file) print("LigParGen finished succesfully!") if self.xyz: - lmp_file = os.path.join(self.structure_dir, self.name + ".lmp") + lmp_file = os.path.join(self.write_dir, self.name + ".lmp") data_obj = LammpsData.from_file(lmp_file) element_id_dict = lmp_mass_to_name(data_obj.masses) coords = data_obj.atoms[["type", "x", "y", "z"]] @@ -89,7 +117,7 @@ def run(self): line = element_name + " " + " ".join(str(r[loc]) for loc in ["x", "y", "z"]) lines.append(line) - with open(os.path.join(self.structure_dir, self.name + ".xyz"), "w") as xyz_file: + with open(os.path.join(self.write_dir, lmp_name + ".xyz"), "w") as xyz_file: xyz_file.write("\n".join(lines)) print(".xyz file saved.") From c9dfb17285d6659a132583a0b123ec6f980ee16c Mon Sep 17 00:00:00 2001 From: LIANG Bochun Date: Thu, 1 Feb 2024 21:53:52 +0800 Subject: [PATCH 12/21] Test for LigpargenRunner Test for LigpargenRunner using EC. --- tests/test_files/CCOC(=O)O.lmp | 178 +++++++++++++++++++++++++++++ tests/test_files/CCOC(=O)O.lmp.xyz | 14 +++ tests/test_files/CCOC(=O)O.xyz | 14 +++ tests/test_files/EC.lmp | 176 ++++++++++++++++++++++++++++ tests/test_files/EC.lmp.xyz | 12 ++ tests/test_forcefield.py | 62 +++++++++- 6 files changed, 452 insertions(+), 4 deletions(-) create mode 100644 tests/test_files/CCOC(=O)O.lmp create mode 100644 tests/test_files/CCOC(=O)O.lmp.xyz create mode 100644 tests/test_files/CCOC(=O)O.xyz create mode 100644 tests/test_files/EC.lmp create mode 100644 tests/test_files/EC.lmp.xyz diff --git a/tests/test_files/CCOC(=O)O.lmp b/tests/test_files/CCOC(=O)O.lmp new file mode 100644 index 00000000..6c023380 --- /dev/null +++ b/tests/test_files/CCOC(=O)O.lmp @@ -0,0 +1,178 @@ +LAMMPS data file generated with LigParGen (israel.cabezadevaca@yale.edu) + + 12 atoms + 11 bonds + 17 angles + 16 dihedrals + 1 impropers + + 12 atom types + 11 bond types + 17 angle types + 16 dihedral types + 1 improper types + + -2.525000 47.475000 xlo xhi + -1.664000 48.336000 ylo yhi + -1.488000 48.512000 zlo zhi + +Masses + + 1 12.011 + 2 15.999 + 3 15.999 + 4 15.999 + 5 12.011 + 6 12.011 + 7 1.008 + 8 1.008 + 9 1.008 + 10 1.008 + 11 1.008 + 12 1.008 + +Pair Coeffs + + 1 0.070 3.5500000 + 2 0.140 2.9000000 + 3 0.210 2.9600000 + 4 0.170 3.1200000 + 5 0.066 3.5000000 + 6 0.066 3.5000000 + 7 0.030 2.5000000 + 8 0.030 2.5000000 + 9 0.030 2.5000000 + 10 0.030 2.5000000 + 11 0.030 2.5000000 + 12 0.000 0.0000000 + +Bond Coeffs + + 1 214.0000 1.3270 + 2 570.0000 1.2290 + 3 450.0000 1.3640 + 4 320.0000 1.4100 + 5 268.0000 1.5290 + 6 340.0000 1.0900 + 7 340.0000 1.0900 + 8 340.0000 1.0900 + 9 340.0000 1.0900 + 10 340.0000 1.0900 + 11 553.0000 0.9450 + +Angle Coeffs + + 1 83.000 123.400 + 2 70.130 117.450 + 3 83.000 116.900 + 4 50.000 109.500 + 5 37.500 110.700 + 6 37.500 110.700 + 7 37.500 110.700 + 8 35.000 109.500 + 9 35.000 109.500 + 10 35.000 113.000 + 11 80.000 121.000 + 12 37.500 110.700 + 13 37.500 110.700 + 14 33.000 107.800 + 15 33.000 107.800 + 16 33.000 107.800 + 17 33.000 107.800 + +Dihedral Coeffs + + 1 0.000 5.124 0.000 0.000 + 2 -1.220 -0.126 0.422 0.000 + 3 0.000 0.000 0.468 0.000 + 4 0.000 0.000 0.468 0.000 + 5 0.000 0.000 0.468 0.000 + 6 0.000 0.000 0.198 0.000 + 7 0.000 0.000 0.198 0.000 + 8 0.000 5.500 0.000 0.000 + 9 0.000 5.500 0.000 0.000 + 10 4.669 5.124 0.000 0.000 + 11 0.000 0.000 0.300 0.000 + 12 0.000 0.000 0.300 0.000 + 13 0.000 0.000 0.300 0.000 + 14 0.000 0.000 0.300 0.000 + 15 0.000 0.000 0.300 0.000 + 16 0.000 0.000 0.300 0.000 + +Improper Coeffs + + 1 10.500 -1 2 + +Atoms + + 1 1 1 0.589708 1.45900 -0.07400 0.01300 + 2 1 2 -0.316075 0.46100 -1.02200 0.18800 + 3 1 3 -0.521319 1.26800 0.98800 -0.62500 + 4 1 4 -0.548751 2.74000 -0.25400 0.54300 + 5 1 5 0.030243 -0.81200 -0.76900 -0.37800 + 6 1 6 -0.299103 -1.42400 0.48800 0.16700 + 7 1 7 0.109289 -2.52500 0.43200 0.01900 + 8 1 8 0.109289 -1.01900 1.37200 -0.33100 + 9 1 9 0.109289 -1.27100 0.57200 1.28100 + 10 1 10 0.123815 -0.71600 -0.64200 -1.48800 + 11 1 11 0.123815 -1.43200 -1.66400 -0.16600 + 12 1 12 0.489800 3.27000 0.57400 0.77800 + +Bonds + + 1 1 2 1 + 2 2 3 1 + 3 3 4 1 + 4 4 5 2 + 5 5 6 5 + 6 6 7 6 + 7 7 8 6 + 8 8 9 6 + 9 9 10 5 + 10 10 11 5 + 11 11 12 4 + +Angles + + 1 1 3 1 2 + 2 2 4 1 2 + 3 3 5 2 1 + 4 4 6 5 2 + 5 5 7 6 5 + 6 6 8 6 5 + 7 7 9 6 5 + 8 8 10 5 2 + 9 9 11 5 2 + 10 10 12 4 1 + 11 11 4 1 3 + 12 12 10 5 6 + 13 13 11 5 6 + 14 14 8 6 7 + 15 15 9 6 7 + 16 16 9 6 8 + 17 17 11 5 10 + +Dihedrals + + 1 1 5 2 1 3 + 2 2 6 5 2 1 + 3 3 7 6 5 2 + 4 4 8 6 5 2 + 5 5 9 6 5 2 + 6 6 10 5 2 1 + 7 7 11 5 2 1 + 8 8 12 4 1 2 + 9 9 12 4 1 3 + 10 10 5 2 1 4 + 11 11 10 5 6 7 + 12 12 11 5 6 7 + 13 13 10 5 6 8 + 14 14 11 5 6 8 + 15 15 10 5 6 9 + 16 16 11 5 6 9 + +Impropers + + 1 1 2 1 3 4 + + diff --git a/tests/test_files/CCOC(=O)O.lmp.xyz b/tests/test_files/CCOC(=O)O.lmp.xyz new file mode 100644 index 00000000..a0272bf5 --- /dev/null +++ b/tests/test_files/CCOC(=O)O.lmp.xyz @@ -0,0 +1,14 @@ +12 + +C 1.459 -0.074 0.013 +O 0.461 -1.022 0.188 +O 1.268 0.988 -0.625 +O 2.74 -0.254 0.543 +C -0.812 -0.769 -0.378 +C -1.424 0.488 0.167 +H -2.525 0.432 0.019 +H -1.019 1.372 -0.331 +H -1.271 0.572 1.281 +H -0.716 -0.642 -1.488 +H -1.432 -1.664 -0.166 +H 3.27 0.574 0.778 \ No newline at end of file diff --git a/tests/test_files/CCOC(=O)O.xyz b/tests/test_files/CCOC(=O)O.xyz new file mode 100644 index 00000000..a0272bf5 --- /dev/null +++ b/tests/test_files/CCOC(=O)O.xyz @@ -0,0 +1,14 @@ +12 + +C 1.459 -0.074 0.013 +O 0.461 -1.022 0.188 +O 1.268 0.988 -0.625 +O 2.74 -0.254 0.543 +C -0.812 -0.769 -0.378 +C -1.424 0.488 0.167 +H -2.525 0.432 0.019 +H -1.019 1.372 -0.331 +H -1.271 0.572 1.281 +H -0.716 -0.642 -1.488 +H -1.432 -1.664 -0.166 +H 3.27 0.574 0.778 \ No newline at end of file diff --git a/tests/test_files/EC.lmp b/tests/test_files/EC.lmp new file mode 100644 index 00000000..9d92f47c --- /dev/null +++ b/tests/test_files/EC.lmp @@ -0,0 +1,176 @@ +LAMMPS data file generated with LigParGen (israel.cabezadevaca@yale.edu) + + 10 atoms + 10 bonds + 17 angles + 19 dihedrals + 1 impropers + + 10 atom types + 10 bond types + 17 angle types + 19 dihedral types + 1 improper types + + -2.065000 47.935000 xlo xhi + -1.301000 48.699000 ylo yhi + -1.234000 48.766000 zlo zhi + +Masses + + 1 15.999 + 2 15.999 + 3 15.999 + 4 12.011 + 5 12.011 + 6 12.011 + 7 1.008 + 8 1.008 + 9 1.008 + 10 1.008 + +Pair Coeffs + + 1 0.140 2.9000000 + 2 0.140 2.9000000 + 3 0.210 2.9600000 + 4 0.066 3.5000000 + 5 0.066 3.5000000 + 6 0.070 3.5500000 + 7 0.030 2.5000000 + 8 0.030 2.5000000 + 9 0.030 2.5000000 + 10 0.030 2.5000000 + +Bond Coeffs + + 1 214.0000 1.3270 + 2 214.0000 1.3270 + 3 570.0000 1.2290 + 4 320.0000 1.4100 + 5 268.0000 1.5290 + 6 340.0000 1.0900 + 7 340.0000 1.0900 + 8 340.0000 1.0900 + 9 340.0000 1.0900 + 10 320.0000 1.4100 + +Angle Coeffs + + 1 70.130 117.450 + 2 83.000 123.400 + 3 83.000 116.900 + 4 50.000 109.500 + 5 35.000 109.500 + 6 35.000 109.500 + 7 37.500 110.700 + 8 37.500 110.700 + 9 83.000 123.400 + 10 37.500 110.700 + 11 37.500 110.700 + 12 83.000 116.900 + 13 33.000 107.800 + 14 35.000 109.500 + 15 33.000 107.800 + 16 35.000 109.500 + 17 50.000 109.500 + +Dihedral Coeffs + + 1 4.669 5.124 0.000 0.000 + 2 -1.220 -0.126 0.422 0.000 + 3 0.000 0.000 0.198 0.000 + 4 0.000 0.000 0.198 0.000 + 5 0.000 0.000 0.468 0.000 + 6 0.000 0.000 0.468 0.000 + 7 4.669 5.124 0.000 0.000 + 8 0.000 5.124 0.000 0.000 + 9 0.000 5.124 0.000 0.000 + 10 0.000 0.000 0.468 0.000 + 11 0.000 0.000 0.300 0.000 + 12 0.000 0.000 0.300 0.000 + 13 0.000 0.000 0.468 0.000 + 14 0.000 0.000 0.300 0.000 + 15 0.000 0.000 0.300 0.000 + 16 0.000 0.000 0.198 0.000 + 17 0.000 0.000 0.198 0.000 + 18 -0.550 0.000 0.000 0.000 + 19 -1.220 -0.126 0.422 0.000 + +Improper Coeffs + + 1 10.500 -1 2 + +Atoms + + 1 1 1 -0.324734 -0.01800 1.11900 -0.11200 + 2 1 2 -0.324709 -0.01800 -1.12000 0.11200 + 3 1 3 -0.368343 1.98200 0.00000 0.00000 + 4 1 4 -0.041460 -1.35300 0.72800 0.17200 + 5 1 5 -0.042098 -1.35300 -0.72700 -0.17200 + 6 1 6 0.559059 0.76000 0.00000 0.00000 + 7 1 7 0.135599 -2.06500 1.30200 -0.42600 + 8 1 8 0.135599 -1.54800 0.90700 1.23500 + 9 1 9 0.135544 -2.06500 -1.30100 0.42700 + 10 1 10 0.135543 -1.55000 -0.90600 -1.23400 + +Bonds + + 1 1 1 6 + 2 2 2 6 + 3 3 3 6 + 4 4 4 1 + 5 5 5 4 + 6 6 7 4 + 7 7 8 4 + 8 8 9 5 + 9 9 10 5 + 10 10 5 2 + +Angles + + 1 1 2 6 1 + 2 2 3 6 1 + 3 3 4 1 6 + 4 4 5 4 1 + 5 5 7 4 1 + 6 6 8 4 1 + 7 7 9 5 4 + 8 8 10 5 4 + 9 9 3 6 2 + 10 10 7 4 5 + 11 11 8 4 5 + 12 12 6 2 5 + 13 13 8 4 7 + 14 14 2 5 9 + 15 15 10 5 9 + 16 16 2 5 10 + 17 17 4 5 2 + +Dihedrals + + 1 1 4 1 6 2 + 2 2 5 4 1 6 + 3 3 7 4 1 6 + 4 4 8 4 1 6 + 5 5 9 5 4 1 + 6 6 10 5 4 1 + 7 7 5 2 6 1 + 8 8 4 1 6 3 + 9 9 5 2 6 3 + 10 10 2 5 4 7 + 11 11 9 5 4 7 + 12 12 10 5 4 7 + 13 13 2 5 4 8 + 14 14 9 5 4 8 + 15 15 10 5 4 8 + 16 16 6 2 5 9 + 17 17 6 2 5 10 + 18 18 1 4 5 2 + 19 19 6 2 5 4 + +Impropers + + 1 1 1 6 2 3 + + diff --git a/tests/test_files/EC.lmp.xyz b/tests/test_files/EC.lmp.xyz new file mode 100644 index 00000000..158dea4a --- /dev/null +++ b/tests/test_files/EC.lmp.xyz @@ -0,0 +1,12 @@ +10 + +O -0.018 1.119 -0.112 +O -0.018 -1.12 0.112 +O 1.982 0.0 0.0 +C -1.353 0.728 0.172 +C -1.353 -0.727 -0.172 +C 0.76 0.0 0.0 +H -2.065 1.302 -0.426 +H -1.548 0.907 1.235 +H -2.065 -1.301 0.427 +H -1.55 -0.906 -1.234 \ No newline at end of file diff --git a/tests/test_forcefield.py b/tests/test_forcefield.py index eaa563c7..ab0367cd 100644 --- a/tests/test_forcefield.py +++ b/tests/test_forcefield.py @@ -8,14 +8,68 @@ import pytest from mdgo.forcefield.mdgoligpargen import * -try: - from mdgo.forcefield.aqueous import * -except ModuleNotFoundError: - pass +#from mdgo.forcefield.crawler import * +from mdgo.forcefield.aqueous import * test_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "test_files") +class LigpargenRunnerTest(unittest.TestCase): + def test_run(self) -> None: + with open(os.path.join(test_dir, "EC.lmp")) as f: + pdf = f.readlines() + with open(os.path.join(test_dir, "EC.lmp.xyz")) as f: + xyz = f.readlines() + with open(os.path.join(test_dir, "CCOC(=O)O.lmp")) as f: + smiles = f.readlines() + with open(os.path.join(test_dir, "CCOC(=O)O.lmp.xyz")) as f: + xyz_smiles = f.readlines() + + saved_stdout = sys.stdout + download_dir = tempfile.mkdtemp() + try: + out = StringIO() + sys.stdout = out + + lpg = LigpargenRunner("EC.pdb", download_dir, xyz=True) + lpg.run(test_dir) + self.assertIn( + "Input format: .pdb\n" + "LigParGen finished succesfully!\n" + ".xyz file saved.", + out.getvalue(), + ) + self.assertTrue(os.path.exists(os.path.join(download_dir, "EC.lmp"))) + self.assertTrue(os.path.exists(os.path.join(download_dir, "EC.lmp.xyz"))) + with open(os.path.join(download_dir, "EC.lmp")) as f: + pdf_actual = f.readlines() + self.assertListEqual(pdf, pdf_actual) + with open(os.path.join(download_dir, "EC.lmp.xyz")) as f: + xyz_actual = f.readlines() + self.assertListEqual(xyz, xyz_actual) + + lpg = LigpargenRunner("CCOC(=O)O", download_dir, xyz=True) + lpg.run(test_dir) + self.assertIn( + "Input format: SMILES\n" + "LigParGen finished succesfully!\n" + ".xyz file saved.", + out.getvalue(), + ) + self.assertTrue(os.path.exists(os.path.join(download_dir, "CCOC(=O)O.lmp"))) + self.assertTrue(os.path.exists(os.path.join(download_dir, "CCOC(=O)O.lmp.xyz"))) + with open(os.path.join(download_dir, "CCOC(=O)O.lmp")) as f: + smiles_actual = f.readlines() + self.assertListEqual(smiles, smiles_actual) + with open(os.path.join(download_dir, "CCOC(=O)O.lmp.xyz")) as f: + xyz_actual = f.readlines() + self.assertListEqual(xyz_smiles, xyz_actual) + + finally: + sys.stdout = saved_stdout + shutil.rmtree(download_dir) + + class FFcrawlerTest(unittest.TestCase): def test_chrome(self) -> None: with open(os.path.join(test_dir, "EMC.lmp")) as f: From 97bbbce5be206d24a11c1d5bf806d32c115433eb Mon Sep 17 00:00:00 2001 From: Tingzheng Hou <25351437+htz1992213@users.noreply.github.com> Date: Fri, 2 Feb 2024 12:08:36 +0800 Subject: [PATCH 13/21] revert wrong changes --- mdgo/forcefield/mdgoligpargen.py | 1 - tests/test_forcefield.py | 6 ++++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/mdgo/forcefield/mdgoligpargen.py b/mdgo/forcefield/mdgoligpargen.py index 34bb0b8c..13c9b3fa 100644 --- a/mdgo/forcefield/mdgoligpargen.py +++ b/mdgo/forcefield/mdgoligpargen.py @@ -16,7 +16,6 @@ import os import shutil import time -import subprocess from typing import Optional diff --git a/tests/test_forcefield.py b/tests/test_forcefield.py index ab0367cd..b7bf3c6f 100644 --- a/tests/test_forcefield.py +++ b/tests/test_forcefield.py @@ -8,8 +8,10 @@ import pytest from mdgo.forcefield.mdgoligpargen import * -#from mdgo.forcefield.crawler import * -from mdgo.forcefield.aqueous import * +try: + from mdgo.forcefield.aqueous import * +except ModuleNotFoundError: + pass test_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "test_files") From 19436e6dd48e07ed52a0e1cce08d1214735a8a3e Mon Sep 17 00:00:00 2001 From: LIANG Bochun Date: Fri, 2 Feb 2024 18:56:01 +0800 Subject: [PATCH 14/21] Change Args for LigpargenRunner and the run function. Change Args for LigpargenRunner and the function 'run'. Add docstrings for the fuction 'run'. --- mdgo/forcefield/mdgoligpargen.py | 23 +++++++++++++++-------- tests/test_forcefield.py | 6 +++--- 2 files changed, 18 insertions(+), 11 deletions(-) diff --git a/mdgo/forcefield/mdgoligpargen.py b/mdgo/forcefield/mdgoligpargen.py index 13c9b3fa..1b156c4f 100644 --- a/mdgo/forcefield/mdgoligpargen.py +++ b/mdgo/forcefield/mdgoligpargen.py @@ -39,8 +39,8 @@ class LigpargenRunner: data file and xyz file from structure file. Args: - structure_name: Name of the input structure file including file format. - structure_dir: Directory of the structure file and output file. + structure_file: The input structure file including path to file. + write_dir: Directory to write the output file. working_dir: Files generated from BOSS software. Arguement of LigParGen. Default to "boss_files". charge: Molecule net charge. Arguement of LigParGen. Default to 0. @@ -51,13 +51,13 @@ class LigpargenRunner: Examples: - >>> lpg = LigpargenRunner('sturcture_name', 'path/to/structure/') + >>> lpg = LigpargenRunner('sturcture_file', 'path/to/write/output') >>> lpg.run() """ def __init__( self, - structure_name: str, + structure_file: str, write_dir: str, working_dir: str = "boss_files", charge: int = 0, @@ -65,19 +65,27 @@ def __init__( xyz: bool = False, ): """Base constructor.""" - + structure_dir, structure_name = os.path.split(structure_file) self.name, self.structure_format = os.path.splitext(structure_name) if self.structure_format == "": self.structure_format = "SMILES" print("Input format:", self.structure_format) - self.structure_name = structure_name + self.structure = structure_file self.write_dir = write_dir self.work = working_dir self.charge = charge self.opt = opt self.xyz = xyz - def run(self, structure_dir: str): + def run(self): + """ + Run LigParGen2.1 to generate a LAMMPS data file from a structrure file + (pdb/mol/mol2...) supported by BOSS5.0. + Write out a LAMMPS data file. + + Args: + None + """ if self.structure_format == "SMILES": molecule_a = LigParGen( smile=self.name, @@ -87,7 +95,6 @@ def run(self, structure_dir: str): workdir=self.work, ) else: - self.structure = os.path.join(structure_dir, self.structure_name) molecule_a = LigParGen( ifile=self.structure, charge=self.charge, diff --git a/tests/test_forcefield.py b/tests/test_forcefield.py index b7bf3c6f..51ea9059 100644 --- a/tests/test_forcefield.py +++ b/tests/test_forcefield.py @@ -33,8 +33,8 @@ def test_run(self) -> None: out = StringIO() sys.stdout = out - lpg = LigpargenRunner("EC.pdb", download_dir, xyz=True) - lpg.run(test_dir) + lpg = LigpargenRunner(os.path.join(test_dir, "EC.pdb"), download_dir, xyz=True) + lpg.run() self.assertIn( "Input format: .pdb\n" "LigParGen finished succesfully!\n" @@ -51,7 +51,7 @@ def test_run(self) -> None: self.assertListEqual(xyz, xyz_actual) lpg = LigpargenRunner("CCOC(=O)O", download_dir, xyz=True) - lpg.run(test_dir) + lpg.run() self.assertIn( "Input format: SMILES\n" "LigParGen finished succesfully!\n" From fb1f1e125a3baef8087a95083366ae2db65aede7 Mon Sep 17 00:00:00 2001 From: LIANG Bochun Date: Fri, 2 Feb 2024 22:26:18 +0800 Subject: [PATCH 15/21] Some coding/typo errors --- mdgo/forcefield/mdgoligpargen.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/mdgo/forcefield/mdgoligpargen.py b/mdgo/forcefield/mdgoligpargen.py index 1b156c4f..faaa4bc4 100644 --- a/mdgo/forcefield/mdgoligpargen.py +++ b/mdgo/forcefield/mdgoligpargen.py @@ -29,8 +29,9 @@ from selenium.webdriver.support import expected_conditions as EC from selenium.webdriver.support.ui import WebDriverWait -from mdgo.util.dict_utils import lmp_mass_to_name from ligpargen.ligpargen import LigParGen +from mdgo.util.dict_utils import lmp_mass_to_name + class LigpargenRunner: @@ -47,7 +48,7 @@ class LigpargenRunner: opt: Number of optimizations. Arguement of LIgParGen. Default to 0. xyz: Whether to write the structure in the LigParGen generated data file as .xyz. Default to False. This is useful because the order and the - name of the atoms could be different from the initial input.) + name of the atoms could be different from the initial input. Examples: @@ -84,7 +85,7 @@ def run(self): Write out a LAMMPS data file. Args: - None + None. """ if self.structure_format == "SMILES": molecule_a = LigParGen( @@ -141,7 +142,7 @@ class FFcrawler: xyz: Whether to write the structure in the LigParGen generated data file as .xyz. Default to False. This is useful because the order and the name of the atoms could be - different from the initial input.) + different from the initial input. gromacs: Whether to save GROMACS format data files. Default to False. From 652e1c55a5932ee46f5026c28473f042ffc49d69 Mon Sep 17 00:00:00 2001 From: Tingzheng Hou <25351437+htz1992213@users.noreply.github.com> Date: Sun, 4 Feb 2024 10:03:38 +0800 Subject: [PATCH 16/21] black --- mdgo/forcefield/mdgoligpargen.py | 18 ++++++++++-------- tests/test_forcefield.py | 28 +++++++++------------------- 2 files changed, 19 insertions(+), 27 deletions(-) diff --git a/mdgo/forcefield/mdgoligpargen.py b/mdgo/forcefield/mdgoligpargen.py index faaa4bc4..4360f2ca 100644 --- a/mdgo/forcefield/mdgoligpargen.py +++ b/mdgo/forcefield/mdgoligpargen.py @@ -29,33 +29,35 @@ from selenium.webdriver.support import expected_conditions as EC from selenium.webdriver.support.ui import WebDriverWait -from ligpargen.ligpargen import LigParGen +try: + from ligpargen.ligpargen import LigParGen +except ModuleNotFoundError: + pass from mdgo.util.dict_utils import lmp_mass_to_name - class LigpargenRunner: """ LigpargenRunner make use of LigParGen2.1 and BOSS5.0 to generate LAMMPS data file and xyz file from structure file. - + Args: structure_file: The input structure file including path to file. write_dir: Directory to write the output file. - working_dir: Files generated from BOSS software. Arguement of LigParGen. + working_dir: Files generated from BOSS software. Arguement of LigParGen. Default to "boss_files". charge: Molecule net charge. Arguement of LigParGen. Default to 0. opt: Number of optimizations. Arguement of LIgParGen. Default to 0. xyz: Whether to write the structure in the LigParGen generated data file - as .xyz. Default to False. This is useful because the order and the + as .xyz. Default to False. This is useful because the order and the name of the atoms could be different from the initial input. - + Examples: >>> lpg = LigpargenRunner('sturcture_file', 'path/to/write/output') >>> lpg.run() """ - + def __init__( self, structure_file: str, @@ -83,7 +85,7 @@ def run(self): Run LigParGen2.1 to generate a LAMMPS data file from a structrure file (pdb/mol/mol2...) supported by BOSS5.0. Write out a LAMMPS data file. - + Args: None. """ diff --git a/tests/test_forcefield.py b/tests/test_forcefield.py index 51ea9059..10c85ca8 100644 --- a/tests/test_forcefield.py +++ b/tests/test_forcefield.py @@ -8,10 +8,7 @@ import pytest from mdgo.forcefield.mdgoligpargen import * -try: - from mdgo.forcefield.aqueous import * -except ModuleNotFoundError: - pass +from mdgo.forcefield.aqueous import * test_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "test_files") @@ -32,15 +29,13 @@ def test_run(self) -> None: try: out = StringIO() sys.stdout = out - + lpg = LigpargenRunner(os.path.join(test_dir, "EC.pdb"), download_dir, xyz=True) lpg.run() self.assertIn( - "Input format: .pdb\n" - "LigParGen finished succesfully!\n" - ".xyz file saved.", + "Input format: .pdb\n" "LigParGen finished succesfully!\n" ".xyz file saved.", out.getvalue(), - ) + ) self.assertTrue(os.path.exists(os.path.join(download_dir, "EC.lmp"))) self.assertTrue(os.path.exists(os.path.join(download_dir, "EC.lmp.xyz"))) with open(os.path.join(download_dir, "EC.lmp")) as f: @@ -49,15 +44,13 @@ def test_run(self) -> None: with open(os.path.join(download_dir, "EC.lmp.xyz")) as f: xyz_actual = f.readlines() self.assertListEqual(xyz, xyz_actual) - + lpg = LigpargenRunner("CCOC(=O)O", download_dir, xyz=True) lpg.run() self.assertIn( - "Input format: SMILES\n" - "LigParGen finished succesfully!\n" - ".xyz file saved.", + "Input format: SMILES\n" "LigParGen finished succesfully!\n" ".xyz file saved.", out.getvalue(), - ) + ) self.assertTrue(os.path.exists(os.path.join(download_dir, "CCOC(=O)O.lmp"))) self.assertTrue(os.path.exists(os.path.join(download_dir, "CCOC(=O)O.lmp.xyz"))) with open(os.path.join(download_dir, "CCOC(=O)O.lmp")) as f: @@ -94,14 +87,11 @@ def test_chrome(self) -> None: lpg = FFcrawler(download_dir, xyz=True, gromacs=True) lpg.data_from_pdb(os.path.join(test_dir, "EMC.pdb")) self.assertIn( - "LigParGen server connected.\n" - "Structure info uploaded. Rendering force field...\n", + "LigParGen server connected.\n" "Structure info uploaded. Rendering force field...\n", out.getvalue(), ) self.assertIn( - "Force field file downloaded.\n" - ".xyz file saved.\n" - "Force field file saved.\n", + "Force field file downloaded.\n" ".xyz file saved.\n" "Force field file saved.\n", out.getvalue(), ) self.assertTrue(os.path.exists(os.path.join(download_dir, "EMC.lmp"))) From fa53ce8c8228245e888641aa2299b46cf5bb5499 Mon Sep 17 00:00:00 2001 From: Tingzheng Hou <25351437+htz1992213@users.noreply.github.com> Date: Sun, 4 Feb 2024 10:22:10 +0800 Subject: [PATCH 17/21] improve file copy step --- mdgo/forcefield/mdgoligpargen.py | 22 +++++++--------------- 1 file changed, 7 insertions(+), 15 deletions(-) diff --git a/mdgo/forcefield/mdgoligpargen.py b/mdgo/forcefield/mdgoligpargen.py index 4360f2ca..b909b6a3 100644 --- a/mdgo/forcefield/mdgoligpargen.py +++ b/mdgo/forcefield/mdgoligpargen.py @@ -43,9 +43,7 @@ class LigpargenRunner: Args: structure_file: The input structure file including path to file. - write_dir: Directory to write the output file. - working_dir: Files generated from BOSS software. Arguement of LigParGen. - Default to "boss_files". + work_dir: Directory to write the output file. charge: Molecule net charge. Arguement of LigParGen. Default to 0. opt: Number of optimizations. Arguement of LIgParGen. Default to 0. xyz: Whether to write the structure in the LigParGen generated data file @@ -61,8 +59,7 @@ class LigpargenRunner: def __init__( self, structure_file: str, - write_dir: str, - working_dir: str = "boss_files", + work_dir: str, charge: int = 0, opt: int = 0, xyz: bool = False, @@ -74,8 +71,7 @@ def __init__( self.structure_format = "SMILES" print("Input format:", self.structure_format) self.structure = structure_file - self.write_dir = write_dir - self.work = working_dir + self.work_dir = work_dir self.charge = charge self.opt = opt self.xyz = xyz @@ -95,7 +91,7 @@ def run(self): charge=self.charge, numberOfOptimizations=self.opt, molname=self.name, - workdir=self.work, + workdir=self.work_dir, ) else: molecule_a = LigParGen( @@ -103,17 +99,13 @@ def run(self): charge=self.charge, numberOfOptimizations=self.opt, molname=self.name, - workdir=self.work, + workdir=self.work_dir, ) molecule_a.writeAllOuputs() - lmp_name = f"{self.name}.lmp" - lmp_file = os.path.join(self.write_dir, lmp_name) - copy_file = os.path.join(self.work, f"{self.name}.lammps.lmp") - shutil.copyfile(copy_file, lmp_file) print("LigParGen finished succesfully!") if self.xyz: - lmp_file = os.path.join(self.write_dir, self.name + ".lmp") + lmp_file = os.path.join(self.work_dir, self.name + ".lmp") data_obj = LammpsData.from_file(lmp_file) element_id_dict = lmp_mass_to_name(data_obj.masses) coords = data_obj.atoms[["type", "x", "y", "z"]] @@ -126,7 +118,7 @@ def run(self): line = element_name + " " + " ".join(str(r[loc]) for loc in ["x", "y", "z"]) lines.append(line) - with open(os.path.join(self.write_dir, lmp_name + ".xyz"), "w") as xyz_file: + with open(os.path.join(self.work_dir, self.name + ".xyz"), "w") as xyz_file: xyz_file.write("\n".join(lines)) print(".xyz file saved.") From 6e92ccef051e6c7b5ef6ea8c89941afcf8434691 Mon Sep 17 00:00:00 2001 From: Tingzheng Hou <25351437+htz1992213@users.noreply.github.com> Date: Sun, 4 Feb 2024 10:46:44 +0800 Subject: [PATCH 18/21] update list literals --- mdgo/forcefield/mdgoligpargen.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mdgo/forcefield/mdgoligpargen.py b/mdgo/forcefield/mdgoligpargen.py index b909b6a3..be4bc5c2 100644 --- a/mdgo/forcefield/mdgoligpargen.py +++ b/mdgo/forcefield/mdgoligpargen.py @@ -109,7 +109,7 @@ def run(self): data_obj = LammpsData.from_file(lmp_file) element_id_dict = lmp_mass_to_name(data_obj.masses) coords = data_obj.atoms[["type", "x", "y", "z"]] - lines = list() + lines = [] lines.append(str(len(coords.index))) lines.append("") for _, r in coords.iterrows(): @@ -282,7 +282,7 @@ def download_data(self, lmp_name: str): data_obj = LammpsData.from_file(lmp_file) element_id_dict = lmp_mass_to_name(data_obj.masses) coords = data_obj.atoms[["type", "x", "y", "z"]] - lines = list() + lines = [] lines.append(str(len(coords.index))) lines.append("") for _, r in coords.iterrows(): From 24026775a0dd94a408754707ad05466e31c77e63 Mon Sep 17 00:00:00 2001 From: Tingzheng Hou <25351437+htz1992213@users.noreply.github.com> Date: Mon, 5 Feb 2024 12:51:52 +0800 Subject: [PATCH 19/21] move ffcrawler to mdgoligpargen; linting --- mdgo/forcefield/crawler.py | 200 ------------------------------- mdgo/forcefield/mdgoligpargen.py | 25 ++-- 2 files changed, 9 insertions(+), 216 deletions(-) diff --git a/mdgo/forcefield/crawler.py b/mdgo/forcefield/crawler.py index d17aa7f1..27c5c38c 100644 --- a/mdgo/forcefield/crawler.py +++ b/mdgo/forcefield/crawler.py @@ -5,204 +5,4 @@ This module was used to implement a core class FFcrawler, and has been moved to ligpargen module. """ -from __future__ import annotations -import os -import shutil -import time - -from pymatgen.io.lammps.data import LammpsData -from selenium import webdriver -from selenium.common.exceptions import TimeoutException, WebDriverException -from selenium.webdriver.common.by import By -from selenium.webdriver.support import expected_conditions as EC -from selenium.webdriver.support.ui import WebDriverWait - -from mdgo.util.dict_utils import lmp_mass_to_name - - -class FFcrawler: - """ - Web scrapper that can automatically upload structure to the LigParGen - server and download LAMMPS/GROMACS data file. - - Args: - write_dir: Directory for writing output. - chromedriver_dir: Directory to the ChromeDriver executable. - headless: Whether to run Chrome in headless (silent) mode. - Default to True. - xyz: Whether to write the structure in the LigParGen - generated data file as .xyz. Default to False. This is useful - because the order and the name of the atoms could be - different from the initial input.) - gromacs: Whether to save GROMACS format data files. - Default to False. - - Examples: - >>> lpg = FFcrawler('/path/to/work/dir', '/path/to/chromedriver') - >>> lpg.data_from_pdb("/path/to/pdb") - """ - - def __init__( - self, - write_dir: str, - chromedriver_dir: str | None = None, - headless: bool = True, - xyz: bool = False, - gromacs: bool = False, - ): - """Base constructor.""" - self.write_dir = write_dir - self.xyz = xyz - self.gromacs = gromacs - self.preferences = { - "download.default_directory": write_dir, - "safebrowsing.enabled": "false", - "profile.managed_default_content_settings.images": 2, - } - self.options = webdriver.ChromeOptions() - self.server = webdriver.ChromeService(chromedriver_dir) - self.options.add_argument( - 'user-agent="Mozilla/5.0 ' - "(Macintosh; Intel Mac OS X 10_14_6) " - "AppleWebKit/537.36 (KHTML, like Gecko) " - 'Chrome/88.0.4324.146 Safari/537.36"' - ) - self.options.add_argument("--window-size=1920,1080") - self.options.add_argument("ignore-certificate-errors") - if headless: - self.options.add_argument("--headless") - self.options.add_experimental_option("prefs", self.preferences) - self.options.add_experimental_option("excludeSwitches", ["enable-automation"]) - if chromedriver_dir is None: - self.web = webdriver.Chrome(options=self.options) - else: - self.web = webdriver.Chrome(service=self.server, options=self.options) - self.wait = WebDriverWait(self.web, 10) - self.web.get("http://traken.chem.yale.edu/ligpargen/") - time.sleep(1) - print("LigParGen server connected.") - - def quit(self): - """Method for quiting ChromeDriver.""" - self.web.quit() - - def data_from_pdb(self, pdb_dir: str): - """ - Use the LigParGen server to generate a LAMMPS data file from a pdb file. - Write out a LAMMPS data file. - - Args: - pdb_dir: The path to the input pdb structure file. - """ - self.web.get("http://traken.chem.yale.edu/ligpargen/") - upload_xpath = '//*[@id="exampleMOLFile"]' - time.sleep(1) - self.wait.until(EC.presence_of_element_located((By.XPATH, upload_xpath))) - upload = self.web.find_element(By.XPATH, upload_xpath) - try: - upload.send_keys(pdb_dir) - submit = self.web.find_element(By.XPATH, "/html/body/div[2]/div/div[2]/form/button[1]") - submit.click() - pdb_filename = os.path.basename(pdb_dir) - self.download_data(os.path.splitext(pdb_filename)[0] + ".lmp") - except TimeoutException: - print("Timeout! Web server no response for 10s, file download failed!") - except WebDriverException as e: - print(e) - finally: - self.quit() - - def data_from_smiles(self, smiles_code): - """ - Use the LigParGen server to generate a LAMMPS data file from a SMILES code. - Write out a LAMMPS data file. - - Args: - smiles_code: The SMILES code for the LigParGen input. - """ - self.web.get("http://traken.chem.yale.edu/ligpargen/") - time.sleep(1) - smile = self.web.find_element(By.XPATH, '//*[@id="smiles"]') - smile.send_keys(smiles_code) - submit = self.web.find_element(By.XPATH, "/html/body/div[2]/div/div[2]/form/button[1]") - submit.click() - try: - self.download_data(smiles_code + ".lmp") - except TimeoutException: - print("Timeout! Web server no response for 10s, file download failed!") - finally: - self.quit() - - def download_data(self, lmp_name: str): - """ - Helper function that download and write out the LAMMPS data file. - - Args: - lmp_name: Name of the LAMMPS data file. - """ - print("Structure info uploaded. Rendering force field...") - lmp_xpath = "/html/body/div[2]/div[2]/div[1]/div/div[14]/form/input[1]" - self.wait.until(EC.presence_of_element_located((By.XPATH, lmp_xpath))) - jmol = self.web.find_element(By.XPATH, "/html/body/div[2]/div[2]/div[2]") - self.web.execute_script("arguments[0].remove();", jmol) - self.wait.until(EC.element_to_be_clickable((By.XPATH, lmp_xpath))) - data_lmp = self.web.find_element(By.XPATH, lmp_xpath) - num_file = len([f for f in os.listdir(self.write_dir) if os.path.splitext(f)[1] == ".lmp"]) + 1 - data_lmp.click() - while True: - files = sorted( - [ - os.path.join(self.write_dir, f) - for f in os.listdir(self.write_dir) - if os.path.splitext(f)[1] == ".lmp" - ], - key=os.path.getmtime, - ) - # wait for file to finish download - if len(files) < num_file: - time.sleep(1) - print("waiting for download to be initiated") - else: - newest = files[-1] - if ".crdownload" in newest: - time.sleep(1) - print("waiting for download to complete") - else: - break - print("Force field file downloaded.") - lmp_file = newest - if self.xyz: - data_obj = LammpsData.from_file(lmp_file) - element_id_dict = lmp_mass_to_name(data_obj.masses) - coords = data_obj.atoms[["type", "x", "y", "z"]] - lines = [] - lines.append(str(len(coords.index))) - lines.append("") - for _, r in coords.iterrows(): - element_name = element_id_dict.get(int(r["type"])) - assert element_name is not None - line = element_name + " " + " ".join(str(r[loc]) for loc in ["x", "y", "z"]) - lines.append(line) - - with open(os.path.join(self.write_dir, lmp_name + ".xyz"), "w") as xyz_file: - xyz_file.write("\n".join(lines)) - print(".xyz file saved.") - if self.gromacs: - data_gro = self.web.find_element(By.XPATH, "/html/body/div[2]/div[2]/div[1]/div/div[8]/form/input[1]") - data_itp = self.web.find_element(By.XPATH, "/html/body/div[2]/div[2]/div[1]/div/div[9]/form/input[1]") - data_gro.click() - data_itp.click() - time.sleep(1) - gro_file = max( - [self.write_dir + "/" + f for f in os.listdir(self.write_dir) if os.path.splitext(f)[1] == ".gro"], - key=os.path.getctime, - ) - itp_file = max( - [self.write_dir + "/" + f for f in os.listdir(self.write_dir) if os.path.splitext(f)[1] == ".itp"], - key=os.path.getctime, - ) - shutil.move(gro_file, os.path.join(self.write_dir, lmp_name[:-4] + ".gro")) - shutil.move(itp_file, os.path.join(self.write_dir, lmp_name[:-4] + ".itp")) - shutil.move(lmp_file, os.path.join(self.write_dir, lmp_name)) - print("Force field file saved.") diff --git a/mdgo/forcefield/mdgoligpargen.py b/mdgo/forcefield/mdgoligpargen.py index be4bc5c2..1622a698 100644 --- a/mdgo/forcefield/mdgoligpargen.py +++ b/mdgo/forcefield/mdgoligpargen.py @@ -1,4 +1,3 @@ -# coding: utf-8 # Copyright (c) Tingzheng Hou. # Distributed under the terms of the MIT License. @@ -13,27 +12,25 @@ matches your Chrome version via https://chromedriver.chromium.org/downloads """ +from __future__ import annotations + import os import shutil import time -from typing import Optional - from pymatgen.io.lammps.data import LammpsData from selenium import webdriver -from selenium.common.exceptions import ( - TimeoutException, - WebDriverException, -) +from selenium.common.exceptions import TimeoutException, WebDriverException from selenium.webdriver.common.by import By from selenium.webdriver.support import expected_conditions as EC from selenium.webdriver.support.ui import WebDriverWait +from mdgo.util.dict_utils import lmp_mass_to_name + try: from ligpargen.ligpargen import LigParGen except ModuleNotFoundError: pass -from mdgo.util.dict_utils import lmp_mass_to_name class LigpargenRunner: @@ -50,8 +47,8 @@ class LigpargenRunner: as .xyz. Default to False. This is useful because the order and the name of the atoms could be different from the initial input. - Examples: + Examples: >>> lpg = LigpargenRunner('sturcture_file', 'path/to/write/output') >>> lpg.run() """ @@ -136,12 +133,11 @@ class FFcrawler: xyz: Whether to write the structure in the LigParGen generated data file as .xyz. Default to False. This is useful because the order and the name of the atoms could be - different from the initial input. + different from the initial input.) gromacs: Whether to save GROMACS format data files. Default to False. Examples: - >>> lpg = FFcrawler('/path/to/work/dir', '/path/to/chromedriver') >>> lpg.data_from_pdb("/path/to/pdb") """ @@ -149,7 +145,7 @@ class FFcrawler: def __init__( self, write_dir: str, - chromedriver_dir: Optional[str] = None, + chromedriver_dir: str | None = None, headless: bool = True, xyz: bool = False, gromacs: bool = False, @@ -187,10 +183,7 @@ def __init__( print("LigParGen server connected.") def quit(self): - """ - Method for quiting ChromeDriver. - - """ + """Method for quiting ChromeDriver.""" self.web.quit() def data_from_pdb(self, pdb_dir: str): From 74587f9adf20f843e57082f2255a6012fc4eaedf Mon Sep 17 00:00:00 2001 From: Tingzheng Hou <25351437+htz1992213@users.noreply.github.com> Date: Mon, 5 Feb 2024 12:57:44 +0800 Subject: [PATCH 20/21] lint fixes --- mdgo/forcefield/__init__.py | 2 +- mdgo/forcefield/crawler.py | 4 +--- tests/test_forcefield.py | 29 ++++++++++++----------------- 3 files changed, 14 insertions(+), 21 deletions(-) diff --git a/mdgo/forcefield/__init__.py b/mdgo/forcefield/__init__.py index b9dd20d3..b6ee340f 100644 --- a/mdgo/forcefield/__init__.py +++ b/mdgo/forcefield/__init__.py @@ -18,5 +18,5 @@ from .aqueous import Aqueous, IonLJData from .charge import ChargeWriter from .maestro import MaestroRunner +from .mdgoligpargen import FFcrawler, LigpargenRunner from .pubchem import PubChemRunner -from .mdgoligpargen import LigpargenRunner, FFcrawler diff --git a/mdgo/forcefield/crawler.py b/mdgo/forcefield/crawler.py index 27c5c38c..6e94f8cd 100644 --- a/mdgo/forcefield/crawler.py +++ b/mdgo/forcefield/crawler.py @@ -1,8 +1,6 @@ # Copyright (c) Tingzheng Hou. # Distributed under the terms of the MIT License. -""" -This module was used to implement a core class FFcrawler, and has been moved to ligpargen module. -""" +"""This module was used to implement a core class FFcrawler, and has been moved to ligpargen module.""" diff --git a/tests/test_forcefield.py b/tests/test_forcefield.py index e3fda47f..ce9c9411 100644 --- a/tests/test_forcefield.py +++ b/tests/test_forcefield.py @@ -9,10 +9,11 @@ import numpy as np import pytest +from numpy.testing import assert_equal from pymatgen.io.lammps.data import LammpsData from mdgo.forcefield.aqueous import Aqueous, Ion -from mdgo.forcefield.crawler import FFcrawler +from mdgo.forcefield.mdgoligpargen import FFcrawler, LigpargenRunner test_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "test_files") @@ -36,33 +37,27 @@ def test_run(self) -> None: lpg = LigpargenRunner(os.path.join(test_dir, "EC.pdb"), download_dir, xyz=True) lpg.run() - self.assertIn( - "Input format: .pdb\n" "LigParGen finished succesfully!\n" ".xyz file saved.", - out.getvalue(), - ) - self.assertTrue(os.path.exists(os.path.join(download_dir, "EC.lmp"))) - self.assertTrue(os.path.exists(os.path.join(download_dir, "EC.lmp.xyz"))) + assert "Input format: .pdb\nLigParGen finished succesfully!\n.xyz file saved." in out.getvalue() + assert os.path.exists(os.path.join(download_dir, "EC.lmp")) + assert os.path.exists(os.path.join(download_dir, "EC.lmp.xyz")) with open(os.path.join(download_dir, "EC.lmp")) as f: pdf_actual = f.readlines() - self.assertListEqual(pdf, pdf_actual) + assert_equal(pdf, pdf_actual) with open(os.path.join(download_dir, "EC.lmp.xyz")) as f: xyz_actual = f.readlines() - self.assertListEqual(xyz, xyz_actual) + assert_equal(xyz, xyz_actual) lpg = LigpargenRunner("CCOC(=O)O", download_dir, xyz=True) lpg.run() - self.assertIn( - "Input format: SMILES\n" "LigParGen finished succesfully!\n" ".xyz file saved.", - out.getvalue(), - ) - self.assertTrue(os.path.exists(os.path.join(download_dir, "CCOC(=O)O.lmp"))) - self.assertTrue(os.path.exists(os.path.join(download_dir, "CCOC(=O)O.lmp.xyz"))) + assert "Input format: SMILES\nLigParGen finished succesfully!\n.xyz file saved." in out.getvalue() + assert os.path.exists(os.path.join(download_dir, "CCOC(=O)O.lmp")) + assert os.path.exists(os.path.join(download_dir, "CCOC(=O)O.lmp.xyz")) with open(os.path.join(download_dir, "CCOC(=O)O.lmp")) as f: smiles_actual = f.readlines() - self.assertListEqual(smiles, smiles_actual) + assert_equal(smiles, smiles_actual) with open(os.path.join(download_dir, "CCOC(=O)O.lmp.xyz")) as f: xyz_actual = f.readlines() - self.assertListEqual(xyz_smiles, xyz_actual) + assert_equal(xyz_smiles, xyz_actual) finally: sys.stdout = saved_stdout From 3f0e83f362f13874f96b80a696f416b7d27af79b Mon Sep 17 00:00:00 2001 From: Tingzheng Hou <25351437+htz1992213@users.noreply.github.com> Date: Mon, 5 Feb 2024 12:59:49 +0800 Subject: [PATCH 21/21] ruff format fix --- mdgo/forcefield/crawler.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/mdgo/forcefield/crawler.py b/mdgo/forcefield/crawler.py index 6e94f8cd..d746fa34 100644 --- a/mdgo/forcefield/crawler.py +++ b/mdgo/forcefield/crawler.py @@ -2,5 +2,3 @@ # Distributed under the terms of the MIT License. """This module was used to implement a core class FFcrawler, and has been moved to ligpargen module.""" - -