diff --git a/arc/scheduler.py b/arc/scheduler.py index 5add628665..49094cd2fd 100644 --- a/arc/scheduler.py +++ b/arc/scheduler.py @@ -325,6 +325,8 @@ def __init__(self, self.orbitals_level = orbitals_level self.unique_species_labels = list() self.save_restart = False + if self.restart_dict is not None: + self._sanitize_restart_output() if len(self.rxn_list): rxn_info_path = self.make_reaction_labels_info_file() @@ -2619,7 +2621,6 @@ def switch_ts(self, label: str): logger.info(f'Switching a TS guess for {label}...') self.determine_most_likely_ts_conformer(label=label) # Look for a different TS guess. self.delete_all_species_jobs(label=label) # Delete other currently running jobs for this TS. - self.output[label]['geo'] = self.output[label]['freq'] = self.output[label]['sp'] = self.output[label]['composite'] = '' freq_path = os.path.join(self.project_directory, 'output', 'rxns', label, 'geometry', 'freq.out') if os.path.isfile(freq_path): os.remove(freq_path) @@ -3044,6 +3045,9 @@ def check_all_done(self, label: str): logger.debug(f'Species {label} did not converge.') all_converged = False break + if all_converged and self._missing_required_paths(label): + logger.debug(f'Species {label} did not converge due to missing output paths.') + all_converged = False if label in self.output and all_converged: self.output[label]['convergence'] = True if self.species_dict[label].is_ts: @@ -3084,6 +3088,64 @@ def check_all_done(self, label: str): # Update restart dictionary and save the yaml restart file: self.save_restart_dict() + def _missing_required_paths(self, label: str) -> bool: + """ + Check whether required output paths are missing for a species/TS. + + Args: + label (str): The species label. + + Returns: + bool: Whether required output paths are missing. + """ + return bool(self._get_missing_required_paths(label)) + + def _get_missing_required_paths(self, label: str) -> set: + """ + Get missing required output path job types for a species/TS. + + Args: + label (str): The species label. + + Returns: + set: Job types with missing required output paths. + """ + if label not in self.output or 'paths' not in self.output[label]: + return set() + path_map = { + 'opt': 'geo', + 'freq': 'freq', + 'sp': 'sp', + 'composite': 'composite', + } + missing = set() + for job_type, path_key in path_map.items(): + if job_type == 'composite': + required = self.composite_method is not None + else: + required = self.job_types.get(job_type, False) + if not required: + continue + if self.species_dict[label].number_of_atoms == 1 and job_type in ['opt', 'freq']: + continue + if self.output[label]['job_types'].get(job_type, False) and not self.output[label]['paths'].get(path_key, ''): + missing.add(job_type) + return missing + + def _sanitize_restart_output(self) -> None: + """ + Ensure restart output state is internally consistent (e.g., convergence without paths). + """ + for label in list(self.output.keys()): + if label not in self.species_dict: + continue + missing_job_types = self._get_missing_required_paths(label) + if self.output[label].get('convergence') and missing_job_types: + self.output[label]['convergence'] = False + if 'job_types' in self.output[label]: + for job_type in missing_job_types: + self.output[label]['job_types'][job_type] = False + def get_server_job_ids(self, specific_server: Optional[str] = None): """ Check job status on a specific server or on all active servers, get a list of relevant running job IDs. @@ -3547,7 +3609,13 @@ def delete_all_species_jobs(self, label: str): logger.info(f'Deleted job {job_name}') job.delete() self.running_jobs[label] = list() - self.output[label]['paths'] = {key: '' if key != 'irc' else list() for key in self.output[label]['paths'].keys()} + if label in self.output: + self.output[label]['convergence'] = False + for key in ['opt', 'freq', 'sp', 'composite', 'fine']: + if key in self.output[label]['job_types']: + self.output[label]['job_types'][key] = False + self.output[label]['paths'] = {key: '' if key != 'irc' else list() + for key in self.output[label]['paths'].keys()} def restore_running_jobs(self): """ diff --git a/arc/scheduler_test.py b/arc/scheduler_test.py index edba05adef..87ce535be4 100644 --- a/arc/scheduler_test.py +++ b/arc/scheduler_test.py @@ -8,6 +8,7 @@ import unittest import os import shutil +from unittest import mock import arc.parser.parser as parser from arc.checks.ts import check_ts @@ -726,6 +727,124 @@ def test_save_e_elect(self): self.assertEqual(content, {'formaldehyde': -300621.95378630824, 'mehylamine': -251360.00924747565}) shutil.rmtree(project_directory, ignore_errors=True) + def test_check_all_done_requires_paths(self): + """Test that convergence isn't set when required paths are missing.""" + spc = ARCSpecies(label='formaldehyde', smiles='C=O') + output = { + 'formaldehyde': { + 'paths': {'geo': '', 'freq': '', 'sp': '', 'composite': ''}, + 'restart': '', + 'convergence': False, + 'job_types': {'conf_opt': False, 'conf_sp': False, 'opt': True, 'freq': True, 'sp': True, + 'rotors': False, 'irc': False, 'fine': False, 'composite': False}, + } + } + sched = Scheduler(project='test_check_all_done_requires_paths', + ess_settings=self.ess_settings, + species_list=[spc], + opt_level=Level(repr=default_levels_of_theory['opt']), + freq_level=Level(repr=default_levels_of_theory['freq']), + sp_level=Level(repr=default_levels_of_theory['sp']), + project_directory=self.project_directory, + testing=True, + job_types=initialize_job_types(), + restart_dict={'output': output}, + ) + sched.check_all_done(label='formaldehyde') + self.assertFalse(sched.output['formaldehyde']['convergence']) + + def test_restart_sanitizes_convergence_for_ts(self): + """Test restart output sanitization for TS with missing paths.""" + ts_spc = ARCSpecies(label='TS0', is_ts=True, multiplicity=2, charge=0) + output = { + 'TS0': { + 'paths': {'geo': '', 'freq': '', 'sp': '', 'composite': ''}, + 'restart': '', + 'convergence': True, + 'job_types': {'conf_opt': False, 'conf_sp': False, 'opt': True, 'freq': True, 'sp': True, + 'rotors': False, 'irc': False, 'fine': False, 'composite': False}, + } + } + sched = Scheduler(project='test_restart_sanitizes_convergence_for_ts', + ess_settings=self.ess_settings, + species_list=[ts_spc], + opt_level=Level(repr=default_levels_of_theory['opt']), + freq_level=Level(repr=default_levels_of_theory['freq']), + sp_level=Level(repr=default_levels_of_theory['sp']), + project_directory=self.project_directory, + testing=True, + job_types=initialize_job_types(), + restart_dict={'output': output, 'running_jobs': {'TS0': []}}, + ) + self.assertFalse(sched.output['TS0']['convergence']) + for key in ['opt', 'freq', 'sp', 'composite']: + self.assertFalse(sched.output['TS0']['job_types'][key]) + + def test_delete_all_species_jobs_resets_output(self): + """Test that deleting jobs clears convergence and job status.""" + spc = ARCSpecies(label='formaldehyde', smiles='C=O') + output = { + 'formaldehyde': { + 'paths': {'geo': 'geo.out', 'freq': 'freq.out', 'sp': 'sp.out', 'composite': ''}, + 'restart': '', + 'convergence': True, + 'job_types': {'conf_opt': False, 'conf_sp': False, 'opt': True, 'freq': True, 'sp': True, + 'rotors': False, 'irc': False, 'fine': True, 'composite': False}, + } + } + sched = Scheduler(project='test_delete_all_species_jobs_resets_output', + ess_settings=self.ess_settings, + species_list=[spc], + opt_level=Level(repr=default_levels_of_theory['opt']), + freq_level=Level(repr=default_levels_of_theory['freq']), + sp_level=Level(repr=default_levels_of_theory['sp']), + project_directory=self.project_directory, + testing=True, + job_types=initialize_job_types(), + restart_dict={'output': output}, + ) + sched.job_dict['formaldehyde'] = {'opt': {}, 'freq': {}, 'sp': {}} + sched.running_jobs['formaldehyde'] = [] + sched.delete_all_species_jobs(label='formaldehyde') + self.assertFalse(sched.output['formaldehyde']['convergence']) + for key in ['opt', 'freq', 'sp', 'composite', 'fine']: + self.assertFalse(sched.output['formaldehyde']['job_types'][key]) + self.assertEqual(sched.output['formaldehyde']['paths'], + {'geo': '', 'freq': '', 'sp': '', 'composite': ''}) + + def test_switch_ts_resets_output(self): + """Test that switching TS guesses resets convergence and paths.""" + ts_spc = ARCSpecies(label='TS0', is_ts=True, multiplicity=2, charge=0) + output = { + 'TS0': { + 'paths': {'geo': 'geo.out', 'freq': 'freq.out', 'sp': 'sp.out', 'composite': ''}, + 'restart': '', + 'convergence': True, + 'job_types': {'conf_opt': False, 'conf_sp': False, 'opt': True, 'freq': True, 'sp': True, + 'rotors': False, 'irc': False, 'fine': True, 'composite': False}, + } + } + sched = Scheduler(project='test_switch_ts_resets_output', + ess_settings=self.ess_settings, + species_list=[ts_spc], + opt_level=Level(repr=default_levels_of_theory['opt']), + freq_level=Level(repr=default_levels_of_theory['freq']), + sp_level=Level(repr=default_levels_of_theory['sp']), + project_directory=self.project_directory, + testing=True, + job_types=initialize_job_types(), + restart_dict={'output': output}, + ) + sched.species_dict['TS0'].ts_guesses_exhausted = True + sched.species_dict['TS0'].chosen_ts = None + with mock.patch.object(Scheduler, 'determine_most_likely_ts_conformer', return_value=None): + sched.switch_ts(label='TS0') + self.assertFalse(sched.output['TS0']['convergence']) + for key in ['opt', 'freq', 'sp', 'composite', 'fine']: + self.assertFalse(sched.output['TS0']['job_types'][key]) + self.assertEqual(sched.output['TS0']['paths'], + {'geo': '', 'freq': '', 'sp': '', 'composite': ''}) + def test_species_has_geo_sp_freq(self): """Test the species_has_geo() / species_has_sp() / species_has_freq() functions.""" for property_, species_has_property in zip(['geo', 'sp', 'freq'], [species_has_geo, species_has_sp, species_has_freq]): diff --git a/arc/species/species.py b/arc/species/species.py index 0fe014d080..6ec8b193f3 100644 --- a/arc/species/species.py +++ b/arc/species/species.py @@ -1536,12 +1536,12 @@ def make_ts_report(self): self.ts_report += ':\n' if self.successful_methods: self.ts_report += 'Methods that successfully generated a TS guess:\n' - for successful_method in self.successful_methods: - self.ts_report += successful_method + ',' + unique_successful_methods = list(dict.fromkeys(self.successful_methods)) + self.ts_report += ','.join(unique_successful_methods) if self.unsuccessful_methods: self.ts_report += '\nMethods that were unsuccessfully in generating a TS guess:\n' - for unsuccessful_method in self.unsuccessful_methods: - self.ts_report += unsuccessful_method + ',' + unique_unsuccessful_methods = list(dict.fromkeys(self.unsuccessful_methods)) + self.ts_report += ','.join(unique_unsuccessful_methods) if not self.ts_guesses_exhausted: self.ts_report += f'\nThe method that generated the best TS guess and its output used for the ' \ f'optimization: {self.chosen_ts_method}\n' diff --git a/functional/restart_test.py b/functional/restart_test.py index fa3407b863..8dd7852d29 100644 --- a/functional/restart_test.py +++ b/functional/restart_test.py @@ -9,10 +9,11 @@ import shutil import unittest import warnings +from unittest import mock from arc.molecule.molecule import Molecule -from arc.common import ARC_PATH, read_yaml_file +from arc.common import ARC_PATH, read_yaml_file, save_yaml_file from arc.main import ARC @@ -20,6 +21,7 @@ class TestRestart(unittest.TestCase): """ Contains unit tests for restarting ARC. """ + created_projects = set() @classmethod def setUpClass(cls): @@ -37,6 +39,7 @@ def test_restart_thermo(self): restart_dir = os.path.join(ARC_PATH, 'arc', 'testing', 'restart', '1_restart_thermo') restart_path = os.path.join(restart_dir, 'restart.yml') project = 'arc_project_for_testing_delete_after_usage_restart_thermo' + self.created_projects.add(project) project_directory = os.path.join(ARC_PATH, 'Projects', project) os.makedirs(os.path.dirname(project_directory), exist_ok=True) shutil.copytree(os.path.join(restart_dir, 'calcs'), os.path.join(project_directory, 'calcs', 'Species'), dirs_exist_ok=True) @@ -134,6 +137,7 @@ def test_restart_rate_1(self): restart_dir = os.path.join(ARC_PATH, 'arc', 'testing', 'restart', '2_restart_rate') restart_path = os.path.join(restart_dir, 'restart.yml') project = 'arc_project_for_testing_delete_after_usage_restart_rate_1' + self.created_projects.add(project) project_directory = os.path.join(ARC_PATH, 'Projects', project) os.makedirs(os.path.dirname(project_directory), exist_ok=True) shutil.copytree(os.path.join(restart_dir, 'calcs'), os.path.join(project_directory, 'calcs'), dirs_exist_ok=True) @@ -155,6 +159,7 @@ def test_restart_rate_1(self): def test_restart_rate_2(self): """Test restarting ARC and attaining a reaction rate coefficient""" project = 'arc_project_for_testing_delete_after_usage_restart_rate_2' + self.created_projects.add(project) project_directory = os.path.join(ARC_PATH, 'Projects', project) base_path = os.path.join(ARC_PATH, 'arc', 'testing', 'restart', '5_TS1') restart_path = os.path.join(base_path, 'restart.yml') @@ -184,6 +189,7 @@ def test_restart_bde (self): restart_dir = os.path.join(ARC_PATH, 'arc', 'testing', 'restart', '3_restart_bde') restart_path = os.path.join(restart_dir, 'restart.yml') project = 'test_restart_bde' + self.created_projects.add(project) project_directory = os.path.join(ARC_PATH, 'Projects', project) os.makedirs(os.path.dirname(project_directory), exist_ok=True) shutil.copytree(os.path.join(restart_dir, 'calcs'), os.path.join(project_directory, 'calcs'), dirs_exist_ok=True) @@ -212,17 +218,53 @@ def test_globalize_paths(self): content['output']['spc']['paths']['freq']) self.assertNotIn('gpfs/workspace/users/user', content['output']['spc']['paths']['freq']) + def test_restart_sanitizes_ts_output(self): + """Test sanitizing inconsistent TS output on restart.""" + project = 'arc_project_for_testing_delete_after_usage_restart_sanitize_ts' + self.created_projects.add(project) + project_directory = os.path.join(ARC_PATH, 'Projects', project) + os.makedirs(project_directory, exist_ok=True) + restart_path = os.path.join(project_directory, 'restart.yml') + restart_dict = { + 'project': project, + 'project_directory': project_directory, + 'job_types': {'conf_opt': False, 'conf_sp': False, 'opt': True, 'freq': True, 'sp': True, + 'rotors': False, 'irc': False, 'fine': False}, + 'species': [{'label': 'TS0', 'is_ts': True, 'multiplicity': 1, 'charge': 0}], + 'output': { + 'TS0': { + 'paths': {'geo': '', 'freq': '', 'sp': '', 'composite': ''}, + 'restart': '', + 'convergence': True, + 'job_types': {'conf_opt': False, 'conf_sp': False, 'opt': True, 'freq': True, 'sp': True, + 'rotors': False, 'irc': False, 'fine': False, 'composite': False}, + } + }, + 'running_jobs': {'TS0': []}, + } + save_yaml_file(path=restart_path, content=restart_dict) + input_dict = read_yaml_file(path=restart_path, project_directory=project_directory) + input_dict['project'], input_dict['project_directory'] = project, project_directory + with mock.patch('arc.scheduler.Scheduler.schedule_jobs', return_value=None), \ + mock.patch('arc.scheduler.Scheduler.run_opt_job', return_value=None), \ + mock.patch('arc.main.process_arc_project', return_value=None): + arc1 = ARC(**input_dict) + arc1.execute() + self.assertFalse(arc1.scheduler.output['TS0']['convergence']) + @classmethod def tearDownClass(cls): """ A function that is run ONCE after all unit tests in this class. Delete all project directories created during these unit tests """ - projects = ['arc_project_for_testing_delete_after_usage_restart_thermo', - 'arc_project_for_testing_delete_after_usage_restart_rate_1', - 'arc_project_for_testing_delete_after_usage_restart_rate_2', - 'test_restart_bde', - ] + projects = cls.created_projects or { + 'arc_project_for_testing_delete_after_usage_restart_thermo', + 'arc_project_for_testing_delete_after_usage_restart_rate_1', + 'arc_project_for_testing_delete_after_usage_restart_rate_2', + 'test_restart_bde', + 'arc_project_for_testing_delete_after_usage_restart_sanitize_ts', + } for project in projects: project_directory = os.path.join(ARC_PATH, 'Projects', project) shutil.rmtree(project_directory, ignore_errors=True)