|
28 | 28 | from arkane.statmech import is_linear |
29 | 29 |
|
30 | 30 | from arc.common import almost_equal_coords, get_logger, is_angle_linear, key_by_val |
31 | | -from arc.family import get_reaction_family_products |
32 | 31 | from arc.job.adapter import JobAdapter |
33 | 32 | from arc.job.adapters.common import _initialize_adapter, ts_adapters_by_rmg_family |
34 | 33 | from arc.job.factory import register_job_adapter |
35 | 34 | from arc.plotter import save_geo |
36 | | -from arc.species.converter import compare_zmats, relocate_zmat_dummy_atoms_to_the_end, zmat_from_xyz, zmat_to_xyz |
| 35 | +from arc.species.converter import compare_zmats, relocate_zmat_dummy_atoms_to_the_end, zmat_from_xyz, zmat_to_xyz , add_atom_to_xyz_using_internal_coords |
37 | 36 | from arc.mapping.engine import map_two_species |
38 | 37 | from arc.species.species import ARCSpecies, TSGuess, SpeciesError, colliding_atoms |
39 | | -from arc.species.zmat import get_parameter_from_atom_indices, remove_1st_atom, up_param |
| 38 | +from arc.species.zmat import get_parameter_from_atom_indices, remove_1st_atom, up_param, xyz_to_zmat |
| 39 | +from arc.family.family import get_reaction_family_products |
40 | 40 |
|
41 | 41 | if TYPE_CHECKING: |
42 | 42 | from arc.level import Level |
43 | 43 | from arc.reaction import ARCReaction |
44 | 44 |
|
45 | 45 |
|
| 46 | +FAMILY_SETS = {'set_1': ['ester_hydrolysis', 'imine_hydrolysis','ether_hydrolysis'], |
| 47 | + 'set_2': ['nitrile_hydrolysis']} #sub-groups of hydrolysis reaction families |
| 48 | + |
46 | 49 | DIHEDRAL_INCREMENT = 30 |
47 | 50 |
|
48 | 51 | logger = get_logger() |
49 | 52 |
|
50 | | - |
51 | 53 | class HeuristicsAdapter(JobAdapter): |
52 | 54 | """ |
53 | 55 | A class for executing TS guess jobs based on heuristics. |
@@ -263,6 +265,12 @@ def execute_incore(self): |
263 | 265 | xyzs = h_abstraction(reaction=rxn, dihedral_increment=self.dihedral_increment) |
264 | 266 | tsg.tok() |
265 | 267 |
|
| 268 | + if rxn.family in FAMILY_SETS['set_1'] or rxn.family in FAMILY_SETS['set_2']: |
| 269 | + tsg = TSGuess(method='Heuristics') |
| 270 | + tsg.tic() |
| 271 | + xyzs = hydrolysis(reaction=rxn) |
| 272 | + tsg.tok() |
| 273 | + |
266 | 274 | for method_index, xyz in enumerate(xyzs): |
267 | 275 | unique = True |
268 | 276 | for other_tsg in rxn.ts_species.ts_guesses: |
@@ -1027,6 +1035,62 @@ def get_neighbors_by_electronegativity( spc: ARCSpecies, |
1027 | 1035 | else: |
1028 | 1036 | return sorted_neighbors[0] |
1029 | 1037 |
|
| 1038 | +def generate_ts_guess(initial_xyz: dict, |
| 1039 | + water: 'ARCSpecies', |
| 1040 | + r_atoms: List[int], |
| 1041 | + a_atoms: List[List[int]], |
| 1042 | + d_atoms: List[List[int]], |
| 1043 | + r_value: List[float], |
| 1044 | + a_value: List[float], |
| 1045 | + d_values: List[List[float]], |
| 1046 | + zmats_total: List[dict], |
| 1047 | + threshold: float = 0.8 |
| 1048 | + ) -> Tuple[List[dict], List[dict]]: |
| 1049 | + """ |
| 1050 | + Generate Z-matrices and Cartesian coordinates for transition state (TS) guesses. |
| 1051 | +
|
| 1052 | + Args: |
| 1053 | + initial_xyz (dict): The initial coordinates of the reactant. |
| 1054 | + water ('ARCSpecies'): The water molecule involved in the reaction. |
| 1055 | + r_atoms (List[int]): Atom pairs for defining bond distances. |
| 1056 | + a_atoms (List[List[int]]): Atom triplets for defining bond angles. |
| 1057 | + d_atoms (List[List[int]]): Atom quartets for defining dihedral angles. |
| 1058 | + r_value (List[float]): Bond distances corresponding to each atom pair in `r_atoms`. |
| 1059 | + a_value (List[float]): Bond angles corresponding to each atom triplet in `a_atoms`. |
| 1060 | + d_values (List[List[float]]): Dihedral angle sets for each TS guess. |
| 1061 | + zmats_total (List[dict]): A list of existing Z-matrices to avoid duplicates. |
| 1062 | + threshold (float): Threshold to check for atom collisions. |
| 1063 | +
|
| 1064 | + Returns: |
| 1065 | + Tuple[List[dict], List[dict]]: A tuple containing: |
| 1066 | + - List[dict]: Unique Z-matrices for TS guesses without colliding atoms. |
| 1067 | + - List[dict]: Corresponding Cartesian coordinates for the TS guesses. |
| 1068 | + """ |
| 1069 | + xyz_guesses = [] |
| 1070 | + |
| 1071 | + for d_value in d_values: |
| 1072 | + xyz_guess = copy.deepcopy(initial_xyz) |
| 1073 | + for i in range(3): |
| 1074 | + xyz_guess = add_atom_to_xyz_using_internal_coords( |
| 1075 | + xyz=xyz_guess, |
| 1076 | + element=water.mol.atoms[i].element.symbol, |
| 1077 | + r_index=r_atoms[i], |
| 1078 | + a_indices=a_atoms[i], |
| 1079 | + d_indices=d_atoms[i], |
| 1080 | + r_value=r_value[i], |
| 1081 | + a_value=a_value[i], |
| 1082 | + d_value=d_value[i] |
| 1083 | + ) |
| 1084 | + |
| 1085 | + zmat_guess = xyz_to_zmat(xyz_guess) |
| 1086 | + duplicate = any(compare_zmats(existing, zmat_guess) for existing in zmats_total) |
| 1087 | + if xyz_guess is not None and not colliding_atoms(xyz_guess, threshold=threshold) and not duplicate: |
| 1088 | + xyz_guesses.append(xyz_guess) |
| 1089 | + zmats_total.append(zmat_guess) |
| 1090 | + else: |
| 1091 | + print(f"Colliding atoms or existing guess: {xyz_guess}") |
| 1092 | + return xyz_guesses, zmats_total |
| 1093 | + |
1030 | 1094 | def get_matching_dihedrals(zmat: dict, |
1031 | 1095 | a: int, |
1032 | 1096 | b: int, |
@@ -1142,4 +1206,132 @@ def push_up_dihedral(zmat: Dict, |
1142 | 1206 | print(f"Updated dihedral value for {param}: {zmat['vars'][param]}") |
1143 | 1207 |
|
1144 | 1208 |
|
| 1209 | +def hydrolysis(reaction: 'ARCReaction'): |
| 1210 | + """ |
| 1211 | + Generate TS guesses for reactions of the ARC "hydrolysis" families. |
| 1212 | +
|
| 1213 | + Args: |
| 1214 | + reaction: An ARCReaction instance. |
| 1215 | +
|
| 1216 | + Returns: |
| 1217 | + List[dict]: Cartesian coordinates of TS guesses for all reactions. |
| 1218 | + """ |
| 1219 | + xyz_guesses_total, zmats_total= [], [] |
| 1220 | + product_dicts = get_reaction_family_products( |
| 1221 | + rxn=reaction, |
| 1222 | + rmg_family_set = 'default', |
| 1223 | + consider_rmg_families = False, |
| 1224 | + consider_arc_families = True, |
| 1225 | + ) |
| 1226 | + ester_and_ether_families = False |
| 1227 | + if any('ester_hydrolysis' in d.get('family', []) for d in product_dicts) and \ |
| 1228 | + any('ether_hydrolysis' in d.get('family', []) for d in product_dicts): |
| 1229 | + ester_and_ether_families = True |
| 1230 | + counter=0 |
| 1231 | + while not xyz_guesses_total or (ester_and_ether_families and not any(item['family'] == 'ester_hydrolysis' for item in xyz_guesses_total)): |
| 1232 | + counter+=1 |
| 1233 | + for product_dict in product_dicts: |
| 1234 | + arc_reactants, _ = reaction.get_reactants_and_products(arc=True, return_copies=False) |
| 1235 | + arc_reactant, water = None, None |
| 1236 | + for spc in arc_reactants: |
| 1237 | + if not is_water(spc): |
| 1238 | + arc_reactant = spc |
| 1239 | + else: |
| 1240 | + water = spc |
| 1241 | + if not arc_reactant or not water: |
| 1242 | + raise ValueError("Reactants must include a non-water molecule and water.") |
| 1243 | + initial_xyz = arc_reactant.get_xyz() |
| 1244 | + initial_zmat = zmat_from_xyz(initial_xyz,consolidate=False) |
| 1245 | + print(initial_zmat) |
| 1246 | + map_dict = initial_zmat.get('map', {}) |
| 1247 | + a = product_dict['r_label_map']['*1'] |
| 1248 | + b = product_dict['r_label_map']['*2'] |
| 1249 | + real_a = key_by_val(map_dict, a) |
| 1250 | + real_b = key_by_val(map_dict, b) |
| 1251 | + same_sign_reg=a<b |
| 1252 | + same_sign_real=real_a<real_b |
| 1253 | + O = len(arc_reactant.mol.atoms) |
| 1254 | + H1 = O + 1 |
| 1255 | + r_atoms = [a, O, O] |
| 1256 | + r_value = [1.8, 1.21, 0.97] |
| 1257 | + a_atoms = [[b, a], [a, O], [H1, O]] |
| 1258 | + family = product_dict['family'] |
| 1259 | + if family in FAMILY_SETS['set_1']: |
| 1260 | + f, d = get_neighbors_by_electronegativity(arc_reactant, a, b, True) |
| 1261 | + print(a,b,f,d) |
| 1262 | + real_f= key_by_val(map_dict, f) |
| 1263 | + real_d = key_by_val(map_dict, d) |
| 1264 | + total_dihedrals = count_all_possible_dihedrals(initial_zmat, real_a, real_b, real_f, real_d) |
| 1265 | + if (counter > total_dihedrals) and (total_dihedrals!=0): |
| 1266 | + print("All possible dihedral adjustments have been tried.") |
| 1267 | + return xyz_guesses_total, zmats_total |
| 1268 | + d_atoms = [[f, d, a], [b, a, O], [a, H1, O]] |
| 1269 | + indices_list = find_matching_dihedral(initial_zmat, real_a, real_b, real_f, real_d,counter) |
| 1270 | + for indices in indices_list: |
| 1271 | + if indices is not None: |
| 1272 | + push_up_dihedral(zmat=initial_zmat, indices=indices, adjustment_factor=0.3) |
| 1273 | + |
| 1274 | + if family == 'ether_hydrolysis': |
| 1275 | + if int(same_sign_real)+int(same_sign_reg)==1: |
| 1276 | + stretch_zmat_bond(zmat=initial_zmat, indices=(min(a, b), max(a, b)), stretch=1.5) |
| 1277 | + else: |
| 1278 | + stretch_zmat_bond(zmat=initial_zmat, indices=(max(a, b), min(a, b)), stretch=1.5) |
| 1279 | + r_value[0]=2.1 |
| 1280 | + a_value = [65, 72, 106] |
| 1281 | + d_values = [[98.25, -0.72, 103], [-98.25, -0.72, 103], [98.25, -0.72, -103], [-98.25, -0.72, -103]] |
| 1282 | + elif family == 'imine_hydrolysis': |
| 1283 | + if int(same_sign_real) + int(same_sign_reg) == 1: |
| 1284 | + stretch_zmat_bond(zmat=initial_zmat, indices=(min(a, b), max(a, b)), stretch=1.3) |
| 1285 | + else: |
| 1286 | + stretch_zmat_bond(zmat=initial_zmat, indices=(max(a, b), min(a, b)), stretch=1.3) |
| 1287 | + a_value = [78, 70, 111] |
| 1288 | + d_values = [[108, 12, 113], [-108, 12, 113], [108, 12, -113], [-108, 12, -113]] |
| 1289 | + else: |
| 1290 | + if int(same_sign_real) + int(same_sign_reg) == 1: |
| 1291 | + stretch_zmat_bond(zmat=initial_zmat, indices=(min(a, b), max(a, b)), stretch=1.3) |
| 1292 | + else: |
| 1293 | + stretch_zmat_bond(zmat=initial_zmat, indices=(max(a, b), min(a, b)), stretch=1.3) |
| 1294 | + a_value = [77, 71, 111] |
| 1295 | + d_values = [[140, 1.64, 103], [-140, 1.64, 103], [140, 1.64, -103], [-140, 1.64, -103]] |
| 1296 | + initial_xyz = zmat_to_xyz(initial_zmat) |
| 1297 | + xyz_guesses, zmats_total = generate_ts_guess( |
| 1298 | + initial_xyz, water, r_atoms, a_atoms, d_atoms, r_value, a_value, d_values, zmats_total |
| 1299 | + ) |
| 1300 | + |
| 1301 | + elif family in FAMILY_SETS['set_2']: |
| 1302 | + f, d= get_neighbors_by_electronegativity(arc_reactant, a, b, False), None |
| 1303 | + real_f = key_by_val(map_dict, f) |
| 1304 | + total_dihedrals = count_all_possible_dihedrals(initial_zmat, real_a, real_b, real_f, None) |
| 1305 | + if (counter > total_dihedrals) and (total_dihedrals!=0): |
| 1306 | + print("All possible dihedral adjustments have been tried.") |
| 1307 | + return xyz_guesses_total, zmats_total |
| 1308 | + indices_list = find_matching_dihedral(initial_zmat, real_a, real_b, real_f,None, counter) |
| 1309 | + for indices in indices_list: |
| 1310 | + if indices is not None: |
| 1311 | + push_up_dihedral(zmat=initial_zmat, indices=indices, adjustment_factor=0.3) |
| 1312 | + if int(same_sign_real) + int(same_sign_reg) == 1: |
| 1313 | + stretch_zmat_bond(zmat=initial_zmat, indices=(min(a, b), max(a, b)), stretch=1.1) |
| 1314 | + else: |
| 1315 | + stretch_zmat_bond(zmat=initial_zmat, indices=(max(a, b), min(a, b)), stretch=1.1) |
| 1316 | + a_atoms = [[b, a], [a, O], [H1, O]] |
| 1317 | + a_value = [97, 58, 111] |
| 1318 | + d_atoms = [[f, b, a], [b, a, O], [a, H1, O]] |
| 1319 | + d_values = [[174, -0.0154, 104], [-174, -0.0154, 104], [174, -0.0154, -104], [-174, -0.0154, -104]] |
| 1320 | + initial_xyz = zmat_to_xyz(initial_zmat) |
| 1321 | + xyz_guesses, zmats_total = generate_ts_guess( |
| 1322 | + initial_xyz, water, r_atoms, a_atoms, d_atoms, r_value, a_value, d_values, zmats_total,threshold=0.6 |
| 1323 | + ) |
| 1324 | + |
| 1325 | + else: |
| 1326 | + raise ValueError(f"Family {product_dict['family']} not supported for hydrolysis TS guess generation") |
| 1327 | + |
| 1328 | + if xyz_guesses: |
| 1329 | + xyz_guesses_total.append({'family': product_dict['family'], 'indices': [a,b,f,d,O,H1], 'xyz_guesses': xyz_guesses}) |
| 1330 | + |
| 1331 | + return xyz_guesses_total, zmats_total |
| 1332 | + |
| 1333 | + |
1145 | 1334 | register_job_adapter('heuristics', HeuristicsAdapter) |
| 1335 | + |
| 1336 | + |
| 1337 | + |
0 commit comments