Skip to content

Commit 54ac6f1

Browse files
committed
Add TS guess generation for hydrolysis reaction families
Implemented TS guess generation for hydrolysis reaction families, including ester, imine, ether, and nitrile hydrolysis. The approach uses z-matrix manipulation, dihedral angle adjustments, and electronegativity-based heuristics to generate TS structures.
1 parent 522387c commit 54ac6f1

File tree

1 file changed

+196
-4
lines changed

1 file changed

+196
-4
lines changed

arc/job/adapters/ts/heuristics.py

Lines changed: 196 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -28,26 +28,28 @@
2828
from arkane.statmech import is_linear
2929

3030
from arc.common import almost_equal_coords, get_logger, is_angle_linear, key_by_val
31-
from arc.family import get_reaction_family_products
3231
from arc.job.adapter import JobAdapter
3332
from arc.job.adapters.common import _initialize_adapter, ts_adapters_by_rmg_family
3433
from arc.job.factory import register_job_adapter
3534
from arc.plotter import save_geo
36-
from arc.species.converter import compare_zmats, relocate_zmat_dummy_atoms_to_the_end, zmat_from_xyz, zmat_to_xyz
35+
from arc.species.converter import compare_zmats, relocate_zmat_dummy_atoms_to_the_end, zmat_from_xyz, zmat_to_xyz , add_atom_to_xyz_using_internal_coords
3736
from arc.mapping.engine import map_two_species
3837
from arc.species.species import ARCSpecies, TSGuess, SpeciesError, colliding_atoms
39-
from arc.species.zmat import get_parameter_from_atom_indices, remove_1st_atom, up_param
38+
from arc.species.zmat import get_parameter_from_atom_indices, remove_1st_atom, up_param, xyz_to_zmat
39+
from arc.family.family import get_reaction_family_products
4040

4141
if TYPE_CHECKING:
4242
from arc.level import Level
4343
from arc.reaction import ARCReaction
4444

4545

46+
FAMILY_SETS = {'set_1': ['ester_hydrolysis', 'imine_hydrolysis','ether_hydrolysis'],
47+
'set_2': ['nitrile_hydrolysis']} #sub-groups of hydrolysis reaction families
48+
4649
DIHEDRAL_INCREMENT = 30
4750

4851
logger = get_logger()
4952

50-
5153
class HeuristicsAdapter(JobAdapter):
5254
"""
5355
A class for executing TS guess jobs based on heuristics.
@@ -263,6 +265,12 @@ def execute_incore(self):
263265
xyzs = h_abstraction(reaction=rxn, dihedral_increment=self.dihedral_increment)
264266
tsg.tok()
265267

268+
if rxn.family in FAMILY_SETS['set_1'] or rxn.family in FAMILY_SETS['set_2']:
269+
tsg = TSGuess(method='Heuristics')
270+
tsg.tic()
271+
xyzs = hydrolysis(reaction=rxn)
272+
tsg.tok()
273+
266274
for method_index, xyz in enumerate(xyzs):
267275
unique = True
268276
for other_tsg in rxn.ts_species.ts_guesses:
@@ -1027,6 +1035,62 @@ def get_neighbors_by_electronegativity( spc: ARCSpecies,
10271035
else:
10281036
return sorted_neighbors[0]
10291037

1038+
def generate_ts_guess(initial_xyz: dict,
1039+
water: 'ARCSpecies',
1040+
r_atoms: List[int],
1041+
a_atoms: List[List[int]],
1042+
d_atoms: List[List[int]],
1043+
r_value: List[float],
1044+
a_value: List[float],
1045+
d_values: List[List[float]],
1046+
zmats_total: List[dict],
1047+
threshold: float = 0.8
1048+
) -> Tuple[List[dict], List[dict]]:
1049+
"""
1050+
Generate Z-matrices and Cartesian coordinates for transition state (TS) guesses.
1051+
1052+
Args:
1053+
initial_xyz (dict): The initial coordinates of the reactant.
1054+
water ('ARCSpecies'): The water molecule involved in the reaction.
1055+
r_atoms (List[int]): Atom pairs for defining bond distances.
1056+
a_atoms (List[List[int]]): Atom triplets for defining bond angles.
1057+
d_atoms (List[List[int]]): Atom quartets for defining dihedral angles.
1058+
r_value (List[float]): Bond distances corresponding to each atom pair in `r_atoms`.
1059+
a_value (List[float]): Bond angles corresponding to each atom triplet in `a_atoms`.
1060+
d_values (List[List[float]]): Dihedral angle sets for each TS guess.
1061+
zmats_total (List[dict]): A list of existing Z-matrices to avoid duplicates.
1062+
threshold (float): Threshold to check for atom collisions.
1063+
1064+
Returns:
1065+
Tuple[List[dict], List[dict]]: A tuple containing:
1066+
- List[dict]: Unique Z-matrices for TS guesses without colliding atoms.
1067+
- List[dict]: Corresponding Cartesian coordinates for the TS guesses.
1068+
"""
1069+
xyz_guesses = []
1070+
1071+
for d_value in d_values:
1072+
xyz_guess = copy.deepcopy(initial_xyz)
1073+
for i in range(3):
1074+
xyz_guess = add_atom_to_xyz_using_internal_coords(
1075+
xyz=xyz_guess,
1076+
element=water.mol.atoms[i].element.symbol,
1077+
r_index=r_atoms[i],
1078+
a_indices=a_atoms[i],
1079+
d_indices=d_atoms[i],
1080+
r_value=r_value[i],
1081+
a_value=a_value[i],
1082+
d_value=d_value[i]
1083+
)
1084+
1085+
zmat_guess = xyz_to_zmat(xyz_guess)
1086+
duplicate = any(compare_zmats(existing, zmat_guess) for existing in zmats_total)
1087+
if xyz_guess is not None and not colliding_atoms(xyz_guess, threshold=threshold) and not duplicate:
1088+
xyz_guesses.append(xyz_guess)
1089+
zmats_total.append(zmat_guess)
1090+
else:
1091+
print(f"Colliding atoms or existing guess: {xyz_guess}")
1092+
return xyz_guesses, zmats_total
1093+
10301094
def get_matching_dihedrals(zmat: dict,
10311095
a: int,
10321096
b: int,
@@ -1142,4 +1206,132 @@ def push_up_dihedral(zmat: Dict,
11421206
print(f"Updated dihedral value for {param}: {zmat['vars'][param]}")
11431207

11441208

1209+
def hydrolysis(reaction: 'ARCReaction'):
1210+
"""
1211+
Generate TS guesses for reactions of the ARC "hydrolysis" families.
1212+
1213+
Args:
1214+
reaction: An ARCReaction instance.
1215+
1216+
Returns:
1217+
List[dict]: Cartesian coordinates of TS guesses for all reactions.
1218+
"""
1219+
xyz_guesses_total, zmats_total= [], []
1220+
product_dicts = get_reaction_family_products(
1221+
rxn=reaction,
1222+
rmg_family_set = 'default',
1223+
consider_rmg_families = False,
1224+
consider_arc_families = True,
1225+
)
1226+
ester_and_ether_families = False
1227+
if any('ester_hydrolysis' in d.get('family', []) for d in product_dicts) and \
1228+
any('ether_hydrolysis' in d.get('family', []) for d in product_dicts):
1229+
ester_and_ether_families = True
1230+
counter=0
1231+
while not xyz_guesses_total or (ester_and_ether_families and not any(item['family'] == 'ester_hydrolysis' for item in xyz_guesses_total)):
1232+
counter+=1
1233+
for product_dict in product_dicts:
1234+
arc_reactants, _ = reaction.get_reactants_and_products(arc=True, return_copies=False)
1235+
arc_reactant, water = None, None
1236+
for spc in arc_reactants:
1237+
if not is_water(spc):
1238+
arc_reactant = spc
1239+
else:
1240+
water = spc
1241+
if not arc_reactant or not water:
1242+
raise ValueError("Reactants must include a non-water molecule and water.")
1243+
initial_xyz = arc_reactant.get_xyz()
1244+
initial_zmat = zmat_from_xyz(initial_xyz,consolidate=False)
1245+
print(initial_zmat)
1246+
map_dict = initial_zmat.get('map', {})
1247+
a = product_dict['r_label_map']['*1']
1248+
b = product_dict['r_label_map']['*2']
1249+
real_a = key_by_val(map_dict, a)
1250+
real_b = key_by_val(map_dict, b)
1251+
same_sign_reg=a<b
1252+
same_sign_real=real_a<real_b
1253+
O = len(arc_reactant.mol.atoms)
1254+
H1 = O + 1
1255+
r_atoms = [a, O, O]
1256+
r_value = [1.8, 1.21, 0.97]
1257+
a_atoms = [[b, a], [a, O], [H1, O]]
1258+
family = product_dict['family']
1259+
if family in FAMILY_SETS['set_1']:
1260+
f, d = get_neighbors_by_electronegativity(arc_reactant, a, b, True)
1261+
print(a,b,f,d)
1262+
real_f= key_by_val(map_dict, f)
1263+
real_d = key_by_val(map_dict, d)
1264+
total_dihedrals = count_all_possible_dihedrals(initial_zmat, real_a, real_b, real_f, real_d)
1265+
if (counter > total_dihedrals) and (total_dihedrals!=0):
1266+
print("All possible dihedral adjustments have been tried.")
1267+
return xyz_guesses_total, zmats_total
1268+
d_atoms = [[f, d, a], [b, a, O], [a, H1, O]]
1269+
indices_list = find_matching_dihedral(initial_zmat, real_a, real_b, real_f, real_d,counter)
1270+
for indices in indices_list:
1271+
if indices is not None:
1272+
push_up_dihedral(zmat=initial_zmat, indices=indices, adjustment_factor=0.3)
1273+
1274+
if family == 'ether_hydrolysis':
1275+
if int(same_sign_real)+int(same_sign_reg)==1:
1276+
stretch_zmat_bond(zmat=initial_zmat, indices=(min(a, b), max(a, b)), stretch=1.5)
1277+
else:
1278+
stretch_zmat_bond(zmat=initial_zmat, indices=(max(a, b), min(a, b)), stretch=1.5)
1279+
r_value[0]=2.1
1280+
a_value = [65, 72, 106]
1281+
d_values = [[98.25, -0.72, 103], [-98.25, -0.72, 103], [98.25, -0.72, -103], [-98.25, -0.72, -103]]
1282+
elif family == 'imine_hydrolysis':
1283+
if int(same_sign_real) + int(same_sign_reg) == 1:
1284+
stretch_zmat_bond(zmat=initial_zmat, indices=(min(a, b), max(a, b)), stretch=1.3)
1285+
else:
1286+
stretch_zmat_bond(zmat=initial_zmat, indices=(max(a, b), min(a, b)), stretch=1.3)
1287+
a_value = [78, 70, 111]
1288+
d_values = [[108, 12, 113], [-108, 12, 113], [108, 12, -113], [-108, 12, -113]]
1289+
else:
1290+
if int(same_sign_real) + int(same_sign_reg) == 1:
1291+
stretch_zmat_bond(zmat=initial_zmat, indices=(min(a, b), max(a, b)), stretch=1.3)
1292+
else:
1293+
stretch_zmat_bond(zmat=initial_zmat, indices=(max(a, b), min(a, b)), stretch=1.3)
1294+
a_value = [77, 71, 111]
1295+
d_values = [[140, 1.64, 103], [-140, 1.64, 103], [140, 1.64, -103], [-140, 1.64, -103]]
1296+
initial_xyz = zmat_to_xyz(initial_zmat)
1297+
xyz_guesses, zmats_total = generate_ts_guess(
1298+
initial_xyz, water, r_atoms, a_atoms, d_atoms, r_value, a_value, d_values, zmats_total
1299+
)
1300+
1301+
elif family in FAMILY_SETS['set_2']:
1302+
f, d= get_neighbors_by_electronegativity(arc_reactant, a, b, False), None
1303+
real_f = key_by_val(map_dict, f)
1304+
total_dihedrals = count_all_possible_dihedrals(initial_zmat, real_a, real_b, real_f, None)
1305+
if (counter > total_dihedrals) and (total_dihedrals!=0):
1306+
print("All possible dihedral adjustments have been tried.")
1307+
return xyz_guesses_total, zmats_total
1308+
indices_list = find_matching_dihedral(initial_zmat, real_a, real_b, real_f,None, counter)
1309+
for indices in indices_list:
1310+
if indices is not None:
1311+
push_up_dihedral(zmat=initial_zmat, indices=indices, adjustment_factor=0.3)
1312+
if int(same_sign_real) + int(same_sign_reg) == 1:
1313+
stretch_zmat_bond(zmat=initial_zmat, indices=(min(a, b), max(a, b)), stretch=1.1)
1314+
else:
1315+
stretch_zmat_bond(zmat=initial_zmat, indices=(max(a, b), min(a, b)), stretch=1.1)
1316+
a_atoms = [[b, a], [a, O], [H1, O]]
1317+
a_value = [97, 58, 111]
1318+
d_atoms = [[f, b, a], [b, a, O], [a, H1, O]]
1319+
d_values = [[174, -0.0154, 104], [-174, -0.0154, 104], [174, -0.0154, -104], [-174, -0.0154, -104]]
1320+
initial_xyz = zmat_to_xyz(initial_zmat)
1321+
xyz_guesses, zmats_total = generate_ts_guess(
1322+
initial_xyz, water, r_atoms, a_atoms, d_atoms, r_value, a_value, d_values, zmats_total,threshold=0.6
1323+
)
1324+
1325+
else:
1326+
raise ValueError(f"Family {product_dict['family']} not supported for hydrolysis TS guess generation")
1327+
1328+
if xyz_guesses:
1329+
xyz_guesses_total.append({'family': product_dict['family'], 'indices': [a,b,f,d,O,H1], 'xyz_guesses': xyz_guesses})
1330+
1331+
return xyz_guesses_total, zmats_total
1332+
1333+
11451334
register_job_adapter('heuristics', HeuristicsAdapter)
1335+
1336+
1337+

0 commit comments

Comments
 (0)