Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .conda/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ requirements:
- conda-forge::gprof2dot
- conda-forge::numdifftools
- conda-forge::quantities !=0.16.0,!=0.16.1
- rmg::pysidt-rmg >=1.2
- rmg::pydas >=1.0.3
- rmg::pydqed >=1.0.3
- rmg::symmetry
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/CI.yml
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ concurrency:
env:
# if running on RMG-Py but requiring changes on an un-merged branch of RMG-database, replace
# main with the name of the branch
RMG_DATABASE_BRANCH: main
RMG_DATABASE_BRANCH: sidt_adsorption_corrections # TODO: revert to main before merging, once the database branch is merged
# RMS branch to use for ReactionMechanismSimulator installation
RMS_BRANCH: for_rmg
# RMS mode used for install_rms.sh
Expand Down
1 change: 1 addition & 0 deletions environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ dependencies:
- conda-forge::cclib >=1.6.3,<1.9
- conda-forge::openbabel >= 3
- conda-forge::rdkit >=2022.09.1
- rmg::pysidt-rmg >=1.2

# Python tools
- conda-forge::python >=3.9,<3.12 # leave as GEQ so that GitHub actions can add EQ w/o breaking (contradictory deps)
Expand Down
272 changes: 199 additions & 73 deletions rmgpy/data/thermo.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,9 @@

import numpy as np

from pysidt import read_nodes, MultiTargetSingleEvalSubgraphIsomorphicDecisionTree
from pysidt.utils import find_shortest_paths

import rmgpy.constants as constants
import rmgpy.molecule
import rmgpy.quantity
Expand Down Expand Up @@ -857,6 +860,8 @@ def __init__(self):
self.libraries = {}
self.surface = {}
self.groups = {}
self.sidts = {}
self.sidt_taggings_and_decompositions = {}
self.adsorption_groups = "adsorptionPt111"
self.library_order = []
self.local_context = {
Expand All @@ -883,6 +888,7 @@ def __reduce__(self):
'depository': self.depository,
'libraries': self.libraries,
'groups': self.groups,
'sidts': self.sidts,
'library_order': self.library_order,
'surface' : self.surface,
}
Expand All @@ -895,6 +901,7 @@ def __setstate__(self, d):
self.depository = d['depository']
self.libraries = d['libraries']
self.groups = d['groups']
self.sidts = d['sidts']
self.library_order = d['library_order']
self.surface = d['surface']

Expand All @@ -909,6 +916,7 @@ def load(self, path, libraries=None, depository=True, surface=False):
self.depository = {}
self.load_libraries(os.path.join(path, 'libraries'), libraries)
self.load_groups(os.path.join(path, 'groups'))
self.load_sidts(os.path.join(path,'sidt'))
if surface:
self.load_surface()

Expand Down Expand Up @@ -1004,6 +1012,72 @@ def load_groups(self, path):

self.record_ring_generic_nodes()
self.record_polycylic_generic_nodes()

def load_sidts(self, path):
    """
    Load thermo subgraph-isomorphic decision trees (SIDTs) from `path`,
    the ``sidt`` folder of the thermo database.

    Populates ``self.sidts`` mapping each correction category to its loaded
    tree, and ``self.sidt_taggings_and_decompositions`` mapping each category
    to a callable that labels the relevant adsorbate atoms with ``'*'``
    in place (or, for the multidentate entry, returns a list of labeled
    bidentate sub-structures instead). Categories whose JSON file is absent
    are skipped with a debug message.
    """
    logging.info('Loading thermodynamics SIDTs from {0}...'.format(path))
    categories = [
        "Pt111_monodentate_adsorption_corrections",
        "Pt111_bidentate_adsorption_corrections",
        "Pt111_vdw_adsorption_corrections",
    ]

    def add_monodentate_tag(m):
        """Label the adatom (the neighbor of the single surface site) with '*'."""
        for atom in m.atoms:
            if atom.is_surface_site():
                # the adatom is the only atom bonded to the surface site
                next(iter(atom.bonds)).label = "*"
                break

    def add_bidentate_tag(m):
        """Label every atom on the shortest path between the two surface sites with '*'."""
        site1, site2 = m.get_surface_sites()[:2]
        # presumably find_shortest_paths always returns at least one path
        # for a connected bidentate adsorbate — TODO confirm
        path = find_shortest_paths(site1, site2)[0]
        for atom in path:
            atom.label = "*"

    def add_vdw_tag(m):
        """Label the surface site itself with '*' (physisorbed/vdW species)."""
        for atom in m.atoms:
            if atom.is_surface_site():
                atom.label = "*"
                break

    def multidentate_decomposition(m):
        """
        Decompose a >2-dentate adsorbate into bidentate pieces: for every
        pair of surface sites, return a deep copy of `m` with the atoms on
        the shortest path between that pair labeled '*'.
        """
        structs = []
        sites = m.get_surface_sites()
        for site1, site2 in itertools.combinations(sites, 2):
            path = find_shortest_paths(site1, site2)[0]
            # record indices in the original, then label the matching atoms
            # in a deep copy so each sub-structure is labeled independently
            indices = [m.atoms.index(atom) for atom in path]
            mol = m.copy(deep=True)
            for index in indices:
                mol.atoms[index].label = "*"
            structs.append(mol)

        return structs

    self.sidt_taggings_and_decompositions = {
        "Pt111_monodentate_adsorption_corrections": add_monodentate_tag,
        "Pt111_bidentate_adsorption_corrections": add_bidentate_tag,
        "Pt111_vdw_adsorption_corrections": add_vdw_tag,
        # special: returns a list of labeled sub-structures instead of tagging in place
        "Pt111_multidentate_adsorption_corrections": multidentate_decomposition,
    }

    for category in categories:
        json_path = os.path.join(path, category + ".json")
        if os.path.exists(json_path):
            nodes = read_nodes(json_path)
            self.sidts[category] = MultiTargetSingleEvalSubgraphIsomorphicDecisionTree(nodes=nodes)
        else:
            logging.debug('No SIDT file found for category {0} at {1}'.format(category, json_path))

def save(self, path):
"""
Expand Down Expand Up @@ -1293,7 +1367,7 @@ def get_thermo_data(self, species, metal_to_scale_to=None, training_set=None):
if species.contains_surface_site():
try:
thermo0 = self.get_thermo_data_for_surface_species(species)
metal_to_scale_from = self.adsorption_groups.split('adsorption')[-1]
metal_to_scale_from = self.adsorption_groups.split('adsorption')[-1].replace("SIDT","",1)
if metal_to_scale_from != metal_to_scale_to:
thermo0 = self.correct_binding_energy(thermo0, species, metal_to_scale_from=metal_to_scale_from, metal_to_scale_to=metal_to_scale_to) # group adsorption values come from Pt111
return thermo0
Expand Down Expand Up @@ -1683,81 +1757,133 @@ def _add_adsorption_correction(self, adsorption_thermo, adsorption_groups, molec
molecule ([Molecule]): the molecule to apply the thermo correction
surface_sites ([list([Atom])]): a list of the surface site atoms in the molecule
"""

number_of_surface_sites = len(surface_sites)

matches = []
for atom in surface_sites:
labeled_atoms = {'*': atom}
node = adsorption_groups.descend_tree(molecule, labeled_atoms)
if node is None:
# no match, so try the next surface site
continue
while node is not None and node.data is None:
node = node.parent
if node is None:
# no data, so try the next surface site
continue
data = node.data
comment = node.label
loop_count = 0
while isinstance(data, str):
loop_count += 1
if loop_count > 100:
raise DatabaseError("Maximum iterations reached while following thermo group data pointers. A circular"
f" reference may exist. Last node was {node.label} pointing to group called {data} in "
f"database {adsorption_groups.label}")

for entry in adsorption_groups.entries.values():
if entry.label == data:
data = entry.data
comment = entry.label
break
else:
raise DatabaseError(f"Node {node.label} points to a non-existing group called {data} "
f"in database {adsorption_groups.label}")
data.comment = f'{adsorption_groups.label}({comment})'
group_surface_sites = node.item.get_surface_sites()
if len(group_surface_sites) == number_of_surface_sites:
# all the surface sites are accounted for so add the adsorption group and return
add_thermo_data(adsorption_thermo, data, group_additivity=True)
return True
else:
# we have not found a full match yet, so append and keep looking
matches.append((len(group_surface_sites),data))

if len(matches) == 0:
raise DatabaseError(f"Could not find an adsorption correction in {adsorption_groups.label} for {molecule}")
matches.sort(key = lambda x: -x[0])
# sort the matches by descending number of surface sites
corrections_applied = 0
# start a counter for the number of corrections applied
for number_of_group_sites, data in matches:
if number_of_surface_sites - number_of_group_sites < 0:
# too many sites in this group, skip to the next one
continue
if not corrections_applied:
# this is the first correction, so add H298, S298, and Cp
add_thermo_data(adsorption_thermo, data, group_additivity=True)
else:
# We have already corrected S298 and Cp, so we only want to correct H298
adsorption_thermo.H298.value_si += data.H298.value_si
adsorption_thermo.comment += ' + H298({0})'.format(data.comment)
corrections_applied += 1
number_of_surface_sites -= number_of_group_sites
if number_of_surface_sites <= 0:
# we have corrected for all the sites
if number_of_surface_sites < 0:
adsorption_thermo.comment += ' WARNING(Too many adsorption corrections were added to the thermo!'
adsorption_thermo.comment += 'The H298 is very likely understimated as a result!)'
break

if number_of_surface_sites > 0:
adsorption_thermo.comment += ' WARNING({} surface sites were unaccounted for with adsorption corrections!'.format(number_of_surface_sites)
adsorption_thermo.comment += 'The H298 is very likely overestimated as a result!)'

return True
# Legacy path: use the group-additivity adsorption tree unless a SIDT
# estimator set was selected via the adsorption_groups name.
if "SIDT" not in self.adsorption_groups:
    matches = []
    for atom in surface_sites:
        labeled_atoms = {'*': atom}
        node = adsorption_groups.descend_tree(molecule, labeled_atoms)
        if node is None:
            # no match, so try the next surface site
            continue
        # climb back up until a node that actually carries data is found
        while node is not None and node.data is None:
            node = node.parent
        if node is None:
            # no data, so try the next surface site
            continue
        data = node.data
        comment = node.label
        loop_count = 0
        # a string data entry is a pointer to another group entry; follow it,
        # bounded to guard against circular references
        while isinstance(data, str):
            loop_count += 1
            if loop_count > 100:
                raise DatabaseError("Maximum iterations reached while following thermo group data pointers. A circular"
                                    f" reference may exist. Last node was {node.label} pointing to group called {data} in "
                                    f"database {adsorption_groups.label}")

            for entry in adsorption_groups.entries.values():
                if entry.label == data:
                    data = entry.data
                    comment = entry.label
                    break
            else:
                raise DatabaseError(f"Node {node.label} points to a non-existing group called {data} "
                                    f"in database {adsorption_groups.label}")
        data.comment = f'{adsorption_groups.label}({comment})'
        group_surface_sites = node.item.get_surface_sites()
        if len(group_surface_sites) == number_of_surface_sites:
            # all the surface sites are accounted for so add the adsorption group and return
            add_thermo_data(adsorption_thermo, data, group_additivity=True)
            return True
        else:
            # we have not found a full match yet, so append and keep looking
            matches.append((len(group_surface_sites), data))

    if len(matches) == 0:
        raise DatabaseError(f"Could not find an adsorption correction in {adsorption_groups.label} for {molecule}")
    matches.sort(key=lambda x: -x[0])
    # sort the matches by descending number of surface sites
    corrections_applied = 0
    # start a counter for the number of corrections applied
    for number_of_group_sites, data in matches:
        if number_of_surface_sites - number_of_group_sites < 0:
            # too many sites in this group, skip to the next one
            continue
        if not corrections_applied:
            # this is the first correction, so add H298, S298, and Cp
            add_thermo_data(adsorption_thermo, data, group_additivity=True)
        else:
            # We have already corrected S298 and Cp, so we only want to correct H298
            adsorption_thermo.H298.value_si += data.H298.value_si
            adsorption_thermo.comment += ' + H298({0})'.format(data.comment)
        corrections_applied += 1
        number_of_surface_sites -= number_of_group_sites
        if number_of_surface_sites <= 0:
            # we have corrected for all the sites
            if number_of_surface_sites < 0:
                adsorption_thermo.comment += ' WARNING(Too many adsorption corrections were added to the thermo!'
                adsorption_thermo.comment += 'The H298 is very likely understimated as a result!)'
            break

    if number_of_surface_sites > 0:
        adsorption_thermo.comment += ' WARNING({} surface sites were unaccounted for with adsorption corrections!'.format(number_of_surface_sites)
        adsorption_thermo.comment += 'The H298 is very likely overestimated as a result!)'

    return True
else:
    # SIDT path: pick the estimator by denticity, tag the adsorbate atoms
    # with the '*' labels the tree expects, then evaluate.
    if number_of_surface_sites == 1:
        if len(molecule.split()) == 1:
            # one connected structure bound to one site: monodentate chemisorption
            self.sidt_taggings_and_decompositions["Pt111_monodentate_adsorption_corrections"](molecule)
            # NOTE(review): `root` is assigned but never used below — dead code?
            root = self.sidts["Pt111_monodentate_adsorption_corrections"].nodes["Root"]
            data, unc, tr = self.sidts["Pt111_monodentate_adsorption_corrections"].evaluate(molecule,trace=True,estimate_uncertainty=True)
        else:
            # adsorbate splits off from the site fragment: physisorbed (vdW) species
            self.sidt_taggings_and_decompositions["Pt111_vdw_adsorption_corrections"](molecule)
            data, unc, tr = self.sidts["Pt111_vdw_adsorption_corrections"].evaluate(molecule,trace=True,estimate_uncertainty=True)
    elif number_of_surface_sites == 2:
        self.sidt_taggings_and_decompositions["Pt111_bidentate_adsorption_corrections"](molecule)
        data, unc, tr = self.sidts["Pt111_bidentate_adsorption_corrections"].evaluate(molecule,trace=True,estimate_uncertainty=True)
    else: # >2 surface sites
        # approximate the multidentate correction as the average of bidentate
        # evaluations over every labeled site-pair decomposition
        data = None
        sts = self.sidt_taggings_and_decompositions["Pt111_multidentate_adsorption_corrections"](molecule)
        Nsts = 0
        for st in sts:
            # NOTE(review): both branches of this conditional are identical —
            # presumably one was meant to call a different tree; confirm intent
            if data is None:
                dE = self.sidts["Pt111_bidentate_adsorption_corrections"].evaluate(st)
            else:
                dE = self.sidts["Pt111_bidentate_adsorption_corrections"].evaluate(st)

            if dE is not None:
                if data is None:
                    data = dE
                else:
                    data += dE
                Nsts += 1

        # NOTE(review): if every evaluation returned None, Nsts == 0 and data is
        # None, so this line raises (ZeroDivisionError/TypeError) — consider guarding
        data /= Nsts
        # fixed uncertainty profile (H298, S298, Cp at 9 temperatures)
        unc = [275.3710444584647,
               35.06003505844255,
               9.443946756077908,
               12.96897559887163,
               16.813134867837555,
               20.353756412672936,
               26.25846562429047,
               30.888440606215646,
               38.49498107527693] #based on tridentate data
        tr = "estimators not designed to handle >2 adsorption sites corrections errors may be very high"

    # remove the '*' labels added by the tagging functions above
    molecule.clear_labeled_atoms()

    # apply the SIDT estimate; data[0] is H298 in kJ/mol, converted to J/mol (SI)
    adsorption_thermo.H298.value_si += data[0]*1000.0 #kJ/mol
    adsorption_thermo.H298.uncertainty_si += unc[0]*1000.0
    adsorption_thermo.S298.value_si += data[1] #J/(mol*K)
    adsorption_thermo.S298.uncertainty_si += unc[1]
    adsorption_thermo.Cpdata.value_si += data[2:] #J/(mol*K)
    adsorption_thermo.Cpdata.uncertainty_si += unc[2:]
    adsorption_thermo.comment += f'{self.adsorption_groups}({tr})'

    return True

def get_thermo_data_from_libraries(self, species, training_set=None):
"""
Return the thermodynamic parameters for a given :class:`Species`
Expand Down
6 changes: 5 additions & 1 deletion rmgpy/rmg/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -400,6 +400,10 @@ def save_input(self, path=None):
save_input_file(path, self)

def load_database(self):
if "SIDT" in self.adsorption_groups:
Pt111_adsorption = "adsorptionSIDTPt111"
else:
Pt111_adsorption = "adsorptionPt111"
self.database = RMGDatabase()
self.database.load(
path=self.database_directory,
Expand All @@ -410,7 +414,7 @@ def load_database(self):
kinetics_families=self.kinetics_families,
kinetics_depositories=self.kinetics_depositories,
statmech_libraries = self.statmech_libraries,
adsorption_groups='adsorptionPt111', # use Pt111 groups for training reactions
adsorption_groups=Pt111_adsorption, # use Pt111 groups for training reactions
# frequenciesLibraries = self.statmech_libraries,
depository=False, # Don't bother loading the depository information, as we don't use it
)
Expand Down
Loading
Loading