Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .conda/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ requirements:
- conda-forge::gprof2dot
- conda-forge::numdifftools
- conda-forge::quantities !=0.16.0,!=0.16.1
- rmg::pysidt-rmg >=1.2
- rmg::pydas >=1.0.3
- rmg::pydqed >=1.0.3
- rmg::symmetry
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/CI.yml
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ concurrency:
env:
# if running on RMG-Py but requiring changes on an un-merged branch of RMG-database, replace
# main with the name of the branch
RMG_DATABASE_BRANCH: main
RMG_DATABASE_BRANCH: sidt_adsorption_corrections # TODO: revert to main before merging, once the database branch is merged
# RMS branch to use for ReactionMechanismSimulator installation
RMS_BRANCH: for_rmg
# RMS mode used for install_rms.sh
Expand Down
1 change: 1 addition & 0 deletions environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ dependencies:
- conda-forge::cclib >=1.6.3,<1.9
- conda-forge::openbabel >= 3
- conda-forge::rdkit >=2022.09.1
- rmg::pysidt-rmg >=1.2

# Python tools
- conda-forge::python >=3.9,<3.12 # leave as GEQ so that GitHub actions can add EQ w/o breaking (contradictory deps)
Expand Down
272 changes: 199 additions & 73 deletions rmgpy/data/thermo.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,9 @@

import numpy as np

from pysidt import read_nodes, MultiTargetSingleEvalSubgraphIsomorphicDecisionTree
from pysidt.utils import find_shortest_paths

import rmgpy.constants as constants
import rmgpy.molecule
import rmgpy.quantity
Expand Down Expand Up @@ -857,6 +860,8 @@ def __init__(self):
self.libraries = {}
self.surface = {}
self.groups = {}
self.sidts = {}
self.sidt_taggings_and_decompositions = {}
self.adsorption_groups = "adsorptionPt111"
self.library_order = []
self.local_context = {
Expand All @@ -883,6 +888,7 @@ def __reduce__(self):
'depository': self.depository,
'libraries': self.libraries,
'groups': self.groups,
'sidts': self.sidts,
'library_order': self.library_order,
'surface' : self.surface,
}
Expand All @@ -895,6 +901,7 @@ def __setstate__(self, d):
self.depository = d['depository']
self.libraries = d['libraries']
self.groups = d['groups']
self.sidts = d['sidts']
self.library_order = d['library_order']
self.surface = d['surface']

Expand All @@ -909,6 +916,7 @@ def load(self, path, libraries=None, depository=True, surface=False):
self.depository = {}
self.load_libraries(os.path.join(path, 'libraries'), libraries)
self.load_groups(os.path.join(path, 'groups'))
self.load_sidts(os.path.join(path,'sidt'))
if surface:
self.load_surface()

Expand Down Expand Up @@ -1004,6 +1012,72 @@ def load_groups(self, path):

self.record_ring_generic_nodes()
self.record_polycylic_generic_nodes()

def load_sidts(self, path):
    """
    Load thermo subgraph-isomorphic decision trees (SIDTs) from `path`,
    the ``sidt`` folder of the thermo database.

    Populates ``self.sidts`` mapping each correction category to its loaded
    tree, and ``self.sidt_taggings_and_decompositions`` mapping each category
    to a callable that labels the relevant adsorbate atoms with ``'*'``
    in place (or, for the multidentate entry, returns a list of labeled
    bidentate sub-structures instead). Categories whose JSON file is absent
    are skipped with a debug message.
    """
    logging.info('Loading thermodynamics SIDTs from {0}...'.format(path))
    categories = [
        "Pt111_monodentate_adsorption_corrections",
        "Pt111_bidentate_adsorption_corrections",
        "Pt111_vdw_adsorption_corrections",
    ]

    def add_monodentate_tag(m):
        """Label the adatom (the neighbor of the single surface site) with '*'."""
        for atom in m.atoms:
            if atom.is_surface_site():
                # the adatom is the only atom bonded to the surface site
                next(iter(atom.bonds)).label = "*"
                break

    def add_bidentate_tag(m):
        """Label every atom on the shortest path between the two surface sites with '*'."""
        site1, site2 = m.get_surface_sites()[:2]
        # presumably find_shortest_paths always returns at least one path
        # for a connected bidentate adsorbate — TODO confirm
        path = find_shortest_paths(site1, site2)[0]
        for atom in path:
            atom.label = "*"

    def add_vdw_tag(m):
        """Label the surface site itself with '*' (physisorbed/vdW species)."""
        for atom in m.atoms:
            if atom.is_surface_site():
                atom.label = "*"
                break

    def multidentate_decomposition(m):
        """
        Decompose a >2-dentate adsorbate into bidentate pieces: for every
        pair of surface sites, return a deep copy of `m` with the atoms on
        the shortest path between that pair labeled '*'.
        """
        structs = []
        sites = m.get_surface_sites()
        for site1, site2 in itertools.combinations(sites, 2):
            path = find_shortest_paths(site1, site2)[0]
            # record indices in the original, then label the matching atoms
            # in a deep copy so each sub-structure is labeled independently
            indices = [m.atoms.index(atom) for atom in path]
            mol = m.copy(deep=True)
            for index in indices:
                mol.atoms[index].label = "*"
            structs.append(mol)

        return structs

    self.sidt_taggings_and_decompositions = {
        "Pt111_monodentate_adsorption_corrections": add_monodentate_tag,
        "Pt111_bidentate_adsorption_corrections": add_bidentate_tag,
        "Pt111_vdw_adsorption_corrections": add_vdw_tag,
        # special: returns a list of labeled sub-structures instead of tagging in place
        "Pt111_multidentate_adsorption_corrections": multidentate_decomposition,
    }

    for category in categories:
        json_path = os.path.join(path, category + ".json")
        if os.path.exists(json_path):
            nodes = read_nodes(json_path)
            self.sidts[category] = MultiTargetSingleEvalSubgraphIsomorphicDecisionTree(nodes=nodes)
        else:
            logging.debug('No SIDT file found for category {0} at {1}'.format(category, json_path))

def save(self, path):
"""
Expand Down Expand Up @@ -1293,7 +1367,7 @@ def get_thermo_data(self, species, metal_to_scale_to=None, training_set=None):
if species.contains_surface_site():
try:
thermo0 = self.get_thermo_data_for_surface_species(species)
metal_to_scale_from = self.adsorption_groups.split('adsorption')[-1]
metal_to_scale_from = self.adsorption_groups.split('adsorption')[-1].replace("SIDT","",1)
if metal_to_scale_from != metal_to_scale_to:
thermo0 = self.correct_binding_energy(thermo0, species, metal_to_scale_from=metal_to_scale_from, metal_to_scale_to=metal_to_scale_to) # group adsorption values come from Pt111
return thermo0
Expand Down Expand Up @@ -1683,81 +1757,133 @@ def _add_adsorption_correction(self, adsorption_thermo, adsorption_groups, molec
molecule ([Molecule]): the molecule to apply the thermo correction
surface_sites ([list([Atom])]): a list of the surface site atoms in the molecule
"""

number_of_surface_sites = len(surface_sites)

matches = []
for atom in surface_sites:
labeled_atoms = {'*': atom}
node = adsorption_groups.descend_tree(molecule, labeled_atoms)
if node is None:
# no match, so try the next surface site
continue
while node is not None and node.data is None:
node = node.parent
if node is None:
# no data, so try the next surface site
continue
data = node.data
comment = node.label
loop_count = 0
while isinstance(data, str):
loop_count += 1
if loop_count > 100:
raise DatabaseError("Maximum iterations reached while following thermo group data pointers. A circular"
f" reference may exist. Last node was {node.label} pointing to group called {data} in "
f"database {adsorption_groups.label}")

for entry in adsorption_groups.entries.values():
if entry.label == data:
data = entry.data
comment = entry.label
break
else:
raise DatabaseError(f"Node {node.label} points to a non-existing group called {data} "
f"in database {adsorption_groups.label}")
data.comment = f'{adsorption_groups.label}({comment})'
group_surface_sites = node.item.get_surface_sites()
if len(group_surface_sites) == number_of_surface_sites:
# all the surface sites are accounted for so add the adsorption group and return
add_thermo_data(adsorption_thermo, data, group_additivity=True)
return True
else:
# we have not found a full match yet, so append and keep looking
matches.append((len(group_surface_sites),data))

if len(matches) == 0:
raise DatabaseError(f"Could not find an adsorption correction in {adsorption_groups.label} for {molecule}")
matches.sort(key = lambda x: -x[0])
# sort the matches by descending number of surface sites
corrections_applied = 0
# start a counter for the number of corrections applied
for number_of_group_sites, data in matches:
if number_of_surface_sites - number_of_group_sites < 0:
# too many sites in this group, skip to the next one
continue
if not corrections_applied:
# this is the first correction, so add H298, S298, and Cp
add_thermo_data(adsorption_thermo, data, group_additivity=True)
else:
# We have already corrected S298 and Cp, so we only want to correct H298
adsorption_thermo.H298.value_si += data.H298.value_si
adsorption_thermo.comment += ' + H298({0})'.format(data.comment)
corrections_applied += 1
number_of_surface_sites -= number_of_group_sites
if number_of_surface_sites <= 0:
# we have corrected for all the sites
if number_of_surface_sites < 0:
adsorption_thermo.comment += ' WARNING(Too many adsorption corrections were added to the thermo!'
adsorption_thermo.comment += 'The H298 is very likely understimated as a result!)'
break

if number_of_surface_sites > 0:
adsorption_thermo.comment += ' WARNING({} surface sites were unaccounted for with adsorption corrections!'.format(number_of_surface_sites)
adsorption_thermo.comment += 'The H298 is very likely overestimated as a result!)'

return True
# Legacy path: use the group-additivity adsorption tree unless a SIDT
# estimator set was selected via the adsorption_groups name.
if "SIDT" not in self.adsorption_groups:
    matches = []
    for atom in surface_sites:
        labeled_atoms = {'*': atom}
        node = adsorption_groups.descend_tree(molecule, labeled_atoms)
        if node is None:
            # no match, so try the next surface site
            continue
        # climb back up until a node that actually carries data is found
        while node is not None and node.data is None:
            node = node.parent
        if node is None:
            # no data, so try the next surface site
            continue
        data = node.data
        comment = node.label
        loop_count = 0
        # a string data entry is a pointer to another group entry; follow it,
        # bounded to guard against circular references
        while isinstance(data, str):
            loop_count += 1
            if loop_count > 100:
                raise DatabaseError("Maximum iterations reached while following thermo group data pointers. A circular"
                                    f" reference may exist. Last node was {node.label} pointing to group called {data} in "
                                    f"database {adsorption_groups.label}")

            for entry in adsorption_groups.entries.values():
                if entry.label == data:
                    data = entry.data
                    comment = entry.label
                    break
            else:
                raise DatabaseError(f"Node {node.label} points to a non-existing group called {data} "
                                    f"in database {adsorption_groups.label}")
        data.comment = f'{adsorption_groups.label}({comment})'
        group_surface_sites = node.item.get_surface_sites()
        if len(group_surface_sites) == number_of_surface_sites:
            # all the surface sites are accounted for so add the adsorption group and return
            add_thermo_data(adsorption_thermo, data, group_additivity=True)
            return True
        else:
            # we have not found a full match yet, so append and keep looking
            matches.append((len(group_surface_sites), data))

    if len(matches) == 0:
        raise DatabaseError(f"Could not find an adsorption correction in {adsorption_groups.label} for {molecule}")
    matches.sort(key=lambda x: -x[0])
    # sort the matches by descending number of surface sites
    corrections_applied = 0
    # start a counter for the number of corrections applied
    for number_of_group_sites, data in matches:
        if number_of_surface_sites - number_of_group_sites < 0:
            # too many sites in this group, skip to the next one
            continue
        if not corrections_applied:
            # this is the first correction, so add H298, S298, and Cp
            add_thermo_data(adsorption_thermo, data, group_additivity=True)
        else:
            # We have already corrected S298 and Cp, so we only want to correct H298
            adsorption_thermo.H298.value_si += data.H298.value_si
            adsorption_thermo.comment += ' + H298({0})'.format(data.comment)
        corrections_applied += 1
        number_of_surface_sites -= number_of_group_sites
        if number_of_surface_sites <= 0:
            # we have corrected for all the sites
            if number_of_surface_sites < 0:
                adsorption_thermo.comment += ' WARNING(Too many adsorption corrections were added to the thermo!'
                adsorption_thermo.comment += 'The H298 is very likely understimated as a result!)'
            break

    if number_of_surface_sites > 0:
        adsorption_thermo.comment += ' WARNING({} surface sites were unaccounted for with adsorption corrections!'.format(number_of_surface_sites)
        adsorption_thermo.comment += 'The H298 is very likely overestimated as a result!)'

    return True
else:
    # SIDT path: pick the estimator by denticity, tag the adsorbate atoms
    # with the '*' labels the tree expects, then evaluate.
    if number_of_surface_sites == 1:
        if len(molecule.split()) == 1:
            # one connected structure bound to one site: monodentate chemisorption
            self.sidt_taggings_and_decompositions["Pt111_monodentate_adsorption_corrections"](molecule)
            # NOTE(review): `root` is assigned but never used below — dead code?
            root = self.sidts["Pt111_monodentate_adsorption_corrections"].nodes["Root"]
            data, unc, tr = self.sidts["Pt111_monodentate_adsorption_corrections"].evaluate(molecule,trace=True,estimate_uncertainty=True)
        else:
            # adsorbate splits off from the site fragment: physisorbed (vdW) species
            self.sidt_taggings_and_decompositions["Pt111_vdw_adsorption_corrections"](molecule)
            data, unc, tr = self.sidts["Pt111_vdw_adsorption_corrections"].evaluate(molecule,trace=True,estimate_uncertainty=True)
    elif number_of_surface_sites == 2:
        self.sidt_taggings_and_decompositions["Pt111_bidentate_adsorption_corrections"](molecule)
        data, unc, tr = self.sidts["Pt111_bidentate_adsorption_corrections"].evaluate(molecule,trace=True,estimate_uncertainty=True)
    else: # >2 surface sites
        # approximate the multidentate correction as the average of bidentate
        # evaluations over every labeled site-pair decomposition
        data = None
        sts = self.sidt_taggings_and_decompositions["Pt111_multidentate_adsorption_corrections"](molecule)
        Nsts = 0
        for st in sts:
            # NOTE(review): both branches of this conditional are identical —
            # presumably one was meant to call a different tree; confirm intent
            if data is None:
                dE = self.sidts["Pt111_bidentate_adsorption_corrections"].evaluate(st)
            else:
                dE = self.sidts["Pt111_bidentate_adsorption_corrections"].evaluate(st)

            if dE is not None:
                if data is None:
                    data = dE
                else:
                    data += dE
                Nsts += 1

        # NOTE(review): if every evaluation returned None, Nsts == 0 and data is
        # None, so this line raises (ZeroDivisionError/TypeError) — consider guarding
        data /= Nsts
        # fixed uncertainty profile (H298, S298, Cp at 9 temperatures)
        unc = [275.3710444584647,
               35.06003505844255,
               9.443946756077908,
               12.96897559887163,
               16.813134867837555,
               20.353756412672936,
               26.25846562429047,
               30.888440606215646,
               38.49498107527693] #based on tridentate data
        tr = "estimators not designed to handle >2 adsorption sites corrections errors may be very high"

    # remove the '*' labels added by the tagging functions above
    molecule.clear_labeled_atoms()

    # apply the SIDT estimate; data[0] is H298 in kJ/mol, converted to J/mol (SI)
    adsorption_thermo.H298.value_si += data[0]*1000.0 #kJ/mol
    adsorption_thermo.H298.uncertainty_si += unc[0]*1000.0
    adsorption_thermo.S298.value_si += data[1] #J/(mol*K)
    adsorption_thermo.S298.uncertainty_si += unc[1]
    adsorption_thermo.Cpdata.value_si += data[2:] #J/(mol*K)
    adsorption_thermo.Cpdata.uncertainty_si += unc[2:]
    adsorption_thermo.comment += f'{self.adsorption_groups}({tr})'

    return True

def get_thermo_data_from_libraries(self, species, training_set=None):
"""
Return the thermodynamic parameters for a given :class:`Species`
Expand Down
6 changes: 5 additions & 1 deletion rmgpy/rmg/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -400,6 +400,10 @@ def save_input(self, path=None):
save_input_file(path, self)

def load_database(self):
if "SIDT" in self.adsorption_groups:
Pt111_adsorption = "adsorptionSIDTPt111"
else:
Pt111_adsorption = "adsorptionPt111"
self.database = RMGDatabase()
self.database.load(
path=self.database_directory,
Expand All @@ -410,7 +414,7 @@ def load_database(self):
kinetics_families=self.kinetics_families,
kinetics_depositories=self.kinetics_depositories,
statmech_libraries = self.statmech_libraries,
adsorption_groups='adsorptionPt111', # use Pt111 groups for training reactions
adsorption_groups=Pt111_adsorption, # use Pt111 groups for training reactions
# frequenciesLibraries = self.statmech_libraries,
depository=False, # Don't bother loading the depository information, as we don't use it
)
Expand Down
Loading
Loading