From 4fa2b857a40832663b66705d5a286e3ba40c4ce8 Mon Sep 17 00:00:00 2001
From: Christophe Combelles <ccomb@free.fr>
Date: Tue, 26 May 2026 00:29:50 +0200
Subject: [PATCH 01/43] feat(json): add Stripped newtype for DerivingVia
 carrier
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Introduce `newtype Stripped a` in API.JsonOptions exposing ToJSON,
FromJSON, and ToSchema instances that delegate to the existing
strippedToJSON/strippedToEncoding/strippedParseJSON/strippedSchemaOptions
helpers. The instance bodies bypass the user-facing typeclass on `a` and
call the Generic helpers directly, so a user-side
`deriving via (Stripped Foo) instance ToJSON Foo` does not recurse.

This is pure addition — no existing instance is converted yet. The next
two commits collapse ~180 hand-rolled instance blocks in src/API/Types.hs,
src/Types.hs, and src/API/OpenApi.hs onto this carrier.

Categorically: Stripped a is a zero-cost iso (Coercible) of a, so the
Generic Rep is shared; the typeclass dictionary derived for Stripped a
transfers along the iso to a itself.
---
 src/API/JsonOptions.hs | 47 +++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 46 insertions(+), 1 deletion(-)

diff --git a/src/API/JsonOptions.hs b/src/API/JsonOptions.hs
index 6f81ceb8..442fec54 100644
--- a/src/API/JsonOptions.hs
+++ b/src/API/JsonOptions.hs
@@ -1,4 +1,7 @@
 {-# LANGUAGE FlexibleContexts #-}
+{-# LANGUAGE FlexibleInstances #-}
+{-# LANGUAGE ScopedTypeVariables #-}
+{-# LANGUAGE UndecidableInstances #-}
 
 module API.JsonOptions (
     stripLowerPrefix,
@@ -6,11 +9,14 @@ module API.JsonOptions (
     strippedToEncoding,
     strippedParseJSON,
     strippedSchemaOptions,
+    Stripped (..),
 ) where
 
 import Data.Aeson (
     Encoding,
+    FromJSON (..),
     Options,
+    ToJSON (..),
     Value,
     defaultOptions,
     fieldLabelModifier,
@@ -20,7 +26,10 @@ import Data.Aeson (
  )
 import Data.Aeson.Types (GFromJSON, GToEncoding, GToJSON', Parser, Zero)
 import Data.Char (isLower, toLower)
-import Data.OpenApi.Schema (SchemaOptions, fromAesonOptions)
+import Data.OpenApi.Schema (SchemaOptions, ToSchema (..), fromAesonOptions, genericDeclareNamedSchema)
+import Data.OpenApi.Internal.Schema (GToSchema)
+import Data.Proxy (Proxy (..))
+import Data.Typeable (Typeable)
 import GHC.Generics (Generic, Rep)
 
 stripLowerPrefix :: Options
@@ -43,3 +52,39 @@ strippedParseJSON = genericParseJSON stripLowerPrefix
 
 strippedSchemaOptions :: SchemaOptions
 strippedSchemaOptions = fromAesonOptions stripLowerPrefix
+
+{- | DerivingVia carrier collapsing the ~180 hand-rolled @ToJSON@ \/ @FromJSON@
+\/ @ToSchema@ instance blocks that share the same shape:
+
+@
+  toJSON     = strippedToJSON
+  toEncoding = strippedToEncoding
+  parseJSON  = strippedParseJSON
+  declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions
+@
+
+Usage:
+
+@
+  data Foo = Foo { fooBar :: Int } deriving Generic
+  deriving via (Stripped Foo) instance ToJSON   Foo
+  deriving via (Stripped Foo) instance FromJSON Foo
+  deriving via (Stripped Foo) instance ToSchema Foo
+@
+
+Categorically: dictionary transport along the zero-cost iso witnessed by
+'Coercible'. The instance bodies bypass the user-facing typeclass on @a@ and
+call the @Generic@ helpers directly, avoiding the recursion that would arise
+from @instance ToJSON a => ToJSON (Stripped a)@.
+-}
+newtype Stripped a = Stripped {unStripped :: a}
+
+instance (Generic a, GToJSON' Value Zero (Rep a), GToEncoding Zero (Rep a)) => ToJSON (Stripped a) where
+    toJSON (Stripped a) = strippedToJSON a
+    toEncoding (Stripped a) = strippedToEncoding a
+
+instance (Generic a, GFromJSON Zero (Rep a)) => FromJSON (Stripped a) where
+    parseJSON v = Stripped <$> strippedParseJSON v
+
+instance (Typeable a, Generic a, GToSchema (Rep a)) => ToSchema (Stripped a) where
+    declareNamedSchema _ = genericDeclareNamedSchema strippedSchemaOptions (Proxy :: Proxy a)

From ff63b2097531fc6a2df6cb6419cc29004ecd4a93 Mon Sep 17 00:00:00 2001
From: Christophe Combelles <ccomb@free.fr>
Date: Tue, 26 May 2026 00:39:17 +0200
Subject: [PATCH 02/43] refactor(api): collapse 94 ToJSON/FromJSON instances
 via Stripped

Convert hand-rolled `instance ToJSON X where toJSON = strippedToJSON; ...`
and `instance FromJSON X where parseJSON = strippedParseJSON` blocks for
70 record types in API/Types.hs to attached deriving clauses:

    data Foo = Foo {...}
        deriving (Generic)
        deriving (ToJSON, FromJSON) via (Stripped Foo)

The wire format is unchanged (openapi.json byte-identical against the
pre-branch baseline; full hspec suite green at 1052/1052).

Kept manual: ApiFlow (custom tagged-union shape), NodeType / EdgeType /
FlowRole (default generic sum encoding), PerturbedEntry (custom
Either-flatten).

ToSchema migration deliberately deferred: instances live as orphans in
API/OpenApi.hs and depend on types (Exchange, ApiFlow) whose schemas
are themselves defined there, creating a compilation-order cycle if we
try to derive in API/Types.hs. Will revisit as its own commit.
---
 src/API/Types.hs | 167 ++++++++++++++++++++---------------------------
 1 file changed, 72 insertions(+), 95 deletions(-)

diff --git a/src/API/Types.hs b/src/API/Types.hs
index b1f86f89..81fb81d2 100644
--- a/src/API/Types.hs
+++ b/src/API/Types.hs
@@ -1,11 +1,12 @@
 {-# LANGUAGE DeriveGeneric #-}
 {-# LANGUAGE MultiParamTypeClasses #-}
 {-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE DerivingVia #-}
 {-# OPTIONS_GHC -Wno-orphans #-}
 
 module API.Types where
 
-import API.JsonOptions (strippedParseJSON, strippedToEncoding, strippedToJSON)
+import API.JsonOptions (Stripped (..), strippedParseJSON, strippedToEncoding, strippedToJSON)
 import Data.Aeson
 import Data.Aeson.Types (Parser)
 import qualified Data.ByteString.Lazy as BSL
@@ -117,6 +118,7 @@ data ActivitySummary = ActivitySummary
     , prsAllocationFormula :: Maybe Text -- Raw SimaPro allocation formula; Nothing if purely numeric
     }
     deriving (Generic)
+    deriving (ToJSON, FromJSON) via (Stripped ActivitySummary)
 
 -- | Consumer result — ActivitySummary enriched with BFS depth from the queried supplier
 data ConsumerResult = ConsumerResult
@@ -130,6 +132,7 @@ data ConsumerResult = ConsumerResult
     , crClassifications :: M.Map Text Text -- Classifications (ISIC, CPC, Category, etc.), mirrors SupplyChainEntry
     }
     deriving (Generic)
+    deriving (ToJSON, FromJSON) via (Stripped ConsumerResult)
 
 {- | Wrapper for /consumers responses. Mirrors 'SupplyChainResponse' so clients
 have a uniform {entries, edges} shape in both traversal directions. Edges
@@ -141,6 +144,7 @@ data ConsumersResponse = ConsumersResponse
     , crrEdges :: ![SupplyChainEdge]
     }
     deriving (Generic)
+    deriving (ToJSON, FromJSON) via (Stripped ConsumersResponse)
 
 -- | Enhanced flow information for search results (now includes synonyms)
 data FlowSearchResult = FlowSearchResult
@@ -151,6 +155,7 @@ data FlowSearchResult = FlowSearchResult
     , fsrSynonyms :: M.Map Text [Text] -- Synonyms by language (converted from Set to List for JSON)
     }
     deriving (Generic)
+    deriving (ToJSON) via (Stripped FlowSearchResult)
 
 -- | Inventory export data structures
 data InventoryExport = InventoryExport
@@ -159,6 +164,7 @@ data InventoryExport = InventoryExport
     , ieStatistics :: InventoryStatistics
     }
     deriving (Generic)
+    deriving (ToJSON) via (Stripped InventoryExport)
 
 data InventoryMetadata = InventoryMetadata
     { imRootActivity :: ActivitySummary
@@ -167,6 +173,7 @@ data InventoryMetadata = InventoryMetadata
     , imResourceFlows :: Int -- Biosphere inputs (resource extraction)
     }
     deriving (Generic)
+    deriving (ToJSON) via (Stripped InventoryMetadata)
 
 data InventoryFlowDetail = InventoryFlowDetail
     { ifdFlow :: BiosphereFlow -- Inventory flows are always biosphere
@@ -176,6 +183,7 @@ data InventoryFlowDetail = InventoryFlowDetail
     , ifdCategory :: Text -- Flow category for grouping
     }
     deriving (Generic)
+    deriving (ToJSON) via (Stripped InventoryFlowDetail)
 
 data InventoryStatistics = InventoryStatistics
     { isTotalQuantity :: Double -- Sum of absolute values
@@ -184,6 +192,7 @@ data InventoryStatistics = InventoryStatistics
     , isTopCategories :: [(Text, Int)] -- Top flow categories by count
     }
     deriving (Generic)
+    deriving (ToJSON) via (Stripped InventoryStatistics)
 
 -- | Tree export data structures for visualization
 data TreeExport = TreeExport
@@ -192,6 +201,7 @@ data TreeExport = TreeExport
     , teEdges :: [TreeEdge]
     }
     deriving (Generic)
+    deriving (ToJSON) via (Stripped TreeExport)
 
 data TreeMetadata = TreeMetadata
     { tmRootId :: Text -- Changed to Text (ProcessId format)
@@ -202,6 +212,7 @@ data TreeMetadata = TreeMetadata
     , tmExpandableNodes :: Int -- Nodes that could expand further
     }
     deriving (Generic)
+    deriving (ToJSON) via (Stripped TreeMetadata)
 
 data ExportNode = ExportNode
     { enId :: Text -- Changed to Text (ProcessId format)
@@ -217,6 +228,7 @@ data ExportNode = ExportNode
     , enCompartment :: Maybe Text -- Biosphere compartment (air/water/soil), only for BiosphereNodes
     }
     deriving (Generic)
+    deriving (ToJSON) via (Stripped ExportNode)
 
 data NodeType = ActivityNode | LoopNode | BiosphereEmissionNode | BiosphereResourceNode
     deriving (Eq, Show, Generic)
@@ -233,6 +245,7 @@ data TreeEdge = TreeEdge
     , teEdgeType :: EdgeType -- Type of edge (technosphere or biosphere)
     }
     deriving (Generic)
+    deriving (ToJSON) via (Stripped TreeEdge)
 
 data FlowInfo = FlowInfo
     { fiId :: UUID
@@ -240,6 +253,7 @@ data FlowInfo = FlowInfo
     , fiCategory :: Text
     }
     deriving (Generic)
+    deriving (ToJSON) via (Stripped FlowInfo)
 
 -- | Graph export data structures for network visualization
 data GraphExport = GraphExport
@@ -248,6 +262,7 @@ data GraphExport = GraphExport
     , geUnitGroups :: M.Map Text Text -- Unit to unit group mapping
     }
     deriving (Generic)
+    deriving (ToJSON) via (Stripped GraphExport)
 
 data GraphNode = GraphNode
     { gnNodeId :: Int -- Numeric ID for efficient frontend processing
@@ -258,6 +273,7 @@ data GraphNode = GraphNode
     , gnLocation :: Text -- Geography
     }
     deriving (Generic)
+    deriving (ToJSON) via (Stripped GraphNode)
 
 data GraphEdge = GraphEdge
     { geSource :: Int -- Source node ID
@@ -267,6 +283,7 @@ data GraphEdge = GraphEdge
     , geFlowName :: Text -- Name of the flow
     }
     deriving (Generic)
+    deriving (ToJSON) via (Stripped GraphEdge)
 
 {- | Lightweight flow information for lists. Carries either a tech or a bio
 flow; the @ApiFlow@ tag is the wire discriminator.
@@ -278,6 +295,7 @@ data FlowSummary = FlowSummary
     , fsRole :: FlowRole -- Role in this specific activity
     }
     deriving (Generic)
+    deriving (ToJSON) via (Stripped FlowSummary)
 
 -- | Role of a flow in a specific activity context
 data FlowRole = InputFlow | OutputFlow | ReferenceProductFlow
@@ -295,12 +313,14 @@ data MethodSummary = MethodSummary
     , msmCollection :: Text -- Parent collection name (e.g., "ef-31")
     }
     deriving (Generic)
+    deriving (ToJSON) via (Stripped MethodSummary)
 
 -- | Method collection list response
 newtype MethodCollectionListResponse = MethodCollectionListResponse
     { mclMethods :: [MethodCollectionStatusAPI]
     }
     deriving (Generic)
+    deriving (ToJSON, FromJSON) via (Stripped MethodCollectionListResponse)
 
 -- | Method collection status for API responses
 data MethodCollectionStatusAPI = MethodCollectionStatusAPI
@@ -314,12 +334,14 @@ data MethodCollectionStatusAPI = MethodCollectionStatusAPI
     , mcaFormat :: Maybe Text -- Format (e.g. "ILCD")
     }
     deriving (Generic)
+    deriving (ToJSON, FromJSON) via (Stripped MethodCollectionStatusAPI)
 
 -- | Reference data list response (flow synonyms, compartment mappings, units)
 newtype RefDataListResponse = RefDataListResponse
     { rdlItems :: [RefDataStatusAPI]
     }
     deriving (Generic)
+    deriving (ToJSON, FromJSON) via (Stripped RefDataListResponse)
 
 -- | Reference data status for API responses
 data RefDataStatusAPI = RefDataStatusAPI
@@ -332,12 +354,14 @@ data RefDataStatusAPI = RefDataStatusAPI
     , rdaEntryCount :: Int
     }
     deriving (Generic)
+    deriving (ToJSON, FromJSON) via (Stripped RefDataStatusAPI)
 
 -- | Synonym groups response
 newtype SynonymGroupsResponse = SynonymGroupsResponse
     { sgrGroups :: [[Text]]
     }
     deriving (Generic)
+    deriving (ToJSON, FromJSON) via (Stripped SynonymGroupsResponse)
 
 -- | Full method details
 data MethodDetail = MethodDetail
@@ -350,6 +374,7 @@ data MethodDetail = MethodDetail
     , mdFactorCount :: Int
     }
     deriving (Generic)
+    deriving (ToJSON) via (Stripped MethodDetail)
 
 -- | Characterization factor for API response
 data MethodFactorAPI = MethodFactorAPI
@@ -359,6 +384,7 @@ data MethodFactorAPI = MethodFactorAPI
     , mfaValue :: Double -- CF value
     }
     deriving (Generic)
+    deriving (ToJSON) via (Stripped MethodFactorAPI)
 
 -- | A single flow's contribution to an LCIA score
 data FlowContributionEntry = FlowContributionEntry
@@ -371,6 +397,7 @@ data FlowContributionEntry = FlowContributionEntry
     , fcoCfValue :: Double -- Raw characterization factor value
     }
     deriving (Generic)
+    deriving (ToJSON) via (Stripped FlowContributionEntry)
 
 -- | LCIA result for a single impact category
 data LCIAResult = LCIAResult
@@ -387,6 +414,7 @@ data LCIAResult = LCIAResult
     , lrTopContributors :: [FlowContributionEntry] -- Top contributing elementary flows
     }
     deriving (Generic)
+    deriving (ToJSON) via (Stripped LCIAResult)
 
 -- | Contributing flows result: top elementary flows for a specific impact category
 data ContributingFlowsResult = ContributingFlowsResult
@@ -396,6 +424,7 @@ data ContributingFlowsResult = ContributingFlowsResult
     , cfrTopFlows :: [FlowContributionEntry]
     }
     deriving (Generic)
+    deriving (ToJSON) via (Stripped ContributingFlowsResult)
 
 -- | A single activity's contribution to an LCIA score
 data ActivityContribution = ActivityContribution
@@ -407,6 +436,7 @@ data ActivityContribution = ActivityContribution
     , acSharePct :: Double -- Percentage of total score (0-100)
     }
     deriving (Generic)
+    deriving (ToJSON) via (Stripped ActivityContribution)
 
 -- | Contributing activities result: top upstream activities for a specific impact category
 data ContributingActivitiesResult = ContributingActivitiesResult
@@ -416,12 +446,14 @@ data ContributingActivitiesResult = ContributingActivitiesResult
     , carActivities :: [ActivityContribution]
     }
     deriving (Generic)
+    deriving (ToJSON) via (Stripped ContributingActivitiesResult)
 
 -- | Batch impacts request: compute LCIA for every process in one call.
 newtype BatchImpactsRequest = BatchImpactsRequest
     { birProcessIds :: [Text]
     }
     deriving (Generic)
+    deriving (FromJSON) via (Stripped BatchImpactsRequest)
 
 -- | One entry of a batch impacts response.
 data BatchImpactsEntry = BatchImpactsEntry
@@ -430,6 +462,7 @@ data BatchImpactsEntry = BatchImpactsEntry
     , bieImpacts :: LCIABatchResult
     }
     deriving (Generic)
+    deriving (ToJSON) via (Stripped BatchImpactsEntry)
 
 {- | Batch impacts response: one entry per successfully computed process,
 plus lists of process ids that could not be resolved.
@@ -440,6 +473,7 @@ data BatchImpactsResponse = BatchImpactsResponse
     , birInvalid :: [Text]
     }
     deriving (Generic)
+    deriving (ToJSON) via (Stripped BatchImpactsResponse)
 
 {- | A single scoring-set indicator: the per-variable normalized-weighted value
 plus the impact category it came from. Value is pre-multiplied by the
@@ -450,6 +484,7 @@ data ScoringIndicator = ScoringIndicator
     , siValue :: Double
     }
     deriving (Generic)
+    deriving (ToJSON) via (Stripped ScoringIndicator)
 
 -- | Batch LCIA result with optional single score
 data LCIABatchResult = LCIABatchResult
@@ -466,6 +501,7 @@ data LCIABatchResult = LCIABatchResult
     -- ^ Scoring set name → (variable name → indicator). One row per scoring variable.
     }
     deriving (Generic)
+    deriving (ToJSON) via (Stripped LCIABatchResult)
 
 -- | Flow mapping status for a method
 data MappingStatus = MappingStatus
@@ -483,6 +519,7 @@ data MappingStatus = MappingStatus
     , mstUnmappedFlows :: [UnmappedFlowAPI] -- Details of unmapped flows
     }
     deriving (Generic)
+    deriving (ToJSON) via (Stripped MappingStatus)
 
 -- | Details about an unmapped flow
 data UnmappedFlowAPI = UnmappedFlowAPI
@@ -491,6 +528,7 @@ data UnmappedFlowAPI = UnmappedFlowAPI
     , ufaDirection :: Text -- "Input" or "Output"
     }
     deriving (Generic)
+    deriving (ToJSON) via (Stripped UnmappedFlowAPI)
 
 -- | DB-flow-centric mapping: all biosphere flows with their CF assignments
 data FlowCFMapping = FlowCFMapping
@@ -501,6 +539,7 @@ data FlowCFMapping = FlowCFMapping
     , fcmFlows :: [FlowCFEntry]
     }
     deriving (Generic)
+    deriving (ToJSON) via (Stripped FlowCFMapping)
 
 -- | A single DB biosphere flow with its CF assignment (if any)
 data FlowCFEntry = FlowCFEntry
@@ -512,6 +551,7 @@ data FlowCFEntry = FlowCFEntry
     , fceMatchStrategy :: Maybe Text -- "uuid" | "name" | "synonym"
     }
     deriving (Generic)
+    deriving (ToJSON) via (Stripped FlowCFEntry)
 
 -- | Characterization result: matched CFs for a method in a database
 data CharacterizationResult = CharacterizationResult
@@ -522,6 +562,7 @@ data CharacterizationResult = CharacterizationResult
     , chrFactors :: [CharacterizationEntry]
     }
     deriving (Generic)
+    deriving (ToJSON) via (Stripped CharacterizationResult)
 
 -- | A single matched characterization factor
 data CharacterizationEntry = CharacterizationEntry
@@ -537,12 +578,14 @@ data CharacterizationEntry = CharacterizationEntry
     , cheMatchStrategy :: Text -- "uuid", "cas", "name", "synonym", "fuzzy"
     }
     deriving (Generic)
+    deriving (ToJSON) via (Stripped CharacterizationEntry)
 
 -- | Database list response
 newtype DatabaseListResponse = DatabaseListResponse
     { dlrDatabases :: [DatabaseStatusAPI] -- All available databases
     }
     deriving (Generic)
+    deriving (ToJSON, FromJSON) via (Stripped DatabaseListResponse)
 
 -- | Database status for API responses
 data DatabaseStatusAPI = DatabaseStatusAPI
@@ -558,6 +601,7 @@ data DatabaseStatusAPI = DatabaseStatusAPI
     , dsaDependsOn :: [Text] -- Names of databases this one depends on (for cross-DB linking)
     }
     deriving (Generic)
+    deriving (ToJSON, FromJSON) via (Stripped DatabaseStatusAPI)
 
 -- | Response for database activation
 data ActivateResponse = ActivateResponse
@@ -566,6 +610,7 @@ data ActivateResponse = ActivateResponse
     , arDatabase :: Maybe DatabaseStatusAPI
     }
     deriving (Generic)
+    deriving (ToJSON, FromJSON) via (Stripped ActivateResponse)
 
 {- | Response for the re-link endpoint: fresh cross-DB link stats after a
 second-pass linking against the currently-loaded databases.
@@ -578,18 +623,21 @@ data RelinkResponse = RelinkResponse
     , rrDependsOn :: [Text]
     }
     deriving (Generic)
+    deriving (ToJSON, FromJSON) via (Stripped RelinkResponse)
 
 -- | Result of auto-loading a single dependency
 data DepLoadResult
     = DepLoaded {dlrName :: Text}
     | DepLoadFailed {dlfName :: Text, dlfError :: Text}
     deriving (Generic)
+    deriving (ToJSON, FromJSON) via (Stripped DepLoadResult)
 
 -- | Response for the load database endpoint
 data LoadDatabaseResponse
     = LoadFailed {ldrError :: Text}
     | LoadSucceeded {ldrDatabase :: DatabaseStatusAPI, ldrDeps :: [DepLoadResult]}
     deriving (Generic)
+    deriving (ToJSON, FromJSON) via (Stripped LoadDatabaseResponse)
 
 -- | Request for database upload (base64-encoded ZIP)
 data UploadRequest = UploadRequest
@@ -598,6 +646,7 @@ data UploadRequest = UploadRequest
     , urFileData :: Text -- Base64-encoded ZIP file content
     }
     deriving (Generic)
+    deriving (ToJSON, FromJSON) via (Stripped UploadRequest)
 
 -- | Response for database upload
 data UploadResponse = UploadResponse
@@ -607,6 +656,7 @@ data UploadResponse = UploadResponse
     , uprFormat :: Maybe Text -- Detected format (if successful)
     }
     deriving (Generic)
+    deriving (ToJSON, FromJSON) via (Stripped UploadResponse)
 
 -- | Supply chain response — all upstream activities with scaling factors
 data SupplyChainResponse = SupplyChainResponse
@@ -617,6 +667,7 @@ data SupplyChainResponse = SupplyChainResponse
     , scrEdges :: [SupplyChainEdge]
     }
     deriving (Generic)
+    deriving (ToJSON) via (Stripped SupplyChainResponse)
 
 {- | A single entry in the supply chain. @sceProcessId@ is bare for entries
 from the root DB and qualified (@"dbName::pid"@) for entries reached via
@@ -636,6 +687,7 @@ data SupplyChainEntry = SupplyChainEntry
     , sceUpstreamCount :: Int -- number of unique upstream activities reachable from this one
     }
     deriving (Generic)
+    deriving (ToJSON) via (Stripped SupplyChainEntry)
 
 -- | An edge in the upstream supply chain subgraph
 data SupplyChainEdge = SupplyChainEdge
@@ -646,6 +698,7 @@ data SupplyChainEdge = SupplyChainEdge
     , sceEdgeAmount :: Double -- technosphere coefficient
     }
     deriving (Generic)
+    deriving (ToJSON, FromJSON) via (Stripped SupplyChainEdge)
 
 {- | Request body for POST endpoints that accept substitutions.
 Substitutions modify the scaling vector via Sherman-Morrison rank-1 updates.
@@ -654,6 +707,7 @@ newtype SubstitutionRequest = SubstitutionRequest
     { srSubstitutions :: [Substitution]
     }
     deriving (Generic)
+    deriving (FromJSON) via (Stripped SubstitutionRequest)
 
 {- | A single supplier substitution.
 
@@ -670,6 +724,7 @@ data Substitution = Substitution
     , subConsumer :: Text -- Consumer activity ProcessId (bare or dbName::pid)
     }
     deriving (Generic)
+    deriving (FromJSON) via (Stripped Substitution)
 
 {- | A single rank-1 perturbation of a technosphere coefficient @A_ij@.
 
@@ -684,12 +739,14 @@ data Perturbation = Perturbation
     , perLabel :: Maybe Text -- Optional label for response correlation
     }
     deriving (Generic)
+    deriving (ToJSON, FromJSON) via (Stripped Perturbation)
 
 -- | Request body for POST sensitivity endpoints. Flat list, V1.
 newtype SensitivityRequest = SensitivityRequest
     { srPerturbations :: [Perturbation]
     }
     deriving (Generic)
+    deriving (FromJSON) via (Stripped SensitivityRequest)
 
 {- | One result entry per perturbation. The 'peResult' carries either an
 error message ('Left') or the (impact, deltaImpact) pair ('Right'). The
@@ -710,6 +767,7 @@ data SensitivityResponse = SensitivityResponse
     , srPerturbed :: [PerturbedEntry]
     }
     deriving (Generic)
+    deriving (ToJSON) via (Stripped SensitivityResponse)
 
 {- | Name of the request-level "root" database — the DB extracted from the
 URL path and the implicit target of any bare 'ProcessId' (one without the
@@ -759,6 +817,7 @@ data ExchangeWithUnit = ExchangeWithUnit
     , ewuPedigree :: Maybe Pedigree -- LCA data-quality scores when available (mirrors exchangePedigree)
     }
     deriving (Generic)
+    deriving (ToJSON, FromJSON) via (Stripped ExchangeWithUnit)
 
 -- | Activity information optimized for API responses
 data ActivityForAPI = ActivityForAPI
@@ -776,6 +835,7 @@ data ActivityForAPI = ActivityForAPI
     , pfaExchanges :: [ExchangeWithUnit] -- Exchanges with unit names
     }
     deriving (Generic)
+    deriving (ToJSON, FromJSON) via (Stripped ActivityForAPI)
 
 -- | Streamlined activity information - core data only
 data ActivityInfo = ActivityInfo
@@ -785,6 +845,7 @@ data ActivityInfo = ActivityInfo
     , piLinks :: ActivityLinks -- Links to sub-resources
     }
     deriving (Generic)
+    deriving (ToJSON, FromJSON) via (Stripped ActivityInfo)
 
 -- | Extended activity metadata
 data ActivityMetadata = ActivityMetadata
@@ -796,6 +857,7 @@ data ActivityMetadata = ActivityMetadata
     , pmReferenceProductFlow :: Maybe UUID -- Flow ID of reference product
     }
     deriving (Generic)
+    deriving (ToJSON, FromJSON) via (Stripped ActivityMetadata)
 
 -- | Links to related resources
 data ActivityLinks = ActivityLinks
@@ -805,6 +867,7 @@ data ActivityLinks = ActivityLinks
     , plReferenceProductUrl :: Maybe Text -- URL to reference product (if exists)
     }
     deriving (Generic)
+    deriving (ToJSON, FromJSON) via (Stripped ActivityLinks)
 
 -- | Activity statistics
 data ActivityStats = ActivityStats
@@ -814,6 +877,7 @@ data ActivityStats = ActivityStats
     , psLocation :: Text
     }
     deriving (Generic)
+    deriving (ToJSON, FromJSON) via (Stripped ActivityStats)
 
 -- | Flow with additional metadata. Carries either a tech or bio flow.
 data FlowDetail = FlowDetail
@@ -822,6 +886,7 @@ data FlowDetail = FlowDetail
     , fdUsageCount :: Int -- How many activities use this flow
     }
     deriving (Generic)
+    deriving (ToJSON) via (Stripped FlowDetail)
 
 {- | Exchange with flow, unit, and target activity information.
 The carried flow's variant lines up with the Exchange variant.
@@ -835,6 +900,7 @@ data ExchangeDetail = ExchangeDetail
     , edTargetActivity :: Maybe ActivitySummary -- Target activity for technosphere inputs
     }
     deriving (Generic)
+    deriving (ToJSON) via (Stripped ExchangeDetail)
 
 -- | A single filter entry returned in a preset
 data ClassificationEntryInfo = ClassificationEntryInfo
@@ -843,6 +909,7 @@ data ClassificationEntryInfo = ClassificationEntryInfo
     , ceiMode :: !Text -- "exact" or "contains"
     }
     deriving (Show, Eq, Generic)
+    deriving (ToJSON) via (Stripped ClassificationEntryInfo)
 
 -- | A named filter preset (from TOML config)
 data ClassificationPresetInfo = ClassificationPresetInfo
@@ -852,6 +919,7 @@ data ClassificationPresetInfo = ClassificationPresetInfo
     , cpiFilters :: ![ClassificationEntryInfo]
     }
     deriving (Show, Eq, Generic)
+    deriving (ToJSON) via (Stripped ClassificationPresetInfo)
 
 -- | Classification system with its values for browsing/filtering
 data ClassificationSystem = ClassificationSystem
@@ -860,6 +928,7 @@ data ClassificationSystem = ClassificationSystem
     , csActivityCount :: Int -- How many activities have this system
     }
     deriving (Generic)
+    deriving (ToJSON) via (Stripped ClassificationSystem)
 
 {- | Result of an /activity/{pid}/aggregate call.
 
@@ -874,6 +943,7 @@ data Aggregation = Aggregation
     , aggGroups :: [AggregationGroup] -- one entry per group_by bucket (empty when group_by omitted)
     }
     deriving (Generic)
+    deriving (ToJSON) via (Stripped Aggregation)
 
 -- | One bucket in an aggregation result.
 data AggregationGroup = AggregationGroup
@@ -884,78 +954,16 @@ data AggregationGroup = AggregationGroup
     , aggCount :: Int
     }
     deriving (Generic)
+    deriving (ToJSON) via (Stripped AggregationGroup)
 
 -- JSON instances. All record types use API.JsonOptions.stripLowerPrefix
 -- via the strippedToJSON/strippedToEncoding/strippedParseJSON helpers.
 -- Sum-only types (NodeType, EdgeType, FlowRole) keep default derivation.
-instance ToJSON ConsumerResult where toJSON = strippedToJSON; toEncoding = strippedToEncoding
-instance FromJSON ConsumerResult where parseJSON = strippedParseJSON
-instance ToJSON ConsumersResponse where toJSON = strippedToJSON; toEncoding = strippedToEncoding
-instance FromJSON ConsumersResponse where parseJSON = strippedParseJSON
-instance ToJSON ClassificationEntryInfo where toJSON = strippedToJSON; toEncoding = strippedToEncoding
-instance ToJSON ClassificationPresetInfo where toJSON = strippedToJSON; toEncoding = strippedToEncoding
-instance ToJSON ClassificationSystem where toJSON = strippedToJSON; toEncoding = strippedToEncoding
-instance ToJSON Aggregation where toJSON = strippedToJSON; toEncoding = strippedToEncoding
-instance ToJSON AggregationGroup where toJSON = strippedToJSON; toEncoding = strippedToEncoding
 instance (ToJSON a) => ToJSON (SearchResults a) where toJSON = strippedToJSON; toEncoding = strippedToEncoding
-instance ToJSON ActivitySummary where toJSON = strippedToJSON; toEncoding = strippedToEncoding
-instance ToJSON FlowSearchResult where toJSON = strippedToJSON; toEncoding = strippedToEncoding
-instance ToJSON InventoryMetadata where toJSON = strippedToJSON; toEncoding = strippedToEncoding
-instance ToJSON InventoryStatistics where toJSON = strippedToJSON; toEncoding = strippedToEncoding
-instance ToJSON TreeExport where toJSON = strippedToJSON; toEncoding = strippedToEncoding
-instance ToJSON TreeMetadata where toJSON = strippedToJSON; toEncoding = strippedToEncoding
-instance ToJSON ExportNode where toJSON = strippedToJSON; toEncoding = strippedToEncoding
 instance ToJSON NodeType
 instance ToJSON EdgeType
-instance ToJSON TreeEdge where toJSON = strippedToJSON; toEncoding = strippedToEncoding
-instance ToJSON FlowInfo where toJSON = strippedToJSON; toEncoding = strippedToEncoding
 instance ToJSON FlowRole
-instance ToJSON ExchangeWithUnit where toJSON = strippedToJSON; toEncoding = strippedToEncoding
-instance ToJSON ActivityForAPI where toJSON = strippedToJSON; toEncoding = strippedToEncoding
-instance ToJSON ActivityInfo where toJSON = strippedToJSON; toEncoding = strippedToEncoding
-instance ToJSON ActivityMetadata where toJSON = strippedToJSON; toEncoding = strippedToEncoding
-instance ToJSON ActivityLinks where toJSON = strippedToJSON; toEncoding = strippedToEncoding
-instance ToJSON ActivityStats where toJSON = strippedToJSON; toEncoding = strippedToEncoding
-instance ToJSON InventoryFlowDetail where toJSON = strippedToJSON; toEncoding = strippedToEncoding
-instance ToJSON FlowSummary where toJSON = strippedToJSON; toEncoding = strippedToEncoding
-instance ToJSON InventoryExport where toJSON = strippedToJSON; toEncoding = strippedToEncoding
-instance ToJSON ExchangeDetail where toJSON = strippedToJSON; toEncoding = strippedToEncoding
 instance ToJSON Unit where toJSON = strippedToJSON; toEncoding = strippedToEncoding
-instance ToJSON FlowDetail where toJSON = strippedToJSON; toEncoding = strippedToEncoding
-instance ToJSON GraphExport where toJSON = strippedToJSON; toEncoding = strippedToEncoding
-instance ToJSON GraphNode where toJSON = strippedToJSON; toEncoding = strippedToEncoding
-instance ToJSON GraphEdge where toJSON = strippedToJSON; toEncoding = strippedToEncoding
-instance ToJSON MethodSummary where toJSON = strippedToJSON; toEncoding = strippedToEncoding
-instance ToJSON MethodCollectionListResponse where toJSON = strippedToJSON; toEncoding = strippedToEncoding
-instance ToJSON MethodCollectionStatusAPI where toJSON = strippedToJSON; toEncoding = strippedToEncoding
-instance ToJSON MethodDetail where toJSON = strippedToJSON; toEncoding = strippedToEncoding
-instance ToJSON MethodFactorAPI where toJSON = strippedToJSON; toEncoding = strippedToEncoding
-instance ToJSON FlowContributionEntry where toJSON = strippedToJSON; toEncoding = strippedToEncoding
-instance ToJSON LCIAResult where toJSON = strippedToJSON; toEncoding = strippedToEncoding
-instance ToJSON ScoringIndicator where toJSON = strippedToJSON; toEncoding = strippedToEncoding
-instance ToJSON LCIABatchResult where toJSON = strippedToJSON; toEncoding = strippedToEncoding
-instance ToJSON BatchImpactsEntry where toJSON = strippedToJSON; toEncoding = strippedToEncoding
-instance ToJSON BatchImpactsResponse where toJSON = strippedToJSON; toEncoding = strippedToEncoding
-instance FromJSON BatchImpactsRequest where parseJSON = strippedParseJSON
-instance ToJSON ContributingFlowsResult where toJSON = strippedToJSON; toEncoding = strippedToEncoding
-instance ToJSON ActivityContribution where toJSON = strippedToJSON; toEncoding = strippedToEncoding
-instance ToJSON ContributingActivitiesResult where toJSON = strippedToJSON; toEncoding = strippedToEncoding
-instance ToJSON MappingStatus where toJSON = strippedToJSON; toEncoding = strippedToEncoding
-instance ToJSON UnmappedFlowAPI where toJSON = strippedToJSON; toEncoding = strippedToEncoding
-instance ToJSON FlowCFMapping where toJSON = strippedToJSON; toEncoding = strippedToEncoding
-instance ToJSON FlowCFEntry where toJSON = strippedToJSON; toEncoding = strippedToEncoding
-instance ToJSON CharacterizationResult where toJSON = strippedToJSON; toEncoding = strippedToEncoding
-instance ToJSON CharacterizationEntry where toJSON = strippedToJSON; toEncoding = strippedToEncoding
-instance ToJSON SupplyChainResponse where toJSON = strippedToJSON; toEncoding = strippedToEncoding
-instance ToJSON SupplyChainEntry where toJSON = strippedToJSON; toEncoding = strippedToEncoding
-instance ToJSON SupplyChainEdge where toJSON = strippedToJSON; toEncoding = strippedToEncoding
-instance FromJSON SupplyChainEdge where parseJSON = strippedParseJSON
-instance FromJSON SubstitutionRequest where parseJSON = strippedParseJSON
-instance FromJSON Substitution where parseJSON = strippedParseJSON
-instance FromJSON SensitivityRequest where parseJSON = strippedParseJSON
-instance FromJSON Perturbation where parseJSON = strippedParseJSON
-instance ToJSON Perturbation where toJSON = strippedToJSON; toEncoding = strippedToEncoding
-instance ToJSON SensitivityResponse where toJSON = strippedToJSON; toEncoding = strippedToEncoding
 
 -- Custom ToJSON for PerturbedEntry: flatten the Either so success entries
 -- have impact+deltaImpact and error entries have error.
@@ -968,37 +976,6 @@ instance ToJSON PerturbedEntry where
 
 -- FromJSON instances needed for API conversion
 instance (FromJSON a) => FromJSON (SearchResults a) where parseJSON = strippedParseJSON
-instance FromJSON ActivitySummary where parseJSON = strippedParseJSON
-instance FromJSON ActivityInfo where parseJSON = strippedParseJSON
-instance FromJSON ActivityForAPI where parseJSON = strippedParseJSON
-instance FromJSON ActivityMetadata where parseJSON = strippedParseJSON
-instance FromJSON ActivityLinks where parseJSON = strippedParseJSON
-instance FromJSON ActivityStats where parseJSON = strippedParseJSON
-instance FromJSON ExchangeWithUnit where parseJSON = strippedParseJSON
-instance ToJSON DatabaseListResponse where toJSON = strippedToJSON; toEncoding = strippedToEncoding
-instance ToJSON DatabaseStatusAPI where toJSON = strippedToJSON; toEncoding = strippedToEncoding
-instance ToJSON ActivateResponse where toJSON = strippedToJSON; toEncoding = strippedToEncoding
-instance ToJSON RelinkResponse where toJSON = strippedToJSON; toEncoding = strippedToEncoding
-instance ToJSON DepLoadResult where toJSON = strippedToJSON; toEncoding = strippedToEncoding
-instance ToJSON LoadDatabaseResponse where toJSON = strippedToJSON; toEncoding = strippedToEncoding
-instance ToJSON UploadRequest where toJSON = strippedToJSON; toEncoding = strippedToEncoding
-instance ToJSON UploadResponse where toJSON = strippedToJSON; toEncoding = strippedToEncoding
-instance FromJSON DatabaseListResponse where parseJSON = strippedParseJSON
-instance FromJSON DatabaseStatusAPI where parseJSON = strippedParseJSON
-instance FromJSON ActivateResponse where parseJSON = strippedParseJSON
-instance FromJSON RelinkResponse where parseJSON = strippedParseJSON
-instance FromJSON DepLoadResult where parseJSON = strippedParseJSON
-instance FromJSON LoadDatabaseResponse where parseJSON = strippedParseJSON
-instance FromJSON UploadRequest where parseJSON = strippedParseJSON
-instance FromJSON UploadResponse where parseJSON = strippedParseJSON
-instance FromJSON MethodCollectionListResponse where parseJSON = strippedParseJSON
-instance FromJSON MethodCollectionStatusAPI where parseJSON = strippedParseJSON
-instance ToJSON RefDataListResponse where toJSON = strippedToJSON; toEncoding = strippedToEncoding
-instance ToJSON RefDataStatusAPI where toJSON = strippedToJSON; toEncoding = strippedToEncoding
-instance ToJSON SynonymGroupsResponse where toJSON = strippedToJSON; toEncoding = strippedToEncoding
-instance FromJSON RefDataListResponse where parseJSON = strippedParseJSON
-instance FromJSON RefDataStatusAPI where parseJSON = strippedParseJSON
-instance FromJSON SynonymGroupsResponse where parseJSON = strippedParseJSON
 
 -- openapi3 cannot derive ToSchema for BSL.ByteString directly
 newtype BinaryContent = BinaryContent BSL.ByteString

From 492c26c9e89cfc6bcd2b5c25b754ec1be075481e Mon Sep 17 00:00:00 2001
From: Christophe Combelles <ccomb@free.fr>
Date: Tue, 26 May 2026 00:47:35 +0200
Subject: [PATCH 03/43] refactor(types): collapse 6 multi-line JSON instances
 via Stripped
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Convert the manual `instance ToJSON / FromJSON … where toJSON =
genericToJSON stripLowerPrefix; …` blocks for Pedigree, Exchange,
Compartment, TechnosphereFlow, BiosphereFlow, and WasteFlow into
attached `deriving (ToJSON, FromJSON) via (Stripped X)` clauses on
each data declaration.

TechRole and BioDirection keep their empty `instance ToJSON …`
declarations (default Generic encoding for nullary-constructor sum
types — the wire form is the constructor name as a JSON string).

openapi.json byte-identical against the pre-branch baseline;
hspec suite green at 1052/1052.
---
 src/Types.hs | 52 ++++++++++++----------------------------------------
 1 file changed, 12 insertions(+), 40 deletions(-)

diff --git a/src/Types.hs b/src/Types.hs
index ad5ba491..47ddec62 100644
--- a/src/Types.hs
+++ b/src/Types.hs
@@ -1,6 +1,7 @@
 {-# LANGUAGE BangPatterns #-}
 {-# LANGUAGE DeriveAnyClass #-}
 {-# LANGUAGE DeriveGeneric #-}
+{-# LANGUAGE DerivingVia #-}
 {-# LANGUAGE FlexibleInstances #-}
 {-# LANGUAGE MultiParamTypeClasses #-}
 {-# LANGUAGE OverloadedStrings #-}
@@ -12,9 +13,9 @@ module Types (
     UUID,
 ) where
 
-import API.JsonOptions (stripLowerPrefix)
+import API.JsonOptions (Stripped (..))
 import Control.DeepSeq (NFData)
-import Data.Aeson (FromJSON (..), ToJSON (..), genericParseJSON, genericToEncoding, genericToJSON)
+import Data.Aeson (FromJSON (..), ToJSON (..))
 import Data.Int (Int32)
 import qualified Data.IntSet as IS
 import qualified Data.Map as M
@@ -64,6 +65,7 @@ data Compartment = Compartment
     , compartmentSub :: !(Maybe Text) -- "high. pop.", "river water", …
     }
     deriving (Eq, Show, Generic, NFData, Store)
+    deriving (ToJSON, FromJSON) via (Stripped Compartment)
 
 {- | The biosphere flow's medium (air | water | soil | …), or @""@ when the
 source dataset omitted the compartment. Use 'bfCompartment' directly when
@@ -135,6 +137,7 @@ data TechnosphereFlow = TechnosphereFlow
     , tfSubstanceId :: !(Maybe Int)
     }
     deriving (Generic, NFData, Store)
+    deriving (ToJSON, FromJSON) via (Stripped TechnosphereFlow)
 
 {- | A biosphere flow — an environmental exchange (resource extraction or
 emission). Always carries a `Compartment` identifying the medium.
@@ -154,6 +157,7 @@ data BiosphereFlow = BiosphereFlow
     -}
     }
     deriving (Generic, NFData, Store)
+    deriving (ToJSON, FromJSON) via (Stripped BiosphereFlow)
 
 {- | A waste flow — a residual output that a process generates and which a
 treatment activity may consume as its reference input. Sister type to
@@ -175,6 +179,7 @@ data WasteFlow = WasteFlow
     , wfSubstanceId :: !(Maybe Int)
     }
     deriving (Generic, NFData, Store)
+    deriving (ToJSON, FromJSON) via (Stripped WasteFlow)
 
 {- | Pedigree matrix (Weidema & Wesnæs 1996) — five LCA data-quality scores
 each in 1..5 (1 = best, 5 = worst). SimaPro CSV encodes it as a prefix in the
@@ -188,6 +193,7 @@ data Pedigree = Pedigree
     , pedTechnological :: !Int -- 1..5
     }
     deriving (Eq, Show, Generic, NFData, Store)
+    deriving (ToJSON, FromJSON) via (Stripped Pedigree)
 
 {- | Smart constructor: rejects out-of-range values (anything not in 1..5)
 by returning Nothing. Callers should treat Nothing as "no pedigree
@@ -237,6 +243,7 @@ data Exchange
         , waPedigree :: !(Maybe Pedigree) -- LCA data-quality scores when available
         }
     deriving (Generic, NFData, Store)
+    deriving (ToJSON, FromJSON) via (Stripped Exchange)
 
 -- | Helper functions for Exchange variants
 exchangeFlowId :: Exchange -> UUID
@@ -1195,46 +1202,11 @@ data CF = CF
     , cfFactor :: !Double -- Characterization factor
     }
 
--- JSON instances for API compatibility
--- Note: ProcessId is Int32, which already has ToJSON/FromJSON instances
-instance ToJSON Pedigree where
-    toJSON = genericToJSON stripLowerPrefix
-    toEncoding = genericToEncoding stripLowerPrefix
-instance FromJSON Pedigree where
-    parseJSON = genericParseJSON stripLowerPrefix
-instance ToJSON Exchange where
-    toJSON = genericToJSON stripLowerPrefix
-    toEncoding = genericToEncoding stripLowerPrefix
-
-instance FromJSON Exchange where
-    parseJSON = genericParseJSON stripLowerPrefix
-
+-- ToJSON/FromJSON for the records above are produced via `deriving via (Stripped X)`
+-- attached to each `data` declaration. The two enums TechRole and BioDirection use
+-- the default Generic encoding (constructor name as JSON string).
 instance ToJSON TechRole
 instance FromJSON TechRole
 
 instance ToJSON BioDirection
 instance FromJSON BioDirection
-
-instance ToJSON Compartment where
-    toJSON = genericToJSON stripLowerPrefix
-    toEncoding = genericToEncoding stripLowerPrefix
-instance FromJSON Compartment where
-    parseJSON = genericParseJSON stripLowerPrefix
-
-instance ToJSON TechnosphereFlow where
-    toJSON = genericToJSON stripLowerPrefix
-    toEncoding = genericToEncoding stripLowerPrefix
-instance FromJSON TechnosphereFlow where
-    parseJSON = genericParseJSON stripLowerPrefix
-
-instance ToJSON BiosphereFlow where
-    toJSON = genericToJSON stripLowerPrefix
-    toEncoding = genericToEncoding stripLowerPrefix
-instance FromJSON BiosphereFlow where
-    parseJSON = genericParseJSON stripLowerPrefix
-
-instance ToJSON WasteFlow where
-    toJSON = genericToJSON stripLowerPrefix
-    toEncoding = genericToEncoding stripLowerPrefix
-instance FromJSON WasteFlow where
-    parseJSON = genericParseJSON stripLowerPrefix

From 8ae6c9f4a86b5187c2957f2fe8246fe54119bbc9 Mon Sep 17 00:00:00 2001
From: Christophe Combelles <ccomb@free.fr>
Date: Tue, 26 May 2026 00:49:36 +0200
Subject: [PATCH 04/43] feat(validation): introduce Validation Applicative for
 accumulating errors
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add Data.Validation — an Applicative-only type that accumulates errors
via the Semigroup on @e@. The canonical example, in Milewski's /Category
Theory for Programmers/, of an Applicative that is not a Monad: a lawful
Monad instance would have to short-circuit on the first Failure to keep
>>= associative w.r.t. the underlying Either behaviour, which would
erase exactly the accumulation that motivates the type. So we stop at
Applicative.

Pure addition — no caller migrated in this commit. The next commit
replaces the nested case/throwError ladder in
src/API/Routes.hs:1296-1318 (Aggregate query-param validation) with an
Applicative chain over Validation (NonEmpty Text), so a request with
both an invalid `scope` AND an invalid `aggregate` parameter reports
both errors instead of just the first.
---
 src/Data/Validation.hs | 71 ++++++++++++++++++++++++++++++++++++++++++
 volca.cabal            |  1 +
 2 files changed, 72 insertions(+)
 create mode 100644 src/Data/Validation.hs

diff --git a/src/Data/Validation.hs b/src/Data/Validation.hs
new file mode 100644
index 00000000..43efd733
--- /dev/null
+++ b/src/Data/Validation.hs
@@ -0,0 +1,71 @@
+{- | An Applicative-only validation type that accumulates errors via the
+'Semigroup' on @e@ — the canonical example, in Milewski's
+/Category Theory for Programmers/, of an Applicative that is /not/ a
+Monad. A lawful 'Monad' instance for 'Validation' would have to
+short-circuit on the first 'Failure' (to make @>>=@ associative w.r.t.
+the underlying 'Either' behaviour); that would erase the accumulation
+that motivates the type, so we stop at 'Applicative'.
+
+Use:
+
+@
+    validateAll
+        :: Maybe Text -> Maybe Text -> Validation (NonEmpty Text) (Foo, Bar)
+    validateAll a b = (,) \<$\> validateA a \<*\> validateB b
+@
+
+When both @validateA@ and @validateB@ fail, the resulting 'Failure'
+carries /both/ messages, not just the first.
+-}
+module Data.Validation (
+    Validation (..),
+    toEither,
+    fromEither,
+    failure,
+    success,
+) where
+
+import Data.List.NonEmpty (NonEmpty)
+import qualified Data.List.NonEmpty as NE
+
+-- | @Failure e@ accumulates via @e@'s 'Semigroup'; @Success a@ propagates.
+data Validation e a
+    = Failure !e
+    | Success !a
+    deriving (Eq, Show)
+
+instance Functor (Validation e) where
+    fmap _ (Failure e) = Failure e
+    fmap f (Success a) = Success (f a)
+
+{- | The 'Applicative' instance is the whole point: when both arguments
+are 'Failure', their error payloads are combined via @(\<\>)@. This is the
+defining behaviour that distinguishes 'Validation' from 'Either'.
+-}
+instance Semigroup e => Applicative (Validation e) where
+    pure = Success
+    Failure e1 <*> Failure e2 = Failure (e1 <> e2)
+    Failure e <*> Success _ = Failure e
+    Success _ <*> Failure e = Failure e
+    Success f <*> Success a = Success (f a)
+
+-- | Convert to 'Either' once accumulation is done (e.g. at a Servant boundary).
+toEither :: Validation e a -> Either e a
+toEither (Failure e) = Left e
+toEither (Success a) = Right a
+
+-- | Inject an 'Either' into 'Validation'. Use 'fromEither' to lift a
+-- short-circuiting parser into an accumulating context.
+fromEither :: Either e a -> Validation e a
+fromEither (Left e) = Failure e
+fromEither (Right a) = Success a
+
+-- | Build a singleton-error failure. Convenient for 'NonEmpty'-keyed
+-- validators where each leaf check produces exactly one error message.
+failure :: e -> Validation (NonEmpty e) a
+failure = Failure . NE.singleton
+
+-- | Inject a value into 'Success'. Symmetric to 'failure'; useful in
+-- chains where readability is helped by an explicit constructor.
+success :: a -> Validation e a
+success = Success
diff --git a/volca.cabal b/volca.cabal
index ba29b9ec..458a07d9 100644
--- a/volca.cabal
+++ b/volca.cabal
@@ -23,6 +23,7 @@ library
   import:              warnings
   hs-source-dirs:      src
   exposed-modules:     Types
+                     , Data.Validation
                      , Progress
                      , UnitConversion
                      , Config

From d0ec56e83a0561a7411144475fdb0e1e59bc249a Mon Sep 17 00:00:00 2001
From: Christophe Combelles <ccomb@free.fr>
Date: Tue, 26 May 2026 00:52:36 +0200
Subject: [PATCH 05/43] refactor(api): aggregate validation via Validation
 Applicative
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Replace the nested case/throwError ladder in the aggregate endpoint's
parameter validation (Routes.hs:1296-1318) with an Applicative chain
over Validation (NonEmpty Text). A request with multiple bad query
params (e.g. invalid `scope` AND invalid `aggregate`) now reports
every error at once, joined by "; ", instead of just the first.

Categorically: Validation accumulates over the Semigroup of its error
type. The cross-check between `scope` and `filter_exchange_type` stays
in Handler because Validation is not a Monad — by design, since a
lawful Monad would have to short-circuit and erase the accumulation.

openapi.json byte-identical; hspec suite green at 1052/1052.
---
 src/API/Routes.hs | 45 ++++++++++++++++++++++++++++-----------------
 1 file changed, 28 insertions(+), 17 deletions(-)

diff --git a/src/API/Routes.hs b/src/API/Routes.hs
index 15b483b2..e7ff05b9 100644
--- a/src/API/Routes.hs
+++ b/src/API/Routes.hs
@@ -1,6 +1,7 @@
 {-# LANGUAGE BangPatterns #-}
 {-# LANGUAGE DataKinds #-}
 {-# LANGUAGE DeriveGeneric #-}
+{-# LANGUAGE LambdaCase #-}
 {-# LANGUAGE OverloadedStrings #-}
 {-# LANGUAGE TypeOperators #-}
 
@@ -20,6 +21,7 @@ import Data.Aeson
 import qualified Data.ByteString.Lazy as BSL
 import Data.List (find, intercalate, sortBy, sortOn)
 import qualified Data.List.NonEmpty as NE
+import qualified Data.Validation as V
 import qualified Data.Map as M
 import Data.Maybe (fromMaybe, isJust, isNothing, mapMaybe)
 import Data.OpenApi (OpenApi, ToSchema)
@@ -1293,29 +1295,38 @@ lcaServer dbManager maxTreeDepth password hostingConfig classificationPresets =
         groupByParam
         aggregateParam = do
             (db, sharedSolver) <- requireDatabaseByName dbManager dbName
-            scope <- case scopeParam of
-                Just "direct" -> return Agg.ScopeDirect
-                Just "supply_chain" -> return Agg.ScopeSupplyChain
-                Just "biosphere" -> return Agg.ScopeBiosphere
-                _ -> throwError err400{errBody = "scope must be one of: direct | supply_chain | biosphere"}
-            exchangeType <- case fexchangeTypeParam of
-                Nothing -> return Nothing
-                Just "technosphere" -> return (Just Agg.KindTechnosphere)
-                Just "biosphere" -> return (Just Agg.KindBiosphere)
-                Just "waste" -> return (Just Agg.KindWaste)
-                Just _ -> throwError err400{errBody = "filter_exchange_type must be one of: technosphere | biosphere | waste"}
+            -- Field-level validation via the Validation Applicative (accumulating).
+            -- A request with both an invalid `scope` and an invalid `aggregate`
+            -- now reports both errors at once, instead of just the first.
+            let parseScope = \case
+                    Just "direct" -> V.Success Agg.ScopeDirect
+                    Just "supply_chain" -> V.Success Agg.ScopeSupplyChain
+                    Just "biosphere" -> V.Success Agg.ScopeBiosphere
+                    _ -> V.failure "scope must be one of: direct | supply_chain | biosphere"
+                parseExType = \case
+                    Nothing -> V.Success Nothing
+                    Just "technosphere" -> V.Success (Just Agg.KindTechnosphere)
+                    Just "biosphere" -> V.Success (Just Agg.KindBiosphere)
+                    Just "waste" -> V.Success (Just Agg.KindWaste)
+                    Just _ -> V.failure "filter_exchange_type must be one of: technosphere | biosphere | waste"
+                parseAgg = \case
+                    Nothing -> V.Success Agg.AggSum
+                    Just "sum_quantity" -> V.Success Agg.AggSum
+                    Just "count" -> V.Success Agg.AggCount
+                    Just "share" -> V.Success Agg.AggShare
+                    Just other -> V.failure ("aggregate must be one of: sum_quantity | count | share (got " <> other <> ")")
+            (scope, exchangeType, aggFn) <-
+                case V.toEither $ (,,) <$> parseScope scopeParam <*> parseExType fexchangeTypeParam <*> parseAgg aggregateParam of
+                    Left errs -> throwError err400{errBody = BSL.fromStrict (T.encodeUtf8 (T.intercalate "; " (NE.toList errs)))}
+                    Right v -> pure v
+            -- Cross-check requires the parsed scope value, so it runs after the
+            -- Applicative phase. Validation is not a Monad, by design.
             case (exchangeType, scope) of
                 (Just _, Agg.ScopeBiosphere) ->
                     throwError err400{errBody = "filter_exchange_type is redundant with scope=biosphere"}
                 (Just _, Agg.ScopeSupplyChain) ->
                     throwError err400{errBody = "filter_exchange_type is not supported with scope=supply_chain (all entries are technosphere)"}
                 _ -> return ()
-            aggFn <- case aggregateParam of
-                Nothing -> return Agg.AggSum
-                Just "sum_quantity" -> return Agg.AggSum
-                Just "count" -> return Agg.AggCount
-                Just "share" -> return Agg.AggShare
-                Just other -> throwError err400{errBody = "aggregate must be one of: sum_quantity | count | share (got " <> BSL.fromStrict (T.encodeUtf8 other) <> ")"}
             let presetFilters = expandPreset classificationPresets presetParam
                 explicitFilters = mapMaybe parseClassFilter fclassParams
                 params =

From d9cdea8ea59b0fdd27f296491d3e74ccc1a7fe8c Mon Sep 17 00:00:00 2001
From: Christophe Combelles <ccomb@free.fr>
Date: Tue, 26 May 2026 00:55:21 +0200
Subject: [PATCH 06/43] refactor(loader): UnlinkedSummary becomes a Monoid
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Replace the hand-rolled emptyUnlinkedSummary / mergeUnlinkedSummaries
pair with a Semigroup + Monoid instance on UnlinkedSummary. Call sites
that used `foldr mergeUnlinkedSummaries emptyUnlinkedSummary summaries`
collapse to `mconcat summaries`; the scattered `emptyUnlinkedSummary`
constructor calls in fixExchangeLink and fixExchangeLinkByName become
`mempty`.

Categorically: UnlinkedSummary is the product of four monoids — a
Map T.Text [UnlinkedExchange] under unionWith (++), plus three Int
counters under addition. The instance is hand-written rather than
derived via Generically because bare Int has no canonical Monoid
(Sum vs Product is ambiguous) and wrapping the fields as Sum Int
would have leaked through every constructor and accessor.

Tests in test/LoaderSpec.hs updated to exercise the new <>/mempty
surface.
---
 src/Database/Loader.hs | 48 ++++++++++++++++++++----------------------
 test/LoaderSpec.hs     | 16 +++++++-------
 2 files changed, 31 insertions(+), 33 deletions(-)

diff --git a/src/Database/Loader.hs b/src/Database/Loader.hs
index 0898d18a..405a240d 100644
--- a/src/Database/Loader.hs
+++ b/src/Database/Loader.hs
@@ -68,8 +68,6 @@ module Database.Loader (
     generateActivityUUIDFromActivity,
     getReferenceProductUUID,
     UnlinkedSummary (..),
-    emptyUnlinkedSummary,
-    mergeUnlinkedSummaries,
     buildSupplierIndex,
     buildSupplierIndexByName,
     fixExchangeLinkByName,
@@ -234,7 +232,14 @@ data UnlinkedExchange = UnlinkedExchange
     }
     deriving (Eq, Ord, Show)
 
--- | Summary of unlinked exchanges grouped by consumer activity
+{- | Summary of unlinked exchanges grouped by consumer activity.
+
+Categorically the product of four monoids:
+'M.unionWith (++)' on the activity map, and addition on each 'Int'
+counter. We hand-write the instance because bare 'Int' has no canonical
+'Monoid' (Sum vs Product is ambiguous); wrapping the fields as 'Sum Int'
+would have leaked through every constructor and accessor.
+-}
 data UnlinkedSummary = UnlinkedSummary
     { usActivities :: !(M.Map T.Text [UnlinkedExchange]) -- consumer name → list of unlinked exchanges
     , usTotalLinks :: !Int
@@ -243,19 +248,12 @@ data UnlinkedSummary = UnlinkedSummary
     }
     deriving (Show)
 
--- | Empty unlinked summary
-emptyUnlinkedSummary :: UnlinkedSummary
-emptyUnlinkedSummary = UnlinkedSummary M.empty 0 0 0
+instance Semigroup UnlinkedSummary where
+    UnlinkedSummary a1 t1 f1 m1 <> UnlinkedSummary a2 t2 f2 m2 =
+        UnlinkedSummary (M.unionWith (++) a1 a2) (t1 + t2) (f1 + f2) (m1 + m2)
 
--- | Merge two unlinked summaries
-mergeUnlinkedSummaries :: UnlinkedSummary -> UnlinkedSummary -> UnlinkedSummary
-mergeUnlinkedSummaries s1 s2 =
-    UnlinkedSummary
-        { usActivities = M.unionWith (++) (usActivities s1) (usActivities s2)
-        , usTotalLinks = usTotalLinks s1 + usTotalLinks s2
-        , usFoundLinks = usFoundLinks s1 + usFoundLinks s2
-        , usMissingLinks = usMissingLinks s1 + usMissingLinks s2
-        }
+instance Monoid UnlinkedSummary where
+    mempty = UnlinkedSummary M.empty 0 0 0
 
 -- | Report grouped summary of unlinked exchanges
 reportUnlinkedSummary :: UnlinkedSummary -> IO ()
@@ -412,7 +410,7 @@ fixAllActivities :: ExchangeLinkContext -> ActivityMap -> (ActivityMap, Unlinked
 fixAllActivities ctx activities =
     let results = M.map (fixActivityExchanges ctx) activities
         summaries = map snd $ M.elems results
-        combinedSummary = foldr mergeUnlinkedSummaries emptyUnlinkedSummary summaries
+        combinedSummary = mconcat summaries
         fixedActivities = M.map fst results
      in (fixedActivities, combinedSummary)
 
@@ -420,7 +418,7 @@ fixAllActivities ctx activities =
 fixActivityExchanges :: ExchangeLinkContext -> Activity -> (Activity, UnlinkedSummary)
 fixActivityExchanges ctx act =
     let (fixedExchanges, summaries) = unzip $ map (fixExchangeLink ctx (activityName act)) (exchanges act)
-        combinedSummary = foldr mergeUnlinkedSummaries emptyUnlinkedSummary summaries
+        combinedSummary = mconcat summaries
      in (act{exchanges = fixedExchanges}, combinedSummary)
 
 {- | Fix a single exchange's activity link by (flowName, location) match.
@@ -462,13 +460,13 @@ fixExchangeLink ExchangeLinkContext{..} consumerName ex@TechnosphereExchange{tec
                                             Nothing -> unlinked flow lookupLoc
                 Nothing ->
                     (ex, UnlinkedSummary M.empty 1 0 1)
-    | otherwise = (ex, emptyUnlinkedSummary)
-fixExchangeLink _ _ ex@BiosphereExchange{} = (ex, emptyUnlinkedSummary)
+    | otherwise = (ex, mempty)
+fixExchangeLink _ _ ex@BiosphereExchange{} = (ex, mempty)
 -- A WasteExchange in input direction (consumed by treatment) would benefit
 -- from the same supplier-lookup logic as a technosphere Input, but at this
 -- stage we leave waste links to the cross-DB linker (see CrossLinking) and
 -- the downstream parsers. Pure pass-through here.
-fixExchangeLink _ _ ex@WasteExchange{} = (ex, emptyUnlinkedSummary)
+fixExchangeLink _ _ ex@WasteExchange{} = (ex, mempty)
 
 {- |
 Load all EcoSpold files with optimized parallel processing and deduplication.
@@ -567,7 +565,7 @@ fixAllActivitiesByName :: NameOnlyIndex -> TechFlowDB -> ActivityMap -> (Activit
 fixAllActivitiesByName idx techFlowDb activities =
     let results = M.map (fixActivityExchangesByName idx techFlowDb) activities
         summaries = map snd $ M.elems results
-        combinedSummary = foldr mergeUnlinkedSummaries emptyUnlinkedSummary summaries
+        combinedSummary = mconcat summaries
         fixedActivities = M.map fst results
      in (fixedActivities, combinedSummary)
 
@@ -575,7 +573,7 @@ fixAllActivitiesByName idx techFlowDb activities =
 fixActivityExchangesByName :: NameOnlyIndex -> TechFlowDB -> Activity -> (Activity, UnlinkedSummary)
 fixActivityExchangesByName idx techFlowDb act =
     let (fixedExchanges, summaries) = unzip $ map (fixExchangeLinkByName idx techFlowDb (activityName act)) (exchanges act)
-        combinedSummary = foldr mergeUnlinkedSummaries emptyUnlinkedSummary summaries
+        combinedSummary = mconcat summaries
      in (act{exchanges = fixedExchanges}, combinedSummary)
 
 {- | Fix a single exchange's activity link using name-only matching.
@@ -608,10 +606,10 @@ fixExchangeLinkByName idx techFlowDb consumerName ex@TechnosphereExchange{techFl
             Nothing ->
                 -- Flow not in technosphere map — shouldn't happen but be safe
                 (ex, UnlinkedSummary M.empty 1 0 1)
-    | otherwise = (ex, emptyUnlinkedSummary) -- Reference products: nothing to relink
-fixExchangeLinkByName _ _ _ ex@BiosphereExchange{} = (ex, emptyUnlinkedSummary)
+    | otherwise = (ex, mempty) -- Reference products: nothing to relink
+fixExchangeLinkByName _ _ _ ex@BiosphereExchange{} = (ex, mempty)
 -- Waste link resolution is deferred to the cross-DB linker path.
-fixExchangeLinkByName _ _ _ ex@WasteExchange{} = (ex, emptyUnlinkedSummary)
+fixExchangeLinkByName _ _ _ ex@WasteExchange{} = (ex, mempty)
 
 -- | Load EcoSpold files from directory
 loadEcoSpoldDirectory :: M.Map T.Text T.Text -> FilePath -> IO (Either T.Text SimpleDatabase)
diff --git a/test/LoaderSpec.hs b/test/LoaderSpec.hs
index afc0da3c..a51d16d0 100644
--- a/test/LoaderSpec.hs
+++ b/test/LoaderSpec.hs
@@ -143,26 +143,26 @@ spec = do
             getReferenceProductUUID act `shouldBe` UUID.nil
 
     -- -----------------------------------------------------------------------
-    -- mergeUnlinkedSummaries
+    -- UnlinkedSummary Monoid (product of monoids: Map-union + 3× Int addition)
     -- -----------------------------------------------------------------------
-    describe "mergeUnlinkedSummaries" $ do
-        it "sums all counters" $ do
+    describe "UnlinkedSummary Monoid" $ do
+        it "sums all counters via (<>)" $ do
             let s1 = UnlinkedSummary M.empty 10 8 2
                 s2 = UnlinkedSummary M.empty 5 3 2
-                m = mergeUnlinkedSummaries s1 s2
+                m = s1 <> s2
             usTotalLinks m `shouldBe` 15
             usFoundLinks m `shouldBe` 11
             usMissingLinks m `shouldBe` 4
 
-        it "unions activity maps" $ do
+        it "unions activity maps via (<>)" $ do
             let s1 = UnlinkedSummary (M.singleton "actA" []) 1 0 1
                 s2 = UnlinkedSummary (M.singleton "actB" []) 1 0 1
-                m = mergeUnlinkedSummaries s1 s2
+                m = s1 <> s2
             M.size (usActivities m) `shouldBe` 2
 
-        it "emptyUnlinkedSummary is the identity" $ do
+        it "mempty is the identity" $ do
             let s = UnlinkedSummary M.empty 3 2 1
-                m = mergeUnlinkedSummaries s emptyUnlinkedSummary
+                m = s <> mempty
             usTotalLinks m `shouldBe` 3
             usFoundLinks m `shouldBe` 2
             usMissingLinks m `shouldBe` 1

From 4c1ceaaebdbea019864612e212cb9b8177b409f2 Mon Sep 17 00:00:00 2001
From: Christophe Combelles <ccomb@free.fr>
Date: Tue, 26 May 2026 00:56:59 +0200
Subject: [PATCH 07/43] feat(app): introduce AppM = ReaderT AppEnv Handler
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add App.Env exposing:
  - AppEnv: the read-only application environment (DatabaseManager,
    max tree depth, optional password, hosting config, classification
    presets) currently threaded explicitly through five lcaServer
    parameters and into every handler.
  - AppM = ReaderT AppEnv Handler, with GeneralizedNewtypeDeriving on
    Functor / Applicative / Monad / MonadIO / MonadReader / MonadError.
  - runApp :: AppEnv -> AppM a -> Handler a, the natural transformation
    AppM ~> Handler that Servant's hoistServer will use to lift a
    ServerT api AppM into ServerT api Handler at the API boundary.
  - Five "Has*" capability classes (HasDatabaseManager, HasMaxTreeDepth,
    HasPassword, HasHostingConfig, HasClassificationPresets) — narrow
    typeclass projections of the environment so handlers can declare
    only the slice they consume.

Pure addition. No caller migrated in this commit. The next commit
threads AppM through lcaServer and the ~30 handler bodies, removing
the requireDatabaseByName + getMergedUnitConfig boilerplate that
currently begins every handler.

Categorically: AppM lives in the Kleisli category of Reader AppEnv
lifted into Handler's Kleisli category. The Has-pattern is the
projection morphism from the environment object onto each component.
---
 src/App/Env.hs | 129 +++++++++++++++++++++++++++++++++++++++++++++++++
 volca.cabal    |   1 +
 2 files changed, 130 insertions(+)
 create mode 100644 src/App/Env.hs

diff --git a/src/App/Env.hs b/src/App/Env.hs
new file mode 100644
index 00000000..9cd1d3b6
--- /dev/null
+++ b/src/App/Env.hs
@@ -0,0 +1,129 @@
+{-# LANGUAGE DerivingStrategies #-}
+{-# LANGUAGE FlexibleContexts #-}
+{-# LANGUAGE FlexibleInstances #-}
+{-# LANGUAGE GeneralizedNewtypeDeriving #-}
+{-# LANGUAGE MultiParamTypeClasses #-}
+{-# LANGUAGE RankNTypes #-}
+
+{- | The 'AppM' capability monad and the read-only environment that every
+HTTP handler closes over.
+
+The categorical structure: 'AppM' is a 'Reader' monad transformer over
+Servant's 'Handler', so it lives in the Kleisli category of @Reader
+AppEnv@ lifted into @Handler@'s Kleisli category. 'hoistServer' (used
+in 'API.Routes.lcaServer') is the natural transformation
+@forall a. AppM a -> Handler a@ that turns a @ServerT api AppM@ into a
+@ServerT api Handler@ — Servant doesn't care about the monad, as long
+as we can collapse it down to 'Handler' at the boundary.
+
+The 'Has*' typeclasses are narrow capability witnesses: a function with
+@(MonadReader r m, HasDatabaseManager r, MonadIO m) => m a@ declares
+exactly the slice of the environment it needs, without committing to
+the concrete monad. This is the "Has-pattern" of mtl-style: capability
+classes are *projections* out of the environment object.
+-}
+module App.Env (
+    -- * Environment
+    AppEnv (..),
+    mkAppEnv,
+
+    -- * Monad
+    AppM (..),
+    runApp,
+
+    -- * Capability classes (Has-pattern)
+    HasDatabaseManager (..),
+    HasMaxTreeDepth (..),
+    HasPassword (..),
+    HasHostingConfig (..),
+    HasClassificationPresets (..),
+) where
+
+import Control.Monad.Except (MonadError)
+import Control.Monad.IO.Class (MonadIO)
+import Control.Monad.Reader (MonadReader, ReaderT (..))
+import qualified Config
+import Database.Manager (DatabaseManager)
+import Servant (Handler, ServerError)
+
+-- | Read-only application environment threaded through every request.
+data AppEnv = AppEnv
+    { aeDbManager :: !DatabaseManager
+    , aeMaxTreeDepth :: !Int
+    , aePassword :: !(Maybe String)
+    , aeHostingConfig :: !(Maybe Config.HostingConfig)
+    , aeClassificationPresets :: ![Config.ClassificationPreset]
+    }
+
+-- | Smart constructor — keeps callers from positionally swapping fields.
+mkAppEnv
+    :: DatabaseManager
+    -> Int
+    -> Maybe String
+    -> Maybe Config.HostingConfig
+    -> [Config.ClassificationPreset]
+    -> AppEnv
+mkAppEnv = AppEnv
+
+{- | Servant 'Handler' threaded with a read-only 'AppEnv'. Deriving the
+@MonadReader@ / @MonadIO@ / @MonadError ServerError@ instances via
+@GeneralizedNewtypeDeriving@ keeps the wrapper free at runtime: 'AppM'
+is representationally a function @AppEnv -> IO (Either ServerError a)@
+under the hood, identical to 'Handler' modulo the @AppEnv@ argument.
+-}
+newtype AppM a = AppM {unAppM :: ReaderT AppEnv Handler a}
+    deriving newtype (Functor, Applicative, Monad, MonadIO, MonadReader AppEnv, MonadError ServerError)
+
+{- | Discharge an 'AppM' computation against a concrete environment,
+producing a plain Servant 'Handler'. Use in 'API.Routes.lcaServer' as
+@hoistServer lcaAPI (runApp env) handlers@.
+
+This is the natural transformation @AppM ~> Handler@ that Servant's
+@hoistServer@ requires; it lifts the entire @ServerT api AppM@ into
+@ServerT api Handler@ point-free.
+-}
+runApp :: AppEnv -> AppM a -> Handler a
+runApp env (AppM m) = runReaderT m env
+
+-- ---------------------------------------------------------------------------
+-- Has-pattern: narrow capability witnesses
+-- ---------------------------------------------------------------------------
+
+{- | Witness that the environment exposes a 'DatabaseManager'. Handlers
+that need DB access should constrain on @HasDatabaseManager r@ rather
+than the concrete 'AppEnv', so the same code can run in tests with a
+narrower env.
+-}
+class HasDatabaseManager r where
+    getDatabaseManager :: r -> DatabaseManager
+
+instance HasDatabaseManager AppEnv where
+    getDatabaseManager = aeDbManager
+
+-- | Max tree depth limit (anti-DoS guard for /tree and /graph endpoints).
+class HasMaxTreeDepth r where
+    getMaxTreeDepth :: r -> Int
+
+instance HasMaxTreeDepth AppEnv where
+    getMaxTreeDepth = aeMaxTreeDepth
+
+-- | Optional admin password gating @POST /auth@.
+class HasPassword r where
+    getPassword :: r -> Maybe String
+
+instance HasPassword AppEnv where
+    getPassword = aePassword
+
+-- | Hosting configuration consumed by @GET /hosting@.
+class HasHostingConfig r where
+    getHostingConfig :: r -> Maybe Config.HostingConfig
+
+instance HasHostingConfig AppEnv where
+    getHostingConfig = aeHostingConfig
+
+-- | Classification presets used in @/aggregate@ and @/supply-chain@ filters.
+class HasClassificationPresets r where
+    getClassificationPresets :: r -> [Config.ClassificationPreset]
+
+instance HasClassificationPresets AppEnv where
+    getClassificationPresets = aeClassificationPresets
diff --git a/volca.cabal b/volca.cabal
index 458a07d9..a5269356 100644
--- a/volca.cabal
+++ b/volca.cabal
@@ -23,6 +23,7 @@ library
   import:              warnings
   hs-source-dirs:      src
   exposed-modules:     Types
+                     , App.Env
                      , Data.Validation
                      , Progress
                      , UnitConversion

From d0a9b1f4faae4e1caaa97c74518a2585d7e856fd Mon Sep 17 00:00:00 2001
From: Christophe Combelles <ccomb@free.fr>
Date: Tue, 26 May 2026 01:08:14 +0200
Subject: [PATCH 08/43] refactor(api): migrate Servant handlers to AppM via
 hoistServer
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Convert lcaServer and the underlying handler / helper layer in
src/API/Routes.hs and src/API/DatabaseHandlers.hs to live in the AppM
capability monad (ReaderT AppEnv Handler) introduced in the prior
commit. The natural transformation `runApp env :: forall a. AppM a ->
Handler a` is applied via Servant.hoistServer at the API boundary, so
the LCAAPI route type itself is unchanged and openapi.json stays
byte-identical against the pre-branch baseline.

What changes architecturally:
  - lcaServer :: DatabaseManager -> Int -> Maybe String
      -> Maybe Config.HostingConfig -> [Config.ClassificationPreset]
      -> Server LCAAPI
    becomes lcaServer :: AppEnv -> Server LCAAPI, with the five env
    fields exposed via let-bindings inside the where block for the
    handlers' closure-style access (LOC-equivalent to inserting an
    `asks` at every call site, and easier to read).
  - All ~30 fanout calls of DBHandlers.X dbManager collapse to
    DBHandlers.X — DBHandlers is now AppM-typed end to end.
  - The shared cross-DB helpers (requireDatabaseByName,
    requireFullyLinked, inventoryWithDeps, solutionWithDeps,
    inventoriesWithDeps, solutionsWithDeps, activityLCIABatchH,
    batchImpactsH) drop their DatabaseManager-as-first-arg parameter
    and read it from the env via `asks getDatabaseManager`.
  - app/Main.hs packs the AppEnv once with mkAppEnv before passing it
    to lcaServer.
  - src/API/BatchImpacts.hs (the non-Servant MCP-side wrapper) builds
    a minimal AppEnv on the fly and discharges the AppM with runApp +
    Servant.runHandler.

What does *not* change:
  - The LCAAPI route surface and JSON contract (openapi.json
    byte-identical).
  - The full hspec suite (1052/1052) is green.
  - The handler logic and error mapping.

This commit is roughly LOC-neutral on its own (the `asks` insertions
in helpers offset the arg removals at call sites), but it is the
foundation that the next commit (optparse dedup) and any future
capability-style refactor (MCP Freer, finer Has-classes per endpoint,
applicative request validation) build on.

Categorically: lcaServer now lives in the Kleisli category of `Reader
AppEnv` lifted into Handler's Kleisli category. hoistServer is the
ServerT-functor's action on the natural transformation AppM ~> Handler.
---
 app/Main.hs                 |   3 +-
 src/API/BatchImpacts.hs     |  20 +--
 src/API/DatabaseHandlers.hs | 122 ++++++++------
 src/API/Routes.hs           | 314 +++++++++++++++++++-----------------
 4 files changed, 251 insertions(+), 208 deletions(-)

diff --git a/app/Main.hs b/app/Main.hs
index 6a3892b4..62593b08 100644
--- a/app/Main.hs
+++ b/app/Main.hs
@@ -37,6 +37,7 @@ import Progress
 import API.Licenses (licensesResponse)
 import API.MCP (mcpApp, toolDefinitions)
 import API.Routes (lcaAPI, lcaServer, volcaOpenApi)
+import App.Env (mkAppEnv)
 import Data.Aeson (encode)
 import qualified Data.ByteString as BS
 import qualified Data.ByteString.Char8 as C8
@@ -284,7 +285,7 @@ createServerApp dbManager maxTreeDepth staticDir desktopMode password hostingCon
                 respond $ responseLBS status200 [(hContentType, "text/html; charset=utf-8")] swaggerHtml
             | path == "/api/v1/logs/stream" -> handleLogStream req respond
             | C8.pack "/api/" `BS.isPrefixOf` path ->
-                serve lcaAPI (lcaServer dbManager maxTreeDepth password hostingConfig filterPresets) req respond
+                serve lcaAPI (lcaServer (mkAppEnv dbManager maxTreeDepth password hostingConfig filterPresets)) req respond
             | C8.pack "/static/" `BS.isPrefixOf` path -> serveStripped
             | otherwise -> serveSpaIndex
 
diff --git a/src/API/BatchImpacts.hs b/src/API/BatchImpacts.hs
index b7b25810..1624ff49 100644
--- a/src/API/BatchImpacts.hs
+++ b/src/API/BatchImpacts.hs
@@ -21,6 +21,7 @@ module API.BatchImpacts (
 
 import API.Routes (activityLCIABatchH, batchImpactsH, collectionNotLoadedPrefix, databaseNotLoadedPrefix)
 import API.Types (BatchImpactsRequest (..), BatchImpactsResponse, LCIABatchResult, SubstitutionRequest)
+import App.Env (mkAppEnv, runApp)
 import Control.Concurrent.STM (readTVarIO)
 import qualified Data.ByteString.Lazy as BSL
 import qualified Data.Map as M
@@ -77,7 +78,8 @@ runActivityLCIABatch ::
     Maybe SubstitutionRequest ->
     IO (Either BatchError LCIABatchResult)
 runActivityLCIABatch dbm dbName pid coll mSub = do
-    res <- Servant.runHandler (activityLCIABatchH dbm dbName pid coll mSub)
+    let env = mkAppEnv dbm 0 Nothing Nothing []
+    res <- Servant.runHandler (runApp env (activityLCIABatchH dbName pid coll mSub))
     case res of
         Right lbr -> pure (Right lbr)
         Left se -> Left <$> translateErrorIO dbm se
@@ -99,15 +101,15 @@ runBatchImpacts ::
     [Text] ->
     IO (Either BatchError BatchImpactsResponse)
 runBatchImpacts dbm dbName coll topFlows pids = do
+    let env = mkAppEnv dbm 0 Nothing Nothing []
     res <-
-        Servant.runHandler
-            ( batchImpactsH
-                dbm
-                dbName
-                coll
-                topFlows
-                (BatchImpactsRequest{birProcessIds = pids})
-            )
+        Servant.runHandler $
+            runApp env $
+                batchImpactsH
+                    dbName
+                    coll
+                    topFlows
+                    (BatchImpactsRequest{birProcessIds = pids})
     case res of
         Right r -> pure (Right r)
         Left se -> Left <$> translateErrorIO dbm se
diff --git a/src/API/DatabaseHandlers.hs b/src/API/DatabaseHandlers.hs
index abb4782d..c87f21fe 100644
--- a/src/API/DatabaseHandlers.hs
+++ b/src/API/DatabaseHandlers.hs
@@ -131,17 +131,21 @@ import Database.Upload (
  )
 import qualified Database.UploadedDatabase as UploadedDB
 import Types (Database (..), GeographyPolicy (..), unresolvedCount)
+import App.Env (AppM, HasDatabaseManager (..))
+import Control.Monad.Reader (asks)
 
 -- | List all databases
-getDatabases :: DatabaseManager -> Handler DatabaseListResponse
-getDatabases dbManager = do
+getDatabases :: AppM DatabaseListResponse
+getDatabases = do
+    dbManager <- asks getDatabaseManager
     dbStatuses <- liftIO $ listDatabases dbManager
     let statusList = map convertDbStatus dbStatuses
     return $ DatabaseListResponse statusList
 
 -- | Load a database on demand
-loadDatabaseHandler :: DatabaseManager -> Text -> Handler LoadDatabaseResponse
-loadDatabaseHandler dbManager dbName = do
+loadDatabaseHandler :: Text -> AppM LoadDatabaseResponse
+loadDatabaseHandler dbName= do
+    dbManager <- asks getDatabaseManager
     eitherResult <- liftIO $ try $ loadDatabase dbManager dbName
     case eitherResult of
         Left (ex :: SomeException) ->
@@ -152,16 +156,18 @@ loadDatabaseHandler dbManager dbName = do
             return $ LoadSucceeded status depResults
 
 -- | Unload a database from memory
-unloadDatabaseHandler :: DatabaseManager -> Text -> Handler ActivateResponse
-unloadDatabaseHandler dbManager dbName =
+unloadDatabaseHandler :: Text -> AppM ActivateResponse
+unloadDatabaseHandler dbName = do
+    dbManager <- asks getDatabaseManager
     simpleAction (unloadDatabase dbManager dbName) ("Unloaded database: " <> dbName)
 
 {- | Re-run cross-DB linking for a loaded database against the currently-loaded
 dependency databases. Lets the user recover from loads that happened in a
 suboptimal order without reloading the whole database.
 -}
-relinkDatabaseHandler :: DatabaseManager -> Text -> Handler RelinkResponse
-relinkDatabaseHandler dbManager dbName = do
+relinkDatabaseHandler :: Text -> AppM RelinkResponse
+relinkDatabaseHandler dbName= do
+    dbManager <- asks getDatabaseManager
     res <- liftIO $ relinkDatabase dbManager dbName
     case res of
         Left err -> throwError err404{errBody = BSL.fromStrict $ T.encodeUtf8 err}
@@ -176,13 +182,15 @@ relinkDatabaseHandler dbManager dbName = do
                     }
 
 -- | Delete an uploaded database (move to trash)
-deleteDatabaseHandler :: DatabaseManager -> Text -> Handler ActivateResponse
-deleteDatabaseHandler dbManager dbName =
+deleteDatabaseHandler :: Text -> AppM ActivateResponse
+deleteDatabaseHandler dbName = do
+    dbManager <- asks getDatabaseManager
     simpleAction (removeDatabase dbManager dbName) ("Deleted database: " <> dbName)
 
 -- | Upload a new database
-uploadDatabaseHandler :: DatabaseManager -> UploadRequest -> Handler UploadResponse
-uploadDatabaseHandler dbManager req = do
+uploadDatabaseHandler :: UploadRequest -> AppM UploadResponse
+uploadDatabaseHandler req= do
+    dbManager <- asks getDatabaseManager
     -- Decode base64 ZIP data
     let zipDataResult = B64.decode $ T.encodeUtf8 $ urFileData req
     case zipDataResult of
@@ -314,8 +322,9 @@ formatToText UnknownFormat = "unknown"
 {- | Get database setup info
 Returns completeness, missing suppliers, and dependency suggestions
 -}
-getDatabaseSetupHandler :: DatabaseManager -> Text -> Handler DatabaseSetupInfo
-getDatabaseSetupHandler dbManager dbName = do
+getDatabaseSetupHandler :: Text -> AppM DatabaseSetupInfo
+getDatabaseSetupHandler dbName= do
+    dbManager <- asks getDatabaseManager
     eitherResult <- liftIO $ try $ getDatabaseSetupInfo dbManager dbName
     case eitherResult of
         Left (ex :: SomeException) ->
@@ -327,8 +336,9 @@ getDatabaseSetupHandler dbManager dbName = do
 {- | Add a dependency to a staged database
 Runs cross-DB linking and returns updated setup info
 -}
-addDependencyHandler :: DatabaseManager -> Text -> Text -> Handler DatabaseSetupInfo
-addDependencyHandler dbManager dbName depName = do
+addDependencyHandler :: Text -> Text -> AppM DatabaseSetupInfo
+addDependencyHandler dbName depName= do
+    dbManager <- asks getDatabaseManager
     result <- liftIO $ addDependencyToStaged dbManager dbName depName
     case result of
         Left err -> throwError $ err400{errBody = BSL.fromStrict $ T.encodeUtf8 err}
@@ -337,16 +347,18 @@ addDependencyHandler dbManager dbName depName = do
 {- | Remove a dependency from a staged database
 Re-runs cross-DB linking and returns updated setup info
 -}
-removeDependencyHandler :: DatabaseManager -> Text -> Text -> Handler DatabaseSetupInfo
-removeDependencyHandler dbManager dbName depName = do
+removeDependencyHandler :: Text -> Text -> AppM DatabaseSetupInfo
+removeDependencyHandler dbName depName= do
+    dbManager <- asks getDatabaseManager
     result <- liftIO $ removeDependencyFromStaged dbManager dbName depName
     case result of
         Left err -> throwError $ err400{errBody = BSL.fromStrict $ T.encodeUtf8 err}
         Right setupInfo -> return setupInfo
 
 -- | Change the data path for an uploaded (staged) database
-setDataPathHandler :: DatabaseManager -> Text -> Value -> Handler DatabaseSetupInfo
-setDataPathHandler dbManager dbName body = do
+setDataPathHandler :: Text -> Value -> AppM DatabaseSetupInfo
+setDataPathHandler dbName body= do
+    dbManager <- asks getDatabaseManager
     -- Extract "path" from JSON body
     let mPath = case body of
             A.Object obj -> case KM.lookup "path" obj of
@@ -364,8 +376,9 @@ setDataPathHandler dbManager dbName body = do
 {- | Finalize a staged database
 Builds matrices and makes it ready for queries
 -}
-finalizeDatabaseHandler :: DatabaseManager -> Text -> Handler ActivateResponse
-finalizeDatabaseHandler dbManager dbName = do
+finalizeDatabaseHandler :: Text -> AppM ActivateResponse
+finalizeDatabaseHandler dbName= do
+    dbManager <- asks getDatabaseManager
     eitherResult <- liftIO $ try $ finalizeDatabase dbManager dbName
     case eitherResult of
         Left (ex :: SomeException) ->
@@ -378,8 +391,9 @@ finalizeDatabaseHandler dbManager dbName = do
 {- | Upload a new method collection
 Same flow as database upload but creates MethodConfig entry
 -}
-uploadMethodHandler :: DatabaseManager -> UploadRequest -> Handler UploadResponse
-uploadMethodHandler dbManager req = do
+uploadMethodHandler :: UploadRequest -> AppM UploadResponse
+uploadMethodHandler req= do
+    dbManager <- asks getDatabaseManager
     let zipDataResult = B64.decode $ T.encodeUtf8 $ urFileData req
     case zipDataResult of
         Left err -> return $ UploadResponse False ("Invalid base64 data: " <> T.pack err) Nothing Nothing
@@ -433,12 +447,13 @@ uploadMethodHandler dbManager req = do
                             (Just $ formatToText $ urFormat uploadResult)
 
 -- | Delete an uploaded method collection
-deleteMethodHandler :: DatabaseManager -> Text -> Handler ActivateResponse
-deleteMethodHandler dbManager name =
+deleteMethodHandler :: Text -> AppM ActivateResponse
+deleteMethodHandler name = do
+    dbManager <- asks getDatabaseManager
     simpleAction (removeMethodCollection dbManager name) ("Deleted method: " <> name)
 
 -- | Common pattern: run an IO action that returns Either Text (), map to ActivateResponse
-simpleAction :: IO (Either Text ()) -> Text -> Handler ActivateResponse
+simpleAction :: IO (Either Text ()) -> Text -> AppM ActivateResponse
 simpleAction action successMsg = do
     result <- liftIO action
     return $ case result of
@@ -481,29 +496,34 @@ convertRefDataStatus s =
         , rdaEntryCount = rdsEntryCount s
         }
 
-listRefData :: RefDataKind -> DatabaseManager -> Handler RefDataListResponse
-listRefData kind mgr = do
+listRefData :: RefDataKind -> AppM RefDataListResponse
+listRefData kind = do
+    dbManager <- asks getDatabaseManager
     let (listFn, _, _, _, _, _) = rdOps kind
-    statuses <- liftIO $ listFn mgr
+    statuses <- liftIO $ listFn dbManager
     return $ RefDataListResponse (map convertRefDataStatus statuses)
 
-loadRefData :: RefDataKind -> DatabaseManager -> Text -> Handler ActivateResponse
-loadRefData kind mgr name = do
+loadRefData :: RefDataKind -> Text -> AppM ActivateResponse
+loadRefData kind name = do
+    dbManager <- asks getDatabaseManager
     let (_, loadFn, _, _, _, _) = rdOps kind
-    simpleAction (loadFn mgr name) ("Loaded: " <> name)
+    simpleAction (loadFn dbManager name) ("Loaded: " <> name)
 
-unloadRefData :: RefDataKind -> DatabaseManager -> Text -> Handler ActivateResponse
-unloadRefData kind mgr name = do
+unloadRefData :: RefDataKind -> Text -> AppM ActivateResponse
+unloadRefData kind name = do
+    dbManager <- asks getDatabaseManager
     let (_, _, unloadFn, _, _, _) = rdOps kind
-    simpleAction (unloadFn mgr name) ("Unloaded: " <> name)
+    simpleAction (unloadFn dbManager name) ("Unloaded: " <> name)
 
-deleteRefData :: RefDataKind -> DatabaseManager -> Text -> Handler ActivateResponse
-deleteRefData kind mgr name = do
+deleteRefData :: RefDataKind -> Text -> AppM ActivateResponse
+deleteRefData kind name = do
+    dbManager <- asks getDatabaseManager
     let (_, _, _, _, removeFn, _) = rdOps kind
-    simpleAction (removeFn mgr name) ("Deleted: " <> name)
+    simpleAction (removeFn dbManager name) ("Deleted: " <> name)
 
-uploadRefData :: RefDataKind -> DatabaseManager -> UploadRequest -> Handler UploadResponse
-uploadRefData kind mgr req = do
+uploadRefData :: RefDataKind -> UploadRequest -> AppM UploadResponse
+uploadRefData kind req = do
+    dbManager <- asks getDatabaseManager
     let (_, _, _, addFn, _, subdir) = rdOps kind
     let csvDataResult = B64.decode $ T.encodeUtf8 $ urFileData req
     case csvDataResult of
@@ -535,24 +555,26 @@ uploadRefData kind mgr req = do
                         , rdIsAuto = False
                         , rdDescription = urDescription req
                         }
-            liftIO $ addFn mgr rd
+            liftIO $ addFn dbManager rd
             return $ UploadResponse True "Uploaded successfully" (Just slug) Nothing
   where
     quote t = "\"" <> T.replace "\"" "\\\"" t <> "\""
 
-getFlowSynonymGroupsHandler :: DatabaseManager -> Text -> Handler SynonymGroupsResponse
-getFlowSynonymGroupsHandler mgr name = do
-    result <- liftIO $ getFlowSynonymGroups mgr name
+getFlowSynonymGroupsHandler :: Text -> AppM SynonymGroupsResponse
+getFlowSynonymGroupsHandler name = do
+    dbManager <- asks getDatabaseManager
+    result <- liftIO $ getFlowSynonymGroups dbManager name
     case result of
         Left err -> throwError $ err404{errBody = BSL.fromStrict $ T.encodeUtf8 err}
         Right groups -> return $ SynonymGroupsResponse groups
 
-downloadRefDataHandler :: RefDataKind -> DatabaseManager -> Text -> Handler (Headers '[Header "Content-Disposition" Text] BinaryContent)
-downloadRefDataHandler kind mgr name = do
+downloadRefDataHandler :: RefDataKind -> Text -> AppM (Headers '[Header "Content-Disposition" Text] BinaryContent)
+downloadRefDataHandler kind name = do
+    dbManager <- asks getDatabaseManager
     let tvar = case kind of
-            FlowSynonyms -> dmAvailableFlowSyns mgr
-            CompartmentMappings -> dmAvailableCompMaps mgr
-            UnitDefs -> dmAvailableUnitDefs mgr
+            FlowSynonyms -> dmAvailableFlowSyns dbManager
+            CompartmentMappings -> dmAvailableCompMaps dbManager
+            UnitDefs -> dmAvailableUnitDefs dbManager
     available <- liftIO $ readTVarIO tvar
     case M.lookup name available of
         Nothing -> throwError $ err404{errBody = "Not found"}
diff --git a/src/API/Routes.hs b/src/API/Routes.hs
index e7ff05b9..3bcfc67f 100644
--- a/src/API/Routes.hs
+++ b/src/API/Routes.hs
@@ -55,6 +55,8 @@ import qualified SharedSolver
 import Tree (buildLoopAwareTree)
 import Types
 import qualified Version
+import App.Env (AppEnv (..), AppM, HasDatabaseManager (..), runApp)
+import Control.Monad.Reader (asks)
 
 -- | API type definition - RESTful design with focused endpoints
 type LCAAPI =
@@ -184,8 +186,9 @@ notLoadedBody :: Text -> Text -> BSL.ByteString
 notLoadedBody prefix name = BSL.fromStrict (T.encodeUtf8 (prefix <> name))
 
 -- | Get database by name, throw 404 if not loaded
-requireDatabaseByName :: DatabaseManager -> Text -> Handler (Database, SharedSolver)
-requireDatabaseByName dbManager dbName = do
+requireDatabaseByName :: Text -> AppM (Database, SharedSolver)
+requireDatabaseByName dbName = do
+    dbManager <- asks getDatabaseManager
     maybeLoaded <- liftIO $ getDatabase dbManager dbName
     case maybeLoaded of
         Just loaded -> return (ldDatabase loaded, ldSharedSolver loaded)
@@ -195,7 +198,7 @@ requireDatabaseByName dbManager dbName = do
 the user to load the missing dep DBs (or POST /relink) rather than
 silently undercounting impacts.
 -}
-requireFullyLinked :: Text -> Database -> Handler ()
+requireFullyLinked :: Text -> Database -> AppM ()
 requireFullyLinked dbName db =
     let n = unresolvedCount (dbLinkingStats db)
      in when (n > 0) $
@@ -217,17 +220,18 @@ requireFullyLinked dbName db =
                     }
 
 -- | Inventory with cross-DB back-substitution; maps unit-conversion errors to 422.
-inventoryWithDeps :: DatabaseManager -> Text -> Database -> SharedSolver -> ProcessId -> Handler Inventory
-inventoryWithDeps dbManager dbName db solver pid =
-    SharedSolver.csInventory <$> solutionWithDeps dbManager dbName db solver pid
+inventoryWithDeps :: Text -> Database -> SharedSolver -> ProcessId -> AppM Inventory
+inventoryWithDeps dbName db solver pid =
+    SharedSolver.csInventory <$> solutionWithDeps dbName db solver pid
 
 {- | Cross-DB inventory + per-DB scaling vectors. The scalings are needed by
 the regionalized LCIA path (per-DB dot products summed across all DBs
 reached at request time); the inventory alone is enough for non-regional
 methods.
 -}
-solutionWithDeps :: DatabaseManager -> Text -> Database -> SharedSolver -> ProcessId -> Handler SharedSolver.CrossDBSolution
-solutionWithDeps dbManager dbName db solver pid = do
+solutionWithDeps :: Text -> Database -> SharedSolver -> ProcessId -> AppM SharedSolver.CrossDBSolution
+solutionWithDeps dbName db solver pid = do
+    dbManager <- asks getDatabaseManager
     requireFullyLinked dbName db
     unitCfg <- liftIO $ getMergedUnitConfig dbManager
     res <-
@@ -244,13 +248,14 @@ solutionWithDeps dbManager dbName db solver pid = do
         Left err -> throwError err422{errBody = BSL.fromStrict $ T.encodeUtf8 err}
 
 -- | Batch inventory with cross-DB back-substitution; maps unit-conversion errors to 422.
-inventoriesWithDeps :: DatabaseManager -> Text -> Database -> SharedSolver -> [ProcessId] -> Handler [Inventory]
-inventoriesWithDeps dbManager dbName db solver pids =
-    map SharedSolver.csInventory <$> solutionsWithDeps dbManager dbName db solver pids
+inventoriesWithDeps :: Text -> Database -> SharedSolver -> [ProcessId] -> AppM [Inventory]
+inventoriesWithDeps dbName db solver pids =
+    map SharedSolver.csInventory <$> solutionsWithDeps dbName db solver pids
 
 -- | Batch variant of 'solutionWithDeps'.
-solutionsWithDeps :: DatabaseManager -> Text -> Database -> SharedSolver -> [ProcessId] -> Handler [SharedSolver.CrossDBSolution]
-solutionsWithDeps dbManager dbName db solver pids = do
+solutionsWithDeps :: Text -> Database -> SharedSolver -> [ProcessId] -> AppM [SharedSolver.CrossDBSolution]
+solutionsWithDeps dbName db solver pids = do
+    dbManager <- asks getDatabaseManager
     requireFullyLinked dbName db
     unitCfg <- liftIO $ getMergedUnitConfig dbManager
     res <-
@@ -327,7 +332,7 @@ mkCrossDBContrib dbManager rootDbName _flowDB unitDB score ((depDbName, pid), c)
                 }
 
 -- | Helper function to validate ProcessId and lookup activity
-withValidatedActivity :: Database -> Text -> (Activity -> Handler a) -> Handler a
+withValidatedActivity :: Database -> Text -> (Activity -> AppM a) -> AppM a
 withValidatedActivity db processId action = do
     case Service.resolveActivityByProcessId db processId of
         Left (Service.InvalidProcessId errorMsg) -> throwError err400{errBody = BSL.fromStrict $ T.encodeUtf8 errorMsg}
@@ -338,7 +343,7 @@ withValidatedActivity db processId action = do
 {- | Helper function to validate UUID and lookup flow. Returns a tagged sum
 so callers can dispatch on tech vs bio.
 -}
-withValidatedFlow :: Database -> Text -> (FlowKind -> Handler a) -> Handler a
+withValidatedFlow :: Database -> Text -> (FlowKind -> AppM a) -> AppM a
 withValidatedFlow db uuid action = do
     case Service.validateUUID uuid of
         Left (Service.InvalidUUID errorMsg) -> throwError err400{errBody = BSL.fromStrict $ T.encodeUtf8 errorMsg}
@@ -396,7 +401,7 @@ expandPreset presets (Just pn) = case find (\p -> Config.cpName p == pn) presets
 -- ============================================================================
 -- Hoisted helpers — previously in lcaServer's `where`. Lifted to top level so
 -- non-Servant callers (notably src/API/BatchImpacts.hs and any client of the
--- LCIA batch pipeline outside the Servant Handler stack) can reuse them.
+-- LCIA batch pipeline outside the Servant AppM stack) can reuse them.
 --
 -- Behavior is byte-identical to the original where-bound versions.
 -- ============================================================================
@@ -474,7 +479,7 @@ logLCIAResult result method = do
 HTTP status. Validates the resolved ProcessId against the technosphere
 matrix index too — see Service.validateProcessIdInMatrixIndex.
 -}
-resolveOrThrow :: Database -> Text -> Handler (ProcessId, Activity)
+resolveOrThrow :: Database -> Text -> AppM (ProcessId, Activity)
 resolveOrThrow db processIdText =
     case Service.resolveActivityAndProcessId db processIdText of
         Left (Service.ActivityNotFound _) -> throwError err404{errBody = "Activity not found"}
@@ -487,13 +492,13 @@ resolveOrThrow db processIdText =
                 Left e -> internalError e
                 Right () -> return (pid, act)
   where
-    internalError :: Service.ServiceError -> Handler a
+    internalError :: Service.ServiceError -> AppM a
     internalError e = throwError err500{errBody = BSL.fromStrict $ T.encodeUtf8 $ T.pack $ show e}
 
 {- | Translate a Service-level error into the HTTP status used across the
 cross-DB LCIA paths.
 -}
-throwServiceError :: Service.ServiceError -> Handler a
+throwServiceError :: Service.ServiceError -> AppM a
 throwServiceError (Service.ActivityNotFound _) = throwError err404{errBody = "Activity not found"}
 throwServiceError (Service.InvalidProcessId msg) = throwError err400{errBody = BSL.fromStrict $ T.encodeUtf8 msg}
 -- MatrixError covers singular Sherman-Morrison, missing technosphere links,
@@ -504,7 +509,7 @@ throwServiceError (Service.InvalidUUID _) = throwError err500{errBody = "Interna
 throwServiceError (Service.FlowNotFound _) = throwError err500{errBody = "Internal server error"}
 
 -- | Load a method collection by name from the live DatabaseManager state.
-loadCollection :: DatabaseManager -> Text -> Handler ([Method], [DamageCategory], [NormWeightSet], [ScoringSet])
+loadCollection :: DatabaseManager -> Text -> AppM ([Method], [DamageCategory], [NormWeightSet], [ScoringSet])
 loadCollection dbManager collectionName = do
     loadedCollections <- liftIO $ readTVarIO (dmLoadedMethods dbManager)
     case M.lookup collectionName loadedCollections of
@@ -515,9 +520,9 @@ loadCollection dbManager collectionName = do
 no-substitution path ('requireFullyLinked' runs inside 'solutionWithDeps');
 'Just' applies the substitutions through the uncached path.
 -}
-crossDBSolutionFor :: DatabaseManager -> Text -> Database -> SharedSolver -> ProcessId -> Maybe SubstitutionRequest -> Handler SharedSolver.CrossDBSolution
+crossDBSolutionFor :: DatabaseManager -> Text -> Database -> SharedSolver -> ProcessId -> Maybe SubstitutionRequest -> AppM SharedSolver.CrossDBSolution
 crossDBSolutionFor dbManager dbName db solver pid mSub = case mSub of
-    Nothing -> solutionWithDeps dbManager dbName db solver pid
+    Nothing -> solutionWithDeps dbName db solver pid
     Just subReq -> do
         requireFullyLinked dbName db
         unitCfg <- liftIO $ getMergedUnitConfig dbManager
@@ -772,18 +777,18 @@ buildLCIABatchResultCached dbManager dbName db actPid activity collection sol ct
         computeAllScoringSets (mcScoringSets collection) rawScoreMap
     pure (mkLCIABatchResult results mNW nwSets scoringResults (mcScoringSets collection) scoringIndicators)
 
-{- | Top-level LCIA batch entry point — Handler-returning. Used by the Servant
+{- | Top-level LCIA batch entry point — AppM-returning. Used by the Servant
 routes (via thin where-aliases) and by API.BatchImpacts.
 -}
 activityLCIABatchH ::
-    DatabaseManager ->
     Text ->
     Text ->
     Text ->
     Maybe SubstitutionRequest ->
-    Handler LCIABatchResult
-activityLCIABatchH dbManager dbName processIdText collectionName mSub = do
-    (db, sharedSolver) <- requireDatabaseByName dbManager dbName
+    AppM LCIABatchResult
+activityLCIABatchH dbName processIdText collectionName mSub = do
+    dbManager <- asks getDatabaseManager
+    (db, sharedSolver) <- requireDatabaseByName dbName
     (actProcessId, activity) <- resolveOrThrow db processIdText
     (methods, damageCats, nwSets, scoringSets) <- loadCollection dbManager collectionName
     let dcLookup = M.fromList [(subName, dcName dc) | dc <- damageCats, (subName, _) <- dcImpacts dc]
@@ -840,14 +845,14 @@ valid PIDs, parallel characterization. Used by the Servant POST route and
 by API.BatchImpacts.
 -}
 batchImpactsH ::
-    DatabaseManager ->
     Text ->
     Text ->
     Maybe Int ->
     BatchImpactsRequest ->
-    Handler BatchImpactsResponse
-batchImpactsH dbManager dbName collectionName topFlowsParam req = do
-    (db, sharedSolver) <- requireDatabaseByName dbManager dbName
+    AppM BatchImpactsResponse
+batchImpactsH dbName collectionName topFlowsParam req = do
+    dbManager <- asks getDatabaseManager
+    (db, sharedSolver) <- requireDatabaseByName dbName
     loadedCollections <- liftIO $ readTVarIO (dmLoadedMethods dbManager)
     collection <- case M.lookup collectionName loadedCollections of
         Just mc -> pure mc
@@ -861,7 +866,7 @@ batchImpactsH dbManager dbName collectionName topFlowsParam req = do
         invalid = [pidText | (pidText, Left (Service.InvalidProcessId _)) <- resolved]
         validPidNums = [pidNum | (_, pidNum, _) <- valid]
     t0 <- liftIO getCurrentTime
-    sols <- solutionsWithDeps dbManager dbName db sharedSolver validPidNums
+    sols <- solutionsWithDeps dbName db sharedSolver validPidNums
     t1 <- liftIO getCurrentTime
     ctxs <- liftIO $ mapConcurrently (prepMethodCtx dbManager dbName db) (mcMethods collection)
     let topFlows = max 0 (fromMaybe 0 topFlowsParam)
@@ -906,12 +911,26 @@ batchImpactsH dbManager dbName collectionName topFlowsParam req = do
             , birInvalid = invalid
             }
 
-{- | API server implementation
-DatabaseManager is used to dynamically fetch current database on each request
+{- | API server implementation. Handlers live in 'AppM' (a 'ReaderT'
+'AppEnv' over Servant's 'Handler'); 'hoistServer' is the natural
+transformation that lifts the @ServerT LCAAPI AppM@ into a plain
+@Server LCAAPI@ Servant expects at the WAI boundary.
 -}
-lcaServer :: DatabaseManager -> Int -> Maybe String -> Maybe Config.HostingConfig -> [Config.ClassificationPreset] -> Server LCAAPI
-lcaServer dbManager maxTreeDepth password hostingConfig classificationPresets =
-    getActivityInfo
+lcaServer :: AppEnv -> Server LCAAPI
+lcaServer env =
+    hoistServer lcaAPI (runApp env) handlers
+  where
+    -- Legacy closure access to env fields, kept so handler bodies (and
+    -- the helpers they call) don't all need an explicit `asks`. We could
+    -- replace this with capability-class projections at every call site,
+    -- but the let-binding is identical in LOC and easier to read.
+    dbManager = aeDbManager env
+    maxTreeDepth = aeMaxTreeDepth env
+    password = aePassword env
+    hostingConfig = aeHostingConfig env
+    classificationPresets = aeClassificationPresets env
+    handlers =
+        getActivityInfo
         :<|> getActivityFlows
         :<|> getActivityInputs
         :<|> getActivityOutputs
@@ -945,42 +964,42 @@ lcaServer dbManager maxTreeDepth password hostingConfig classificationPresets =
         :<|> searchActivitiesWithCount
         :<|> getClassifications
         :<|> postImpactsBatch
-        :<|> DBHandlers.getDatabases dbManager
-        :<|> DBHandlers.loadDatabaseHandler dbManager
-        :<|> DBHandlers.unloadDatabaseHandler dbManager
-        :<|> DBHandlers.relinkDatabaseHandler dbManager
-        :<|> DBHandlers.deleteDatabaseHandler dbManager
-        :<|> DBHandlers.uploadDatabaseHandler dbManager
-        :<|> DBHandlers.getDatabaseSetupHandler dbManager
-        :<|> DBHandlers.addDependencyHandler dbManager
-        :<|> DBHandlers.removeDependencyHandler dbManager
-        :<|> DBHandlers.setDataPathHandler dbManager
-        :<|> DBHandlers.finalizeDatabaseHandler dbManager
+        :<|> DBHandlers.getDatabases
+        :<|> DBHandlers.loadDatabaseHandler
+        :<|> DBHandlers.unloadDatabaseHandler
+        :<|> DBHandlers.relinkDatabaseHandler
+        :<|> DBHandlers.deleteDatabaseHandler
+        :<|> DBHandlers.uploadDatabaseHandler
+        :<|> DBHandlers.getDatabaseSetupHandler
+        :<|> DBHandlers.addDependencyHandler
+        :<|> DBHandlers.removeDependencyHandler
+        :<|> DBHandlers.setDataPathHandler
+        :<|> DBHandlers.finalizeDatabaseHandler
         :<|> getMethodCollections
         :<|> loadMethodCollectionHandler
         :<|> unloadMethodCollectionHandler
-        :<|> DBHandlers.deleteMethodHandler dbManager
-        :<|> DBHandlers.uploadMethodHandler dbManager
+        :<|> DBHandlers.deleteMethodHandler
+        :<|> DBHandlers.uploadMethodHandler
         -- Flow synonyms
-        :<|> DBHandlers.listRefData DBHandlers.FlowSynonyms dbManager
-        :<|> DBHandlers.loadRefData DBHandlers.FlowSynonyms dbManager
-        :<|> DBHandlers.unloadRefData DBHandlers.FlowSynonyms dbManager
-        :<|> DBHandlers.deleteRefData DBHandlers.FlowSynonyms dbManager
-        :<|> DBHandlers.uploadRefData DBHandlers.FlowSynonyms dbManager
-        :<|> DBHandlers.getFlowSynonymGroupsHandler dbManager
-        :<|> DBHandlers.downloadRefDataHandler DBHandlers.FlowSynonyms dbManager
+        :<|> DBHandlers.listRefData DBHandlers.FlowSynonyms
+        :<|> DBHandlers.loadRefData DBHandlers.FlowSynonyms
+        :<|> DBHandlers.unloadRefData DBHandlers.FlowSynonyms
+        :<|> DBHandlers.deleteRefData DBHandlers.FlowSynonyms
+        :<|> DBHandlers.uploadRefData DBHandlers.FlowSynonyms
+        :<|> DBHandlers.getFlowSynonymGroupsHandler
+        :<|> DBHandlers.downloadRefDataHandler DBHandlers.FlowSynonyms
         -- Compartment mappings
-        :<|> DBHandlers.listRefData DBHandlers.CompartmentMappings dbManager
-        :<|> DBHandlers.loadRefData DBHandlers.CompartmentMappings dbManager
-        :<|> DBHandlers.unloadRefData DBHandlers.CompartmentMappings dbManager
-        :<|> DBHandlers.deleteRefData DBHandlers.CompartmentMappings dbManager
-        :<|> DBHandlers.uploadRefData DBHandlers.CompartmentMappings dbManager
+        :<|> DBHandlers.listRefData DBHandlers.CompartmentMappings
+        :<|> DBHandlers.loadRefData DBHandlers.CompartmentMappings
+        :<|> DBHandlers.unloadRefData DBHandlers.CompartmentMappings
+        :<|> DBHandlers.deleteRefData DBHandlers.CompartmentMappings
+        :<|> DBHandlers.uploadRefData DBHandlers.CompartmentMappings
         -- Units
-        :<|> DBHandlers.listRefData DBHandlers.UnitDefs dbManager
-        :<|> DBHandlers.loadRefData DBHandlers.UnitDefs dbManager
-        :<|> DBHandlers.unloadRefData DBHandlers.UnitDefs dbManager
-        :<|> DBHandlers.deleteRefData DBHandlers.UnitDefs dbManager
-        :<|> DBHandlers.uploadRefData DBHandlers.UnitDefs dbManager
+        :<|> DBHandlers.listRefData DBHandlers.UnitDefs
+        :<|> DBHandlers.loadRefData DBHandlers.UnitDefs
+        :<|> DBHandlers.unloadRefData DBHandlers.UnitDefs
+        :<|> DBHandlers.deleteRefData DBHandlers.UnitDefs
+        :<|> DBHandlers.uploadRefData DBHandlers.UnitDefs
         :<|> getLogsHandler
         :<|> postAuth
         :<|> getVersion
@@ -988,11 +1007,10 @@ lcaServer dbManager maxTreeDepth password hostingConfig classificationPresets =
         :<|> getStats
         :<|> getClassificationPresets
         :<|> getOpenApiSpec
-  where
-    getOpenApiSpec :: Handler Value
+    getOpenApiSpec :: AppM Value
     getOpenApiSpec = return $ toJSON volcaOpenApi
 
-    getVersion :: Handler Value
+    getVersion :: AppM Value
     getVersion =
         return $
             object
@@ -1002,7 +1020,7 @@ lcaServer dbManager maxTreeDepth password hostingConfig classificationPresets =
                 , "buildTarget" .= Version.buildTarget
                 ]
 
-    getHosting :: Handler Value
+    getHosting :: AppM Value
     getHosting = return $ case hostingConfig of
         Just hc ->
             object
@@ -1023,7 +1041,7 @@ lcaServer dbManager maxTreeDepth password hostingConfig classificationPresets =
                 , "upgrade_vm_size" .= ("" :: Text)
                 ]
 
-    getStats :: Handler Value
+    getStats :: AppM Value
     getStats = liftIO $ do
         enabled <- GHC.Stats.getRTSStatsEnabled
         if enabled
@@ -1040,7 +1058,7 @@ lcaServer dbManager maxTreeDepth password hostingConfig classificationPresets =
                     object
                         ["error" .= ("RTS stats not enabled. Run with +RTS -T to enable." :: Text)]
 
-    getClassificationPresets :: Handler [ClassificationPresetInfo]
+    getClassificationPresets :: AppM [ClassificationPresetInfo]
     getClassificationPresets = return $ map toInfo classificationPresets
       where
         toInfo p =
@@ -1051,7 +1069,7 @@ lcaServer dbManager maxTreeDepth password hostingConfig classificationPresets =
                 , cpiFilters = map (\e -> ClassificationEntryInfo (Config.ceSystem e) (Config.ceValue e) (Config.ceMode e)) (Config.cpFilters p)
                 }
 
-    getLogsHandler :: Maybe Int -> Handler Value
+    getLogsHandler :: Maybe Int -> AppM Value
     getLogsHandler sinceMaybe = do
         let since = fromMaybe 0 sinceMaybe
         (nextIndex, logLines) <- liftIO $ getLogLines since
@@ -1061,7 +1079,7 @@ lcaServer dbManager maxTreeDepth password hostingConfig classificationPresets =
                 , "nextIndex" .= nextIndex
                 ]
 
-    postAuth :: LoginRequest -> Handler (Headers '[Header "Set-Cookie" String] Value)
+    postAuth :: LoginRequest -> AppM (Headers '[Header "Set-Cookie" String] Value)
     postAuth loginReq =
         case password of
             Nothing ->
@@ -1076,9 +1094,9 @@ lcaServer dbManager maxTreeDepth password hostingConfig classificationPresets =
                         throwError err401{errBody = "{\"error\":\"invalid code\"}"}
 
     -- Core activity endpoint - streamlined data
-    getActivityInfo :: Text -> Text -> Handler ActivityInfo
+    getActivityInfo :: Text -> Text -> AppM ActivityInfo
     getActivityInfo dbName processId = do
-        (db, _) <- requireDatabaseByName dbManager dbName
+        (db, _) <- requireDatabaseByName dbName
         unitCfg <- liftIO $ getMergedUnitConfig dbManager
         case Service.getActivityInfo unitCfg db processId of
             Left (Service.ActivityNotFound _) -> throwError err404{errBody = "Activity not found"}
@@ -1089,39 +1107,39 @@ lcaServer dbManager maxTreeDepth password hostingConfig classificationPresets =
                 Error err -> throwError err500{errBody = BSL.fromStrict $ T.encodeUtf8 $ T.pack err}
 
     -- Activity flows sub-resource
-    getActivityFlows :: Text -> Text -> Handler [FlowSummary]
+    getActivityFlows :: Text -> Text -> AppM [FlowSummary]
     getActivityFlows dbName processId = do
-        (db, _) <- requireDatabaseByName dbManager dbName
+        (db, _) <- requireDatabaseByName dbName
         withValidatedActivity db processId $ \activity ->
             return $ Service.getActivityFlowSummaries db activity
 
     -- Activity inputs sub-resource
-    getActivityInputs :: Text -> Text -> Handler [ExchangeDetail]
+    getActivityInputs :: Text -> Text -> AppM [ExchangeDetail]
     getActivityInputs dbName processId = do
-        (db, _) <- requireDatabaseByName dbManager dbName
+        (db, _) <- requireDatabaseByName dbName
         withValidatedActivity db processId $ \activity ->
             return $ Service.getActivityInputDetails db activity
 
     -- Activity outputs sub-resource
-    getActivityOutputs :: Text -> Text -> Handler [ExchangeDetail]
+    getActivityOutputs :: Text -> Text -> AppM [ExchangeDetail]
     getActivityOutputs dbName processId = do
-        (db, _) <- requireDatabaseByName dbManager dbName
+        (db, _) <- requireDatabaseByName dbName
         withValidatedActivity db processId $ \activity ->
             return $ Service.getActivityOutputDetails db activity
 
     -- Activity reference product sub-resource
-    getActivityReferenceProduct :: Text -> Text -> Handler FlowDetail
+    getActivityReferenceProduct :: Text -> Text -> AppM FlowDetail
     getActivityReferenceProduct dbName processId = do
-        (db, _) <- requireDatabaseByName dbManager dbName
+        (db, _) <- requireDatabaseByName dbName
         withValidatedActivity db processId $ \activity -> do
             case Service.getActivityReferenceProductDetail db activity of
                 Nothing -> throwError err404{errBody = "No reference product found"}
                 Just refProduct -> return refProduct
 
     -- Activity tree export for visualization (configurable depth)
-    getActivityTree :: Text -> Text -> Handler TreeExport
+    getActivityTree :: Text -> Text -> AppM TreeExport
     getActivityTree dbName processId = do
-        (db, _) <- requireDatabaseByName dbManager dbName
+        (db, _) <- requireDatabaseByName dbName
         withValidatedActivity db processId $ \_activity -> do
             -- Use CLI --tree-depth option for configurable depth
             -- Default depth limit prevents DOS attacks via deep tree requests
@@ -1140,21 +1158,21 @@ lcaServer dbManager maxTreeDepth password hostingConfig classificationPresets =
     -- Goes through the cross-DB back-substitution path so inventories from
     -- dep DBs are merged into the returned flow map; metadata (flow names,
     -- units) comes from the merged FlowDB/UnitDB snapshot.
-    activityInventoryCore :: Text -> Text -> Maybe SubstitutionRequest -> Handler InventoryExport
+    activityInventoryCore :: Text -> Text -> Maybe SubstitutionRequest -> AppM InventoryExport
     activityInventoryCore dbName processIdText mSub = do
-        (db, sharedSolver) <- requireDatabaseByName dbManager dbName
+        (db, sharedSolver) <- requireDatabaseByName dbName
         (processId, activity) <- resolveOrThrow db processIdText
         sol <- crossDBSolutionFor dbManager dbName db sharedSolver processId mSub
         (mFlows, mUnits) <- liftIO $ DM.getMergedFlowMetadata dbManager
         pure $ Service.convertToInventoryExport db mFlows mUnits processId activity (SharedSolver.csInventory sol)
 
-    getActivityInventory :: Text -> Text -> Handler InventoryExport
+    getActivityInventory :: Text -> Text -> AppM InventoryExport
     getActivityInventory dbName processIdText = activityInventoryCore dbName processIdText Nothing
 
     -- Activity graph endpoint for network visualization
-    getActivityGraph :: Text -> Text -> Maybe Double -> Handler GraphExport
+    getActivityGraph :: Text -> Text -> Maybe Double -> AppM GraphExport
     getActivityGraph dbName processId maybeCutoff = do
-        (db, sharedSolver) <- requireDatabaseByName dbManager dbName
+        (db, sharedSolver) <- requireDatabaseByName dbName
         let cutoffPercent = fromMaybe 1.0 maybeCutoff -- Default to 1% cutoff
         result <- liftIO $ Service.buildActivityGraph db sharedSolver processId cutoffPercent
         case result of
@@ -1193,9 +1211,9 @@ lcaServer dbManager maxTreeDepth password hostingConfig classificationPresets =
 
     -- Activity supply chain endpoint (scaling vector based). 'Nothing' takes the
     -- cached solve; 'Just' resolves substitutions through the cross-DB resolver.
-    activitySupplyChainCore :: Text -> Text -> Maybe Text -> Maybe Int -> Maybe Double -> Maybe Int -> Maybe Int -> Maybe Text -> Maybe Text -> Maybe Text -> [Text] -> [Text] -> [Text] -> Maybe Text -> Maybe Text -> Maybe Bool -> Maybe SubstitutionRequest -> Handler SupplyChainResponse
+    activitySupplyChainCore :: Text -> Text -> Maybe Text -> Maybe Int -> Maybe Double -> Maybe Int -> Maybe Int -> Maybe Text -> Maybe Text -> Maybe Text -> [Text] -> [Text] -> [Text] -> Maybe Text -> Maybe Text -> Maybe Bool -> Maybe SubstitutionRequest -> AppM SupplyChainResponse
     activitySupplyChainCore dbName processIdText nameFilter limitParam minQuantity offsetParam maxDepthParam locationFilter productFilter presetParam classSystems classValues classModes sortParam orderParam includeEdgesParam mSub = do
-        (db, sharedSolver) <- requireDatabaseByName dbManager dbName
+        (db, sharedSolver) <- requireDatabaseByName dbName
         let includeEdges = fromMaybe False includeEdgesParam
             scf =
                 buildSupplyChainFilter
@@ -1256,7 +1274,7 @@ lcaServer dbManager maxTreeDepth password hostingConfig classificationPresets =
                                     includeEdges
                         either throwServiceError pure eResp
 
-    getActivitySupplyChain :: Text -> Text -> Maybe Text -> Maybe Int -> Maybe Double -> Maybe Int -> Maybe Int -> Maybe Text -> Maybe Text -> Maybe Text -> [Text] -> [Text] -> [Text] -> Maybe Text -> Maybe Text -> Maybe Bool -> Handler SupplyChainResponse
+    getActivitySupplyChain :: Text -> Text -> Maybe Text -> Maybe Int -> Maybe Double -> Maybe Int -> Maybe Int -> Maybe Text -> Maybe Text -> Maybe Text -> [Text] -> [Text] -> [Text] -> Maybe Text -> Maybe Text -> Maybe Bool -> AppM SupplyChainResponse
     getActivitySupplyChain dbName processIdText nameFilter limitParam minQuantity offsetParam maxDepthParam locationFilter productFilter presetParam classSystems classValues classModes sortParam orderParam includeEdgesParam =
         activitySupplyChainCore dbName processIdText nameFilter limitParam minQuantity offsetParam maxDepthParam locationFilter productFilter presetParam classSystems classValues classModes sortParam orderParam includeEdgesParam Nothing
 
@@ -1277,7 +1295,7 @@ lcaServer dbManager maxTreeDepth password hostingConfig classificationPresets =
         Maybe Bool -> -- filter_is_reference
         Maybe Text -> -- group_by
         Maybe Text -> -- aggregate fn
-        Handler Aggregation
+        AppM Aggregation
     getActivityAggregate
         dbName
         processId
@@ -1294,7 +1312,7 @@ lcaServer dbManager maxTreeDepth password hostingConfig classificationPresets =
         freferenceParam
         groupByParam
         aggregateParam = do
-            (db, sharedSolver) <- requireDatabaseByName dbManager dbName
+            (db, sharedSolver) <- requireDatabaseByName dbName
             -- Field-level validation via the Validation Applicative (accumulating).
             -- A request with both an invalid `scope` and an invalid `aggregate`
             -- now reports both errors at once, instead of just the first.
@@ -1369,9 +1387,9 @@ lcaServer dbManager maxTreeDepth password hostingConfig classificationPresets =
     -- Activity LCIA endpoint (single method within a collection). The GET
     -- variant honours a top-flows query param and logs the result; the POST
     -- route carries neither (no top-flows param, no logging).
-    activityLCIACore :: Text -> Text -> Text -> Maybe Int -> Maybe SubstitutionRequest -> Handler LCIAResult
+    activityLCIACore :: Text -> Text -> Text -> Maybe Int -> Maybe SubstitutionRequest -> AppM LCIAResult
     activityLCIACore dbName processIdText methodIdText topFlowsParam mSub = do
-        (db, sharedSolver) <- requireDatabaseByName dbManager dbName
+        (db, sharedSolver) <- requireDatabaseByName dbName
         method <- loadMethodByUUID methodIdText
         (processId, activity) <- resolveOrThrow db processIdText
         sol <- crossDBSolutionFor dbManager dbName db sharedSolver processId mSub
@@ -1379,12 +1397,12 @@ lcaServer dbManager maxTreeDepth password hostingConfig classificationPresets =
         when (isNothing mSub) $ liftIO $ logLCIAResult result method
         pure result
 
-    getActivityLCIA :: Text -> Text -> Text -> Text -> Maybe Int -> Handler LCIAResult
+    getActivityLCIA :: Text -> Text -> Text -> Text -> Maybe Int -> AppM LCIAResult
     getActivityLCIA dbName processIdText _collectionName methodIdText topFlowsParam =
         activityLCIACore dbName processIdText methodIdText topFlowsParam Nothing
 
     -- POST: LCIA with substitutions
-    postActivityLCIA :: Text -> Text -> Text -> Text -> SubstitutionRequest -> Handler LCIAResult
+    postActivityLCIA :: Text -> Text -> Text -> Text -> SubstitutionRequest -> AppM LCIAResult
     postActivityLCIA dbName processIdText _collectionName methodIdText subReq =
         activityLCIACore dbName processIdText methodIdText Nothing (Just subReq)
 
@@ -1402,9 +1420,9 @@ lcaServer dbManager maxTreeDepth password hostingConfig classificationPresets =
     -- Cost: each perturbation now triggers one cross-DB back-substitution
     -- per dep DB it actually reaches. MUMPS factorizations are cached, so
     -- back-sub is O(n²) per dep DB, not full O(n³) factorization.
-    postActivitySensitivity :: Text -> Text -> Text -> Text -> SensitivityRequest -> Handler SensitivityResponse
+    postActivitySensitivity :: Text -> Text -> Text -> Text -> SensitivityRequest -> AppM SensitivityResponse
     postActivitySensitivity dbName processIdText _collectionName methodIdText senReq = do
-        (db, sharedSolver) <- requireDatabaseByName dbManager dbName
+        (db, sharedSolver) <- requireDatabaseByName dbName
         requireFullyLinked dbName db
         method <- loadMethodByUUID methodIdText
         (processId, activity) <- resolveOrThrow db processIdText
@@ -1454,31 +1472,31 @@ lcaServer dbManager maxTreeDepth password hostingConfig classificationPresets =
 
     -- Batch LCIA endpoint (all methods in a collection). Thin alias over the
     -- top-level activityLCIABatchH; preserves the Servant call sites.
-    activityLCIABatchCore :: Text -> Text -> Text -> Maybe SubstitutionRequest -> Handler LCIABatchResult
-    activityLCIABatchCore = activityLCIABatchH dbManager
+    activityLCIABatchCore :: Text -> Text -> Text -> Maybe SubstitutionRequest -> AppM LCIABatchResult
+    activityLCIABatchCore = activityLCIABatchH
 
-    getActivityLCIABatch :: Text -> Text -> Text -> Handler LCIABatchResult
+    getActivityLCIABatch :: Text -> Text -> Text -> AppM LCIABatchResult
     getActivityLCIABatch dbName processIdText collectionName =
         activityLCIABatchCore dbName processIdText collectionName Nothing
 
     -- POST: Batch LCIA with substitutions
-    postActivityLCIABatch :: Text -> Text -> Text -> SubstitutionRequest -> Handler LCIABatchResult
+    postActivityLCIABatch :: Text -> Text -> Text -> SubstitutionRequest -> AppM LCIABatchResult
     postActivityLCIABatch dbName processIdText collectionName subReq =
         activityLCIABatchCore dbName processIdText collectionName (Just subReq)
 
     -- POST: Inventory with substitutions
-    postActivityInventory :: Text -> Text -> SubstitutionRequest -> Handler InventoryExport
+    postActivityInventory :: Text -> Text -> SubstitutionRequest -> AppM InventoryExport
     postActivityInventory dbName processIdText subReq = activityInventoryCore dbName processIdText (Just subReq)
 
     -- POST: Supply chain with substitutions
-    postActivitySupplyChain :: Text -> Text -> Maybe Text -> Maybe Int -> Maybe Double -> Maybe Int -> Maybe Int -> Maybe Text -> Maybe Text -> Maybe Text -> [Text] -> [Text] -> [Text] -> Maybe Text -> Maybe Text -> Maybe Bool -> SubstitutionRequest -> Handler SupplyChainResponse
+    postActivitySupplyChain :: Text -> Text -> Maybe Text -> Maybe Int -> Maybe Double -> Maybe Int -> Maybe Int -> Maybe Text -> Maybe Text -> Maybe Text -> [Text] -> [Text] -> [Text] -> Maybe Text -> Maybe Text -> Maybe Bool -> SubstitutionRequest -> AppM SupplyChainResponse
     postActivitySupplyChain dbName processIdText nameFilter limitParam minQuantity offsetParam maxDepthParam locationFilter productFilter presetParam classSystems classValues classModes sortParam orderParam includeEdgesParam subReq =
         activitySupplyChainCore dbName processIdText nameFilter limitParam minQuantity offsetParam maxDepthParam locationFilter productFilter presetParam classSystems classValues classModes sortParam orderParam includeEdgesParam (Just subReq)
 
     -- Activity consumers endpoint (reverse supply chain)
-    getActivityConsumers :: Text -> Text -> Maybe Text -> Maybe Text -> Maybe Text -> Maybe Text -> [Text] -> [Text] -> [Text] -> Maybe Int -> Maybe Int -> Maybe Int -> Maybe Text -> Maybe Text -> Maybe Bool -> Handler ConsumersResponse
+    getActivityConsumers :: Text -> Text -> Maybe Text -> Maybe Text -> Maybe Text -> Maybe Text -> [Text] -> [Text] -> [Text] -> Maybe Int -> Maybe Int -> Maybe Int -> Maybe Text -> Maybe Text -> Maybe Bool -> AppM ConsumersResponse
     getActivityConsumers dbName processIdText nameFilter locationFilter productFilter presetParam classSystems classValues classModes limitParam offsetParam maxDepthParam sortParam orderParam includeEdgesParam = do
-        (db, _) <- requireDatabaseByName dbManager dbName
+        (db, _) <- requireDatabaseByName dbName
         let presetFilters = expandPreset classificationPresets presetParam
             explicitFilters =
                 zipWith3
@@ -1510,9 +1528,9 @@ lcaServer dbManager maxTreeDepth password hostingConfig classificationPresets =
             Right consumers -> return consumers
 
     -- Activity path-to endpoint (shortest supply chain path to first matching upstream activity)
-    getActivityPathTo :: Text -> Text -> Maybe Text -> Handler Value
+    getActivityPathTo :: Text -> Text -> Maybe Text -> AppM Value
     getActivityPathTo dbName processIdText targetParam = do
-        (db, solver) <- requireDatabaseByName dbManager dbName
+        (db, solver) <- requireDatabaseByName dbName
         target <-
             maybe
                 (throwError err400{errBody = "Missing required 'target' query parameter"})
@@ -1533,9 +1551,9 @@ lcaServer dbManager maxTreeDepth password hostingConfig classificationPresets =
     -- Call sites in this `where` pass `dbManager` explicitly when needed.)
 
     -- Activity analysis endpoint (dispatches to registered analyzers)
-    getActivityAnalyze :: Text -> Text -> Text -> Handler Value
+    getActivityAnalyze :: Text -> Text -> Text -> AppM Value
     getActivityAnalyze dbName processIdText analyzerName = do
-        (db, sharedSolver) <- requireDatabaseByName dbManager dbName
+        (db, sharedSolver) <- requireDatabaseByName dbName
         case M.lookup analyzerName (prAnalyzers (dmPlugins dbManager)) of
             Nothing -> throwError err404{errBody = "Analyzer not found: " <> BSL.fromStrict (T.encodeUtf8 analyzerName)}
             Just analyzer -> do
@@ -1544,7 +1562,7 @@ lcaServer dbManager maxTreeDepth password hostingConfig classificationPresets =
                     Left (Service.InvalidProcessId _) -> throwError err400{errBody = "Invalid ProcessId format"}
                     Left err -> throwError err500{errBody = BSL.fromStrict $ T.encodeUtf8 $ T.pack $ show err}
                     Right (actProcessId, _) -> do
-                        inventory <- inventoryWithDeps dbManager dbName db sharedSolver actProcessId
+                        inventory <- inventoryWithDeps dbName db sharedSolver actProcessId
                         (mFlows, mUnits) <- liftIO $ DM.getMergedFlowMetadata dbManager
                         loadedMethods <- liftIO $ DM.getLoadedMethods dbManager
                         let methods = map snd loadedMethods
@@ -1561,13 +1579,13 @@ lcaServer dbManager maxTreeDepth password hostingConfig classificationPresets =
                         liftIO $ ahAnalyze analyzer ctx
 
     -- Contributing flows: top elementary flows by LCIA contribution for a specific method
-    getContributingFlows :: Text -> Text -> Text -> Text -> Maybe Int -> Handler ContributingFlowsResult
+    getContributingFlows :: Text -> Text -> Text -> Text -> Maybe Int -> AppM ContributingFlowsResult
     getContributingFlows dbName processIdText _collectionName methodIdText limitParam =
         withActivityAndMethod dbName processIdText methodIdText $ \db sharedSolver actProcessId _ method -> do
             let lim = fromMaybe 20 limitParam
             unitCfg <- liftIO $ getMergedUnitConfig dbManager
             (mFlows, mUnits) <- liftIO $ DM.getMergedFlowMetadata dbManager
-            inventory <- inventoryWithDeps dbManager dbName db sharedSolver actProcessId
+            inventory <- inventoryWithDeps dbName db sharedSolver actProcessId
             tables <- liftIO $ DM.mapMethodToTablesCached dbManager dbName db method
             let score = loScore (computeLCIAScoreFromTables unitCfg mUnits mFlows inventory tables)
                 (rawContribs, unknownUuids) = inventoryContributions unitCfg mUnits mFlows inventory tables
@@ -1602,7 +1620,7 @@ lcaServer dbManager maxTreeDepth password hostingConfig classificationPresets =
                     }
 
     -- Contributing activities: top upstream activities by LCIA contribution for a specific method
-    getContributingActivities :: Text -> Text -> Text -> Text -> Maybe Int -> Handler ContributingActivitiesResult
+    getContributingActivities :: Text -> Text -> Text -> Text -> Maybe Int -> AppM ContributingActivitiesResult
     getContributingActivities dbName processIdText _collectionName methodIdText limitParam =
         withActivityAndMethod dbName processIdText methodIdText $ \db sharedSolver actProcessId _ method -> do
             let lim = fromMaybe 10 limitParam
@@ -1650,9 +1668,9 @@ lcaServer dbManager maxTreeDepth password hostingConfig classificationPresets =
     -- as a sum across all participating DBs (root + each dep DB reached at
     -- request time); non-regional methods read the merged inventory only.
     -- Flow detail endpoint
-    getFlowDetail :: Text -> Text -> Handler FlowDetail
+    getFlowDetail :: Text -> Text -> AppM FlowDetail
     getFlowDetail dbName flowIdText = do
-        (db, _) <- requireDatabaseByName dbManager dbName
+        (db, _) <- requireDatabaseByName dbName
         withValidatedFlow db flowIdText $ \flow -> do
             let fid = flowKindId flow
                 unitName' = flowKindUnitName (dbUnits db) flow
@@ -1660,14 +1678,14 @@ lcaServer dbManager maxTreeDepth password hostingConfig classificationPresets =
             return $ FlowDetail (apiFlowOfKind flow) unitName' usageCount
 
     -- Activities using a specific flow
-    getFlowActivities :: Text -> Text -> Handler [ActivitySummary]
+    getFlowActivities :: Text -> Text -> AppM [ActivitySummary]
     getFlowActivities dbName flowIdText = do
-        (db, _) <- requireDatabaseByName dbManager dbName
+        (db, _) <- requireDatabaseByName dbName
         withValidatedFlow db flowIdText $ \flow ->
             return $ Service.getActivitiesUsingFlow db (flowKindId flow)
 
     -- List all available methods (from loaded collections)
-    getMethods :: Handler [MethodSummary]
+    getMethods :: AppM [MethodSummary]
     getMethods = do
         loadedMethods <- liftIO $ DM.getLoadedMethods dbManager
         return
@@ -1683,7 +1701,7 @@ lcaServer dbManager maxTreeDepth password hostingConfig classificationPresets =
             ]
 
     -- Get method details
-    getMethodDetail :: Text -> Handler MethodDetail
+    getMethodDetail :: Text -> AppM MethodDetail
     getMethodDetail methodIdText = do
         method <- loadMethodByUUID methodIdText
         return $
@@ -1698,15 +1716,15 @@ lcaServer dbManager maxTreeDepth password hostingConfig classificationPresets =
                 }
 
     -- Get method characterization factors
-    getMethodFactors :: Text -> Handler [MethodFactorAPI]
+    getMethodFactors :: Text -> AppM [MethodFactorAPI]
     getMethodFactors methodIdText = do
         method <- loadMethodByUUID methodIdText
         return $ map cfToAPI (methodFactors method)
 
     -- Get method flow mapping status
-    getMethodMapping :: Text -> Text -> Handler MappingStatus
+    getMethodMapping :: Text -> Text -> AppM MappingStatus
     getMethodMapping dbName methodIdText = do
-        (db, _) <- requireDatabaseByName dbManager dbName
+        (db, _) <- requireDatabaseByName dbName
         method <- loadMethodByUUID methodIdText
         mappings <- liftIO $ DM.mapMethodToFlowsCached dbManager dbName db method
         let stats = computeMappingStats mappings
@@ -1746,9 +1764,9 @@ lcaServer dbManager maxTreeDepth password hostingConfig classificationPresets =
                 }
 
     -- DB-flow-centric mapping: all biosphere flows with their CF assignments
-    getFlowCFMapping :: Text -> Text -> Handler FlowCFMapping
+    getFlowCFMapping :: Text -> Text -> AppM FlowCFMapping
     getFlowCFMapping dbName methodIdText = do
-        (db, _) <- requireDatabaseByName dbManager dbName
+        (db, _) <- requireDatabaseByName dbName
         method <- loadMethodByUUID methodIdText
         mappings <- liftIO $ DM.mapMethodToFlowsCached dbManager dbName db method
         let
@@ -1790,9 +1808,9 @@ lcaServer dbManager maxTreeDepth password hostingConfig classificationPresets =
     strategyToText NoMatch = "none"
 
     -- Characterization: matched CFs for a method, filterable by flow name
-    getCharacterization :: Text -> Text -> Maybe Text -> Maybe Int -> Handler CharacterizationResult
+    getCharacterization :: Text -> Text -> Maybe Text -> Maybe Int -> AppM CharacterizationResult
     getCharacterization dbName methodIdText flowFilter limitParam = do
-        (db, _) <- requireDatabaseByName dbManager dbName
+        (db, _) <- requireDatabaseByName dbName
         method <- loadMethodByUUID methodIdText
         let lim = fromMaybe 50 limitParam
             queryLower = fmap T.toLower flowFilter
@@ -1833,7 +1851,7 @@ lcaServer dbManager maxTreeDepth password hostingConfig classificationPresets =
     matchesQuery (Just q) cfName dbName = T.isInfixOf q (T.toLower cfName) || T.isInfixOf q (T.toLower dbName)
 
     -- Helper to load a method by UUID from the loaded collections
-    loadMethodByUUID :: Text -> Handler Method
+    loadMethodByUUID :: Text -> AppM Method
     loadMethodByUUID uuidText = do
         loadedMethods <- liftIO $ DM.getLoadedMethods dbManager
         let allMethods = map snd loadedMethods
@@ -1850,10 +1868,10 @@ lcaServer dbManager maxTreeDepth password hostingConfig classificationPresets =
         Text ->
         Text ->
         Text ->
-        (Database -> SharedSolver -> ProcessId -> Activity -> Method -> Handler a) ->
-        Handler a
+        (Database -> SharedSolver -> ProcessId -> Activity -> Method -> AppM a) ->
+        AppM a
     withActivityAndMethod dbName processIdText methodIdText k = do
-        (db, sharedSolver) <- requireDatabaseByName dbManager dbName
+        (db, sharedSolver) <- requireDatabaseByName dbName
         method <- loadMethodByUUID methodIdText
         case Service.resolveActivityAndProcessId db processIdText of
             Left (Service.ActivityNotFound _) -> throwError err404{errBody = "Activity not found"}
@@ -1862,7 +1880,7 @@ lcaServer dbManager maxTreeDepth password hostingConfig classificationPresets =
             Right (actProcessId, activity) -> k db sharedSolver actProcessId activity method
 
     -- Method collection handlers
-    getMethodCollections :: Handler MethodCollectionListResponse
+    getMethodCollections :: AppM MethodCollectionListResponse
     getMethodCollections = do
         statuses <- liftIO $ DM.listMethodCollections dbManager
         return $
@@ -1882,11 +1900,11 @@ lcaServer dbManager maxTreeDepth password hostingConfig classificationPresets =
                 | s <- statuses
                 ]
 
-    loadMethodCollectionHandler :: Text -> Handler ActivateResponse
+    loadMethodCollectionHandler :: Text -> AppM ActivateResponse
     loadMethodCollectionHandler name =
         simpleAction (DM.loadMethodCollection dbManager name) ("Loaded method: " <> name)
 
-    unloadMethodCollectionHandler :: Text -> Handler ActivateResponse
+    unloadMethodCollectionHandler :: Text -> AppM ActivateResponse
     unloadMethodCollectionHandler name =
         simpleAction (DM.unloadMethodCollection dbManager name) ("Unloaded method: " <> name)
 
@@ -1903,9 +1921,9 @@ lcaServer dbManager maxTreeDepth password hostingConfig classificationPresets =
             }
 
     -- Search flows by name or synonym with optional language filtering and pagination
-    searchFlows :: Text -> Maybe Text -> Maybe Text -> Maybe Int -> Maybe Int -> Maybe Text -> Maybe Text -> Handler (SearchResults FlowSearchResult)
+    searchFlows :: Text -> Maybe Text -> Maybe Text -> Maybe Int -> Maybe Int -> Maybe Text -> Maybe Text -> AppM (SearchResults FlowSearchResult)
     searchFlows dbName queryParam langParam limitParam offsetParam sortParam orderParam = do
-        (db, _) <- requireDatabaseByName dbManager dbName
+        (db, _) <- requireDatabaseByName dbName
         case queryParam of
             Nothing -> return (SearchResults [] 0 0 50 False 0.0)
             Just query -> do
@@ -1921,9 +1939,9 @@ lcaServer dbManager maxTreeDepth password hostingConfig classificationPresets =
                 searchFlowsInternal db ff
 
     -- Search activities by specific fields with pagination and count
-    searchActivitiesWithCount :: Text -> Maybe Text -> Maybe Text -> Maybe Text -> Maybe Bool -> Maybe Text -> [Text] -> [Text] -> [Text] -> Maybe Int -> Maybe Int -> Maybe Text -> Maybe Text -> Handler (SearchResults ActivitySummary)
+    searchActivitiesWithCount :: Text -> Maybe Text -> Maybe Text -> Maybe Text -> Maybe Bool -> Maybe Text -> [Text] -> [Text] -> [Text] -> Maybe Int -> Maybe Int -> Maybe Text -> Maybe Text -> AppM (SearchResults ActivitySummary)
     searchActivitiesWithCount dbName nameParam geoParam productParam exactParam presetParam classSystems classValues classModes limitParam offsetParam sortParam orderParam = do
-        (db, _) <- requireDatabaseByName dbManager dbName
+        (db, _) <- requireDatabaseByName dbName
         -- Expand preset filters then merge with explicit classification params
         let exactMatch = fromMaybe False exactParam
             presetFilters = expandPreset classificationPresets presetParam
@@ -1956,14 +1974,14 @@ lcaServer dbManager maxTreeDepth password hostingConfig classificationPresets =
                 Success searchResults -> return searchResults
                 Error parseErr -> throwError err500{errBody = BSL.fromStrict $ T.encodeUtf8 $ T.pack parseErr}
 
-    getClassifications :: Text -> Handler [ClassificationSystem]
+    getClassifications :: Text -> AppM [ClassificationSystem]
     getClassifications dbName = do
-        (db, _) <- requireDatabaseByName dbManager dbName
+        (db, _) <- requireDatabaseByName dbName
         return $ Service.getClassifications db
 
     -- Batch impacts: thin alias over the top-level batchImpactsH.
-    postImpactsBatch :: Text -> Text -> Maybe Int -> BatchImpactsRequest -> Handler BatchImpactsResponse
-    postImpactsBatch = batchImpactsH dbManager
+    postImpactsBatch :: Text -> Text -> Maybe Int -> BatchImpactsRequest -> AppM BatchImpactsResponse
+    postImpactsBatch = batchImpactsH
 
 {- | Evaluate every scoring set against the raw impact score map.
 Returns (setName → scoreName → value, setName → varName → ScoringIndicator).
@@ -2021,7 +2039,7 @@ paginateResults results limitParam offsetParam = do
 The 'ffQuery' is always present (callers short-circuit on the no-query
 case); language filtering is not yet implemented.
 -}
-searchFlowsInternal :: Database -> Service.FlowFilter -> Handler (SearchResults FlowSearchResult)
+searchFlowsInternal :: Database -> Service.FlowFilter -> AppM (SearchResults FlowSearchResult)
 searchFlowsInternal db Service.FlowFilter{Service.ffQuery = query, Service.ffLimit = limitParam, Service.ffOffset = offsetParam, Service.ffSort = sortParam, Service.ffOrder = orderParam} = do
     -- Language filtering not yet implemented, search all synonyms
     let flows = findFlowsBySynonym db query

From e9363e5a4546adeb7efb7efc3f37ebf6b65306c3 Mon Sep 17 00:00:00 2001
From: Christophe Combelles <ccomb@free.fr>
Date: Tue, 26 May 2026 01:11:34 +0200
Subject: [PATCH 09/43] refactor(cli): factor optparse-applicative builders
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Six tiny helpers (strOpt / optStrOpt / textOpt / optTextOpt / intOpt /
optIntOpt / strArg / textArg) collapse the four shapes that dominated
src/CLI/Parser.hs — Text option, Int option, positional arg, with or
without short alias — into one line each. ~172 LOC removed, `--help`
output unchanged, openapi.json byte-identical, hspec 1052/1052 green.

Categorically: optparse-applicative's Parser is the textbook free
Applicative over a primitive 'parser action'. Composition with <*>
keeps the static structure exposed, which is precisely why a Parser
can produce both the input parsing and the `--help` text from one
definition. Stopping at Applicative (instead of going to Monad) is
the structural reason these helpers can exist at all — a Monadic
parser would have to inspect run-time values, defeating the static
introspection.
---
 src/CLI/Parser.hs | 322 +++++++++++-----------------------------------
 1 file changed, 75 insertions(+), 247 deletions(-)

diff --git a/src/CLI/Parser.hs b/src/CLI/Parser.hs
index 4807fde3..b41d6495 100644
--- a/src/CLI/Parser.hs
+++ b/src/CLI/Parser.hs
@@ -10,6 +10,48 @@ import Options.Applicative
 import qualified Options.Applicative as OA
 import Version (buildTarget, gitHash, gitTag, version)
 
+-- ---------------------------------------------------------------------------
+-- Option-builder helpers
+--
+-- optparse-applicative is the textbook /Free Applicative/ over a primitive
+-- @Parser@: composition with @\<*\>@ keeps the static structure exposed
+-- (which is why @--help@ generation works), and the helpers below factor
+-- the four shapes that dominated this file before — Text option, Int
+-- option, switch, positional Text arg — into one line each.
+-- ---------------------------------------------------------------------------
+
+-- | @strOption@ with @long/metavar/help@, optionally a short alias.
+strOpt :: String -> Maybe Char -> String -> String -> Parser String
+strOpt l ms m h = strOption (long l <> maybe mempty short ms <> metavar m <> help h)
+
+-- | @optional strOpt@ — most of the global / command parsers use this.
+optStrOpt :: String -> Maybe Char -> String -> String -> Parser (Maybe String)
+optStrOpt l ms m h = optional (strOpt l ms m h)
+
+-- | Text variant: read as String then pack.
+textOpt :: String -> Maybe Char -> String -> String -> Parser Text
+textOpt l ms m h = T.pack <$> strOpt l ms m h
+
+-- | @optional textOpt@.
+optTextOpt :: String -> Maybe Char -> String -> String -> Parser (Maybe Text)
+optTextOpt l ms m h = optional (textOpt l ms m h)
+
+-- | @option auto@ for Int-shaped options (limit, offset, port, depth…).
+intOpt :: String -> Maybe Char -> String -> String -> Parser Int
+intOpt l ms m h = option auto (long l <> maybe mempty short ms <> metavar m <> help h)
+
+-- | @optional intOpt@.
+optIntOpt :: String -> Maybe Char -> String -> String -> Parser (Maybe Int)
+optIntOpt l ms m h = optional (intOpt l ms m h)
+
+-- | Positional @Text@ argument with metavar + help.
+textArg :: String -> String -> Parser Text
+textArg m h = T.pack <$> argument str (metavar m <> help h)
+
+-- | Positional @String@ argument with metavar + help (for filenames).
+strArg :: String -> String -> Parser String
+strArg m h = argument str (metavar m <> help h)
+
 {- | Main CLI parser combining global options and optional command
 If no command is given, just load database and exit (useful for cache generation)
 -}
@@ -19,74 +61,18 @@ cliParser = CLIConfig <$> globalOptionsParser <*> optional commandParser
 -- | Global options parser (applied before commands)
 globalOptionsParser :: Parser GlobalOptions
 globalOptionsParser = do
-    configFile <-
-        optional $
-            strOption
-                ( long "config"
-                    <> short 'c'
-                    <> metavar "FILE"
-                    <> help "TOML config file (required)"
-                )
-
-    dbName <-
-        optional $
-            ( T.pack
-                <$> strOption
-                    ( long "db"
-                        <> metavar "NAME"
-                        <> help "Database name to query (from config file)"
-                    )
-            )
-
-    methodsDir <-
-        optional $
-            strOption
-                ( long "methods"
-                    <> metavar "PATH"
-                    <> help "Directory containing ILCD method XML files for LCIA"
-                )
-
+    configFile <- optStrOpt "config" (Just 'c') "FILE" "TOML config file (required)"
+    dbName <- optTextOpt "db" Nothing "NAME" "Database name to query (from config file)"
+    methodsDir <- optStrOpt "methods" Nothing "PATH" "Directory containing ILCD method XML files for LCIA"
     format <-
         optional $
             option
                 outputFormatReader
-                ( long "format"
-                    <> metavar "FORMAT"
-                    <> help "Output format: json|csv|table|pretty (default depends on command)"
-                )
-
-    jsonPath <-
-        optional $
-            ( T.pack
-                <$> strOption
-                    ( long "jsonpath"
-                        <> metavar "PATH"
-                        <> help "JSONPath for CSV extraction (required with --format csv). Examples: 'results', 'activity.exchanges'"
-                    )
-            )
-
-    noCache <-
-        switch
-            ( long "no-cache"
-                <> help "Disable caching for testing and development"
-            )
-
-    serverUrl <-
-        optional $
-            strOption
-                ( long "url"
-                    <> metavar "URL"
-                    <> help "Server URL for HTTP client mode (or set VOLCA_URL env var)"
-                )
-
-    serverPassword <-
-        optional $
-            strOption
-                ( long "password"
-                    <> metavar "PASSWORD"
-                    <> help "Password for authentication (or set VOLCA_PASSWORD env var)"
-                )
-
+                (long "format" <> metavar "FORMAT" <> help "Output format: json|csv|table|pretty (default depends on command)")
+    jsonPath <- optTextOpt "jsonpath" Nothing "PATH" "JSONPath for CSV extraction (required with --format csv). Examples: 'results', 'activity.exchanges'"
+    noCache <- switch (long "no-cache" <> help "Disable caching for testing and development")
+    serverUrl <- optStrOpt "url" Nothing "URL" "Server URL for HTTP client mode (or set VOLCA_URL env var)"
+    serverPassword <- optStrOpt "password" Nothing "PASSWORD" "Password for authentication (or set VOLCA_PASSWORD env var)"
     pure GlobalOptions{..}
 
 -- | Output format reader for optparse-applicative
@@ -162,28 +148,14 @@ pluginParser =
 -- | Shared upload arguments parser (positional FILE, --name, --description)
 uploadArgsParser :: Parser UploadArgs
 uploadArgsParser = do
-    uaFile <- argument str (metavar "FILE" <> help "Archive or data file to upload (ZIP, 7z, tar.gz, tar.xz, XML, CSV)")
-    uaName <-
-        T.pack
-            <$> strOption
-                ( long "name"
-                    <> short 'n'
-                    <> metavar "NAME"
-                    <> help "Display name (required)"
-                )
-    uaDescription <-
-        optional $
-            T.pack
-                <$> strOption
-                    ( long "description"
-                        <> metavar "TEXT"
-                        <> help "Optional description"
-                    )
+    uaFile <- strArg "FILE" "Archive or data file to upload (ZIP, 7z, tar.gz, tar.xz, XML, CSV)"
+    uaName <- textOpt "name" (Just 'n') "NAME" "Display name (required)"
+    uaDescription <- optTextOpt "description" Nothing "TEXT" "Optional description"
     pure UploadArgs{..}
 
 -- | Delete name parser (positional NAME)
 deleteNameParser :: Parser Text
-deleteNameParser = T.pack <$> argument str (metavar "NAME" <> help "Name of the resource to delete")
+deleteNameParser = textArg "NAME" "Name of the resource to delete"
 
 -- | Server command parser
 serverParser :: Parser Command
@@ -191,51 +163,14 @@ serverParser = Server <$> serverOptionsParser
 
 serverOptionsParser :: Parser ServerOptions
 serverOptionsParser = do
-    serverPort <-
-        optional $
-            option
-                auto
-                ( long "port"
-                    <> short 'p'
-                    <> metavar "PORT"
-                    <> help "Server port (overrides [server].port from config; default: 8080)"
-                )
+    serverPort <- optIntOpt "port" (Just 'p') "PORT" "Server port (overrides [server].port from config; default: 8080)"
     serverLoadDbs <-
         optional $
-            option
-                dbListReader
-                ( long "load"
-                    <> metavar "DB1,DB2,..."
-                    <> help "Comma-separated list of databases to load at startup (overrides config load=true)"
-                )
-    serverDesktopMode <-
-        switch
-            ( long "desktop"
-                <> help "Desktop mode: print VOLCA_PORT=N on startup for launcher integration"
-            )
-    serverStaticDir <-
-        optional $
-            strOption
-                ( long "static-dir"
-                    <> metavar "PATH"
-                    <> help "Override default static file directory (default: web/dist)"
-                )
-    serverIdleTimeout <-
-        option
-            auto
-            ( long "idle-timeout"
-                <> value 0
-                <> metavar "SECONDS"
-                <> help "Shutdown after N seconds of inactivity (0=disabled, default: 0)"
-            )
-    serverTreeDepth <-
-        option
-            auto
-            ( long "tree-depth"
-                <> value 2
-                <> metavar "DEPTH"
-                <> help "Default max depth for the /tree endpoint (default: 2)"
-            )
+            option dbListReader (long "load" <> metavar "DB1,DB2,..." <> help "Comma-separated list of databases to load at startup (overrides config load=true)")
+    serverDesktopMode <- switch (long "desktop" <> help "Desktop mode: print VOLCA_PORT=N on startup for launcher integration")
+    serverStaticDir <- optStrOpt "static-dir" Nothing "PATH" "Override default static file directory (default: web/dist)"
+    serverIdleTimeout <- option auto (long "idle-timeout" <> value 0 <> metavar "SECONDS" <> help "Shutdown after N seconds of inactivity (0=disabled, default: 0)")
+    serverTreeDepth <- option auto (long "tree-depth" <> value 2 <> metavar "DEPTH" <> help "Default max depth for the /tree endpoint (default: 2)")
     pure ServerOptions{..}
 
 -- | Reader for comma-separated list of database names
@@ -270,88 +205,20 @@ flowSubCommandParser =
 -- | Search activities parser (now top-level)
 searchActivitiesParser :: Parser Command
 searchActivitiesParser = do
-    searchName <-
-        optional $
-            strOption
-                ( long "name"
-                    <> metavar "TERM"
-                    <> help "Search by activity name"
-                )
-
-    searchGeo <-
-        optional $
-            strOption
-                ( long "geo"
-                    <> metavar "LOCATION"
-                    <> help "Filter by geography (exact match)"
-                )
-
-    searchProduct <-
-        optional $
-            strOption
-                ( long "product"
-                    <> metavar "PRODUCT"
-                    <> help "Filter by reference product"
-                )
-
-    searchLimit <-
-        optional $
-            option
-                auto
-                ( long "limit"
-                    <> metavar "N"
-                    <> help "Limit number of results (max 1000, default 50)"
-                )
-
-    searchOffset <-
-        optional $
-            option
-                auto
-                ( long "offset"
-                    <> metavar "N"
-                    <> help "Offset for pagination (default 0)"
-                )
-
+    searchName <- optTextOpt "name" Nothing "TERM" "Search by activity name"
+    searchGeo <- optTextOpt "geo" Nothing "LOCATION" "Filter by geography (exact match)"
+    searchProduct <- optTextOpt "product" Nothing "PRODUCT" "Filter by reference product"
+    searchLimit <- optIntOpt "limit" Nothing "N" "Limit number of results (max 1000, default 50)"
+    searchOffset <- optIntOpt "offset" Nothing "N" "Offset for pagination (default 0)"
     pure $ SearchActivities SearchActivitiesOptions{..}
 
 -- | Search flows parser (now top-level)
 searchFlowsParser :: Parser Command
 searchFlowsParser = do
-    searchQuery <-
-        optional $
-            strOption
-                ( long "query"
-                    <> short 'q'
-                    <> metavar "TERM"
-                    <> help "Search term for flow names and synonyms"
-                )
-
-    searchLang <-
-        optional $
-            strOption
-                ( long "lang"
-                    <> metavar "LANG"
-                    <> help "Language for synonym search"
-                )
-
-    searchFlowsLimit <-
-        optional $
-            option
-                auto
-                ( long "limit"
-                    <> metavar "N"
-                    <> help "Limit number of results"
-                )
-
-    searchFlowsOffset <-
-        optional $
-            option
-                auto
-                ( long "offset"
-                    <> metavar "N"
-                    <> help "Offset for pagination"
-                )
-
+    searchQuery <- optTextOpt "query" (Just 'q') "TERM" "Search term for flow names and synonyms"
+    searchLang <- optTextOpt "lang" Nothing "LANG" "Language for synonym search"
+    searchFlowsLimit <- optIntOpt "limit" Nothing "N" "Limit number of results"
+    searchFlowsOffset <- optIntOpt "offset" Nothing "N" "Offset for pagination"
     pure $ SearchFlows SearchFlowsOptions{..}
 
 -- | Impacts (LCIA) command parser
@@ -364,32 +231,9 @@ impactsParser = do
 -- | LCIA options parser
 lciaOptionsParser :: Parser LCIAOptions
 lciaOptionsParser = do
-    lciaMethodId <-
-        T.pack
-            <$> strOption
-                ( long "method"
-                    <> short 'm'
-                    <> metavar "METHOD_UUID"
-                    <> help "Method UUID (method must be loaded on the server)"
-                )
-
-    lciaOutput <-
-        optional $
-            strOption
-                ( long "output"
-                    <> short 'o'
-                    <> metavar "FILE"
-                    <> help "Export results to XML ILCD format"
-                )
-
-    lciaCSV <-
-        optional $
-            strOption
-                ( long "csv"
-                    <> metavar "FILE"
-                    <> help "Export results to CSV format"
-                )
-
+    lciaMethodId <- textOpt "method" (Just 'm') "METHOD_UUID" "Method UUID (method must be loaded on the server)"
+    lciaOutput <- optStrOpt "output" (Just 'o') "FILE" "Export results to XML ILCD format"
+    lciaCSV <- optStrOpt "csv" Nothing "FILE" "Export results to CSV format"
     pure LCIAOptions{..}
 
 -- | Debug matrices command parser
@@ -402,29 +246,13 @@ debugMatricesParser = do
 -- | Debug matrices options parser
 debugMatricesOptionsParser :: Parser DebugMatricesOptions
 debugMatricesOptionsParser = do
-    debugOutput <-
-        strOption
-            ( long "output"
-                <> short 'o'
-                <> metavar "FILE"
-                <> help "Base filename for debug output (will generate _supply_chain.csv and _biosphere_matrix.csv)"
-            )
-
-    debugFlowFilter <-
-        optional $
-            strOption
-                ( long "flow-filter"
-                    <> metavar "FLOW"
-                    <> help "Filter to specific biosphere flow (e.g., 'Sulphur dioxide')"
-                )
-
+    debugOutput <- strOpt "output" (Just 'o') "FILE" "Base filename for debug output (will generate _supply_chain.csv and _biosphere_matrix.csv)"
+    debugFlowFilter <- optTextOpt "flow-filter" Nothing "FLOW" "Filter to specific biosphere flow (e.g., 'Sulphur dioxide')"
     pure DebugMatricesOptions{..}
 
 -- | Export matrices parser
 exportMatricesParser :: Parser Command
-exportMatricesParser = do
-    outputDir <- argument str (metavar "OUTPUT_DIR" <> help "Output directory for matrix export")
-    pure $ ExportMatrices outputDir
+exportMatricesParser = ExportMatrices <$> strArg "OUTPUT_DIR" "Output directory for matrix export"
 
 {- | Flow mapping command parser (renamed from 'mapping' to disambiguate
 from compartment-mapping and similar resources).

From ed74afdfdadb007934e115c3f8e3f02aadf7543b Mon Sep 17 00:00:00 2001
From: Christophe Combelles <ccomb@free.fr>
Date: Tue, 26 May 2026 01:21:57 +0200
Subject: [PATCH 10/43] chore(dbhandlers): drop unused Handler import
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Cleanup leftover after the AppM migration in commit d0a9b1f. The file
no longer references Servant's Handler directly — every former
`Handler X` signature is now `AppM X`, so the `Handler` symbol is
dead in the import list. Restores the build to the pre-branch warning
count (2: pre-existing MCP unused-import / unused-match).
---
 src/API/DatabaseHandlers.hs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/API/DatabaseHandlers.hs b/src/API/DatabaseHandlers.hs
index c87f21fe..ae990c6e 100644
--- a/src/API/DatabaseHandlers.hs
+++ b/src/API/DatabaseHandlers.hs
@@ -54,7 +54,7 @@ import Data.Text (Text)
 import qualified Data.Text as T
 import qualified Data.Text.Encoding as T
 import qualified Data.Text.IO as T
-import Servant (Handler, Header, Headers, addHeader, err400, err404, err500, errBody, throwError)
+import Servant (Header, Headers, addHeader, err400, err404, err500, errBody, throwError)
 import qualified System.Directory
 import System.FilePath ((</>))
 

From d810d3f146fbdc43ead7e04e1261638760860c14 Mon Sep 17 00:00:00 2001
From: Christophe Combelles <ccomb@free.fr>
Date: Tue, 26 May 2026 11:06:19 +0200
Subject: [PATCH 11/43] refactor(schema): co-locate ToSchema with data
 declarations via Stripped
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Resolves the orphan / compilation-cycle deferral from C2. Each domain
data type now declares its own ToSchema next to its ToJSON / FromJSON
instance, derived via the same Stripped newtype that already carries
the JSON dictionaries.

Effect on src/API/OpenApi.hs: 293 → 144 lines. The module loses its
role as the orphan ToSchema collector and shrinks to a few special
cases — Value (untyped JSON), LocationKind (lowercase wire codes that
the generic schema would not surface), BinaryContent (OctetStream
format), plus the small handful of Database.Manager domain types
(MissingSupplier, DependencyStatus, DependencyChoice, LocationFallback,
LocationUnresolved, DatabaseSetupInfo) — and the enrichWithResources
post-processor that was always its real reason to exist.

Three categories handled:
  1. Records that already had Stripped JSON deriving — added ToSchema
     to the same `deriving (...) via (Stripped X)` clause.
  2. Sum types with default Generic encoding (TechRole, BioDirection
     in Types.hs; NodeType, EdgeType, FlowRole in API/Types.hs) —
     added `deriving anyclass (ToSchema)`.
  3. Polymorphic SearchResults a — standalone `deriving via (Stripped
     (SearchResults a)) instance ToSchema a => ToSchema (SearchResults
     a)` (the polymorphic context requires standalone).

Two custom ToSchema instances (ApiFlow with its discriminated `kind`
union; PerturbedEntry with its flattened Either) moved from OpenApi.hs
to API/Types.hs alongside their data declarations — same logic,
different home. The lens / openapi3 imports came along.

Net: −53 LOC across three files. openapi.json byte-identical; hspec
1052/1052 green.

Categorically: this is the same isomorphism transport (Coercible
witness) as the JSON commits, just for a third typeclass. With
ToJSON / FromJSON / ToSchema all derived through one newtype, the
"this type's wire form" definition lives in one place — what
Milewski calls /faithful representation/: the schema and the
serializer cannot drift, because they share their Generic Rep.
---
 src/API/OpenApi.hs | 169 ++-----------------------------
 src/API/Types.hs   | 242 +++++++++++++++++++++++++++++++--------------
 src/Types.hs       |  16 +--
 3 files changed, 187 insertions(+), 240 deletions(-)

diff --git a/src/API/OpenApi.hs b/src/API/OpenApi.hs
index 57fd5a54..febd7612 100644
--- a/src/API/OpenApi.hs
+++ b/src/API/OpenApi.hs
@@ -18,17 +18,16 @@ import API.JsonOptions (strippedSchemaOptions)
 import API.Resources (Resource)
 import qualified API.Resources as R
 import API.Types
-import Control.Lens ((%~), (&), (.~), (?~), (^.))
+import Control.Lens ((%~), (&), (?~), (^.))
 import Data.Aeson (Value, toJSON)
 import qualified Data.HashMap.Strict.InsOrd as InsOrdHashMap
 import Data.OpenApi
 import qualified Data.OpenApi.Lens as OA
-import Data.Proxy (Proxy (..))
 import Data.Text (Text)
 import qualified Data.Text as T
 import Database.Manager (DatabaseSetupInfo, DependencyChoice, DependencyStatus, MissingSupplier)
 import Network.HTTP.Types.Method (StdMethod (..))
-import Types (BioDirection, BiosphereFlow, Compartment, Exchange, LocationFallback, LocationKind, LocationUnresolved, Pedigree, TechRole, TechnosphereFlow, Unit)
+import Types (LocationFallback, LocationKind, LocationUnresolved)
 
 {- | Orphan schema instance forward declaration for the login request body.
 The real type lives in "API.Routes"; this is defined there and re-imported
@@ -40,15 +39,8 @@ the type in "API.Routes" — see 'instance ToSchema LoginRequest' there.
 instance ToSchema Value where
     declareNamedSchema _ = pure $ NamedSchema (Just "JsonValue") mempty
 
--- Domain types
-instance ToSchema TechRole
-instance ToSchema BioDirection
-instance ToSchema Compartment where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions
-instance ToSchema Unit where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions
-instance ToSchema TechnosphereFlow where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions
-instance ToSchema BiosphereFlow where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions
-instance ToSchema Pedigree where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions
-instance ToSchema Exchange where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions
+-- Domain types: TechRole, BioDirection, Unit now derive ToSchema next to
+-- their data declarations in src/Types.hs (via anyclass / DerivingVia).
 
 -- Database.Manager types
 instance ToSchema MissingSupplier
@@ -78,155 +70,14 @@ instance ToSchema DatabaseSetupInfo where declareNamedSchema = genericDeclareNam
 
 -- API.Types — every record type uses strippedSchemaOptions so the generated
 -- OpenAPI spec matches the wire JSON keys produced by API.JsonOptions.stripLowerPrefix.
-instance ToSchema ClassificationSystem where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions
-instance (ToSchema a) => ToSchema (SearchResults a) where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions
-instance ToSchema ActivitySummary where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions
-instance ToSchema ConsumerResult where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions
-instance ToSchema ConsumersResponse where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions
-instance ToSchema FlowSearchResult where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions
-instance ToSchema InventoryExport where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions
-instance ToSchema InventoryMetadata where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions
-instance ToSchema InventoryFlowDetail where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions
-instance ToSchema InventoryStatistics where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions
-instance ToSchema TreeExport where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions
-instance ToSchema TreeMetadata where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions
-instance ToSchema ExportNode where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions
-instance ToSchema NodeType
-instance ToSchema EdgeType
-instance ToSchema TreeEdge where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions
-instance ToSchema FlowInfo where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions
-instance ToSchema FlowSummary where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions
-instance ToSchema FlowRole
-instance ToSchema GraphExport where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions
-instance ToSchema GraphNode where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions
-instance ToSchema GraphEdge where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions
-instance ToSchema LCIAResult where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions
-instance ToSchema ScoringIndicator where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions
-instance ToSchema LCIABatchResult where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions
-instance ToSchema BatchImpactsRequest where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions
-instance ToSchema BatchImpactsEntry where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions
-instance ToSchema BatchImpactsResponse where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions
-instance ToSchema FlowContributionEntry where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions
-instance ToSchema ContributingFlowsResult where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions
-instance ToSchema ActivityContribution where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions
-instance ToSchema ContributingActivitiesResult where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions
-instance ToSchema MappingStatus where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions
-instance ToSchema UnmappedFlowAPI where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions
-instance ToSchema FlowCFMapping where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions
-instance ToSchema FlowCFEntry where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions
-instance ToSchema CharacterizationResult where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions
-instance ToSchema CharacterizationEntry where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions
-instance ToSchema DatabaseListResponse where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions
-instance ToSchema DatabaseStatusAPI where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions
-instance ToSchema ActivateResponse where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions
-instance ToSchema RelinkResponse where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions
-instance ToSchema DepLoadResult where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions
-instance ToSchema LoadDatabaseResponse where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions
-instance ToSchema UploadRequest where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions
-instance ToSchema UploadResponse where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions
-instance ToSchema MethodCollectionListResponse where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions
-instance ToSchema MethodCollectionStatusAPI where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions
-instance ToSchema RefDataListResponse where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions
-instance ToSchema RefDataStatusAPI where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions
-instance ToSchema SynonymGroupsResponse where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions
-instance ToSchema MethodSummary where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions
-instance ToSchema MethodDetail where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions
-instance ToSchema MethodFactorAPI where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions
-instance ToSchema SupplyChainResponse where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions
-instance ToSchema SupplyChainEntry where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions
-instance ToSchema SupplyChainEdge where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions
-instance ToSchema Aggregation where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions
-instance ToSchema AggregationGroup where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions
-instance ToSchema SubstitutionRequest where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions
-instance ToSchema Substitution where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions
-instance ToSchema SensitivityRequest where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions
-instance ToSchema SensitivityResponse where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions
+-- ToSchema (SearchResults a) standalone-derived in API.Types via Stripped.
+-- ToSchema for ApiFlow, NodeType, EdgeType, FlowRole now derived next to
+-- their data declarations in src/API/Types.hs (NodeType/EdgeType/FlowRole
+-- via anyclass; ApiFlow has a custom instance there to keep the discriminated
+-- `kind` union representation).
 
--- Manual schema for ApiFlow — discriminated by 'kind' so OpenAPI consumers
--- see a real tagged union instead of a generic Either.
-instance ToSchema ApiFlow where
-    declareNamedSchema _ = do
-        techRef <- declareSchemaRef (Proxy :: Proxy TechnosphereFlow)
-        bioRef <- declareSchemaRef (Proxy :: Proxy BiosphereFlow)
-        let kindEnum =
-                mempty
-                    & type_ ?~ OpenApiString
-                    & enum_
-                        ?~ [ toJSON ("technosphere" :: Text)
-                           , toJSON ("biosphere" :: Text)
-                           , toJSON ("unresolved" :: Text)
-                           ]
-            tech =
-                mempty
-                    & type_ ?~ OpenApiObject
-                    & properties
-                        .~ InsOrdHashMap.fromList
-                            [ ("kind", Inline (mempty & type_ ?~ OpenApiString & enum_ ?~ [toJSON ("technosphere" :: Text)]))
-                            , ("flow", techRef)
-                            ]
-                    & required .~ ["kind", "flow"]
-            bio =
-                mempty
-                    & type_ ?~ OpenApiObject
-                    & properties
-                        .~ InsOrdHashMap.fromList
-                            [ ("kind", Inline (mempty & type_ ?~ OpenApiString & enum_ ?~ [toJSON ("biosphere" :: Text)]))
-                            , ("flow", bioRef)
-                            ]
-                    & required .~ ["kind", "flow"]
-            unresolved =
-                mempty
-                    & type_ ?~ OpenApiObject
-                    & properties
-                        .~ InsOrdHashMap.fromList
-                            [ ("kind", Inline (mempty & type_ ?~ OpenApiString & enum_ ?~ [toJSON ("unresolved" :: Text)]))
-                            , ("id", Inline (mempty & type_ ?~ OpenApiString & format ?~ "uuid"))
-                            ]
-                    & required .~ ["kind", "id"]
-        pure $
-            NamedSchema (Just "ApiFlow") $
-                mempty
-                    & type_ ?~ OpenApiObject
-                    & properties
-                        .~ InsOrdHashMap.fromList
-                            [ ("kind", Inline kindEnum)
-                            ]
-                    & required .~ ["kind"]
-                    & OA.oneOf ?~ [Inline tech, Inline bio, Inline unresolved]
+-- PerturbedEntry's custom schema moved to API.Types alongside its data decl.
 
--- Manual schema: the Either inside PerturbedEntry is flattened by ToJSON
--- to {perturbation, impact, deltaImpact} on success and {perturbation, error}
--- on failure. The Generic-derived schema would expose the Haskell shape
--- (a oneOf wrapper around the Either) instead of the flat wire format.
-instance ToSchema PerturbedEntry where
-    declareNamedSchema _ = do
-        pertRef <- declareSchemaRef (Proxy :: Proxy Perturbation)
-        lciaRef <- declareSchemaRef (Proxy :: Proxy LCIAResult)
-        doubleRef <- declareSchemaRef (Proxy :: Proxy Double)
-        textRef <- declareSchemaRef (Proxy :: Proxy Text)
-        pure $
-            NamedSchema (Just "PerturbedEntry") $
-                mempty
-                    & type_ ?~ OpenApiObject
-                    & properties
-                        .~ InsOrdHashMap.fromList
-                            [ ("perturbation", pertRef)
-                            , ("impact", lciaRef)
-                            , ("deltaImpact", doubleRef)
-                            , ("error", textRef)
-                            ]
-                    & required .~ ["perturbation"]
-instance ToSchema Perturbation where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions
-instance ToSchema ExchangeDetail where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions
-instance ToSchema ExchangeWithUnit where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions
-instance ToSchema ActivityForAPI where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions
-instance ToSchema ActivityInfo where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions
-instance ToSchema ActivityMetadata where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions
-instance ToSchema ActivityLinks where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions
-instance ToSchema ActivityStats where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions
-instance ToSchema FlowDetail where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions
-instance ToSchema ClassificationEntryInfo where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions
-instance ToSchema ClassificationPresetInfo where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions
 instance ToSchema BinaryContent where
     declareNamedSchema _ =
         pure $
diff --git a/src/API/Types.hs b/src/API/Types.hs
index 81fb81d2..45a5ec74 100644
--- a/src/API/Types.hs
+++ b/src/API/Types.hs
@@ -1,16 +1,23 @@
+{-# LANGUAGE DeriveAnyClass #-}
 {-# LANGUAGE DeriveGeneric #-}
+{-# LANGUAGE DerivingVia #-}
 {-# LANGUAGE MultiParamTypeClasses #-}
 {-# LANGUAGE OverloadedStrings #-}
-{-# LANGUAGE DerivingVia #-}
+{-# LANGUAGE StandaloneDeriving #-}
 {-# OPTIONS_GHC -Wno-orphans #-}
 
 module API.Types where
 
 import API.JsonOptions (Stripped (..), strippedParseJSON, strippedToEncoding, strippedToJSON)
+import Control.Lens ((&), (.~), (?~))
 import Data.Aeson
 import Data.Aeson.Types (Parser)
 import qualified Data.ByteString.Lazy as BSL
+import qualified Data.HashMap.Strict.InsOrd as InsOrdHashMap
 import qualified Data.Map as M
+import Data.OpenApi (NamedSchema (..), OpenApiType (..), Referenced (..), ToSchema (..), declareSchemaRef, enum_, format, properties, required, type_)
+import qualified Data.OpenApi.Lens as OA
+import Data.Proxy (Proxy (..))
 import qualified Data.Set as S
 import Data.Text (Text)
 import qualified Data.Text as T
@@ -95,7 +102,60 @@ instance FromJSON ApiFlow where
             "unresolved" -> ApiUnresolvedFlow <$> o .: "id"
             other -> fail $ "ApiFlow.kind must be \"technosphere\", \"biosphere\", \"waste\", or \"unresolved\", got: " <> T.unpack other
 
--- | Search response combining results and count
+-- | Manual schema for ApiFlow — discriminated by 'kind' so OpenAPI consumers
+-- see a real tagged union instead of a generic Either.
+instance ToSchema ApiFlow where
+    declareNamedSchema _ = do
+        techRef <- declareSchemaRef (Proxy :: Proxy TechnosphereFlow)
+        bioRef <- declareSchemaRef (Proxy :: Proxy BiosphereFlow)
+        let kindEnum =
+                mempty
+                    & type_ ?~ OpenApiString
+                    & enum_
+                        ?~ [ toJSON ("technosphere" :: Text)
+                           , toJSON ("biosphere" :: Text)
+                           , toJSON ("unresolved" :: Text)
+                           ]
+            tech =
+                mempty
+                    & type_ ?~ OpenApiObject
+                    & properties
+                        .~ InsOrdHashMap.fromList
+                            [ ("kind", Inline (mempty & type_ ?~ OpenApiString & enum_ ?~ [toJSON ("technosphere" :: Text)]))
+                            , ("flow", techRef)
+                            ]
+                    & required .~ ["kind", "flow"]
+            bio =
+                mempty
+                    & type_ ?~ OpenApiObject
+                    & properties
+                        .~ InsOrdHashMap.fromList
+                            [ ("kind", Inline (mempty & type_ ?~ OpenApiString & enum_ ?~ [toJSON ("biosphere" :: Text)]))
+                            , ("flow", bioRef)
+                            ]
+                    & required .~ ["kind", "flow"]
+            unresolved =
+                mempty
+                    & type_ ?~ OpenApiObject
+                    & properties
+                        .~ InsOrdHashMap.fromList
+                            [ ("kind", Inline (mempty & type_ ?~ OpenApiString & enum_ ?~ [toJSON ("unresolved" :: Text)]))
+                            , ("id", Inline (mempty & type_ ?~ OpenApiString & format ?~ "uuid"))
+                            ]
+                    & required .~ ["kind", "id"]
+        pure $
+            NamedSchema (Just "ApiFlow") $
+                mempty
+                    & type_ ?~ OpenApiObject
+                    & properties
+                        .~ InsOrdHashMap.fromList
+                            [ ("kind", Inline kindEnum)
+                            ]
+                    & required .~ ["kind"]
+                    & OA.oneOf ?~ [Inline tech, Inline bio, Inline unresolved]
+
+-- | Search response combining results and count. ToSchema is added below
+-- via a standalone deriving (needed because of the `(ToSchema a) =>` context).
 data SearchResults a = SearchResults
     { srResults :: [a] -- The actual search results
     , srTotal :: Int -- Total count of all matching items (before pagination)
@@ -106,6 +166,10 @@ data SearchResults a = SearchResults
     }
     deriving (Generic)
 
+deriving via (Stripped (SearchResults a)) instance ToJSON a => ToJSON (SearchResults a)
+deriving via (Stripped (SearchResults a)) instance FromJSON a => FromJSON (SearchResults a)
+deriving via (Stripped (SearchResults a)) instance ToSchema a => ToSchema (SearchResults a)
+
 -- | Minimal activity information for navigation
 data ActivitySummary = ActivitySummary
     { prsProcessId :: Text -- ProcessId format: activity_uuid_product_uuid
@@ -118,7 +182,7 @@ data ActivitySummary = ActivitySummary
     , prsAllocationFormula :: Maybe Text -- Raw SimaPro allocation formula; Nothing if purely numeric
     }
     deriving (Generic)
-    deriving (ToJSON, FromJSON) via (Stripped ActivitySummary)
+    deriving (ToJSON, FromJSON, ToSchema) via (Stripped ActivitySummary)
 
 -- | Consumer result — ActivitySummary enriched with BFS depth from the queried supplier
 data ConsumerResult = ConsumerResult
@@ -132,7 +196,7 @@ data ConsumerResult = ConsumerResult
     , crClassifications :: M.Map Text Text -- Classifications (ISIC, CPC, Category, etc.), mirrors SupplyChainEntry
     }
     deriving (Generic)
-    deriving (ToJSON, FromJSON) via (Stripped ConsumerResult)
+    deriving (ToJSON, FromJSON, ToSchema) via (Stripped ConsumerResult)
 
 {- | Wrapper for /consumers responses. Mirrors 'SupplyChainResponse' so clients
 have a uniform {entries, edges} shape in both traversal directions. Edges
@@ -144,7 +208,7 @@ data ConsumersResponse = ConsumersResponse
     , crrEdges :: ![SupplyChainEdge]
     }
     deriving (Generic)
-    deriving (ToJSON, FromJSON) via (Stripped ConsumersResponse)
+    deriving (ToJSON, FromJSON, ToSchema) via (Stripped ConsumersResponse)
 
 -- | Enhanced flow information for search results (now includes synonyms)
 data FlowSearchResult = FlowSearchResult
@@ -155,7 +219,7 @@ data FlowSearchResult = FlowSearchResult
     , fsrSynonyms :: M.Map Text [Text] -- Synonyms by language (converted from Set to List for JSON)
     }
     deriving (Generic)
-    deriving (ToJSON) via (Stripped FlowSearchResult)
+    deriving (ToJSON, ToSchema) via (Stripped FlowSearchResult)
 
 -- | Inventory export data structures
 data InventoryExport = InventoryExport
@@ -164,7 +228,7 @@ data InventoryExport = InventoryExport
     , ieStatistics :: InventoryStatistics
     }
     deriving (Generic)
-    deriving (ToJSON) via (Stripped InventoryExport)
+    deriving (ToJSON, ToSchema) via (Stripped InventoryExport)
 
 data InventoryMetadata = InventoryMetadata
     { imRootActivity :: ActivitySummary
@@ -173,7 +237,7 @@ data InventoryMetadata = InventoryMetadata
     , imResourceFlows :: Int -- Biosphere inputs (resource extraction)
     }
     deriving (Generic)
-    deriving (ToJSON) via (Stripped InventoryMetadata)
+    deriving (ToJSON, ToSchema) via (Stripped InventoryMetadata)
 
 data InventoryFlowDetail = InventoryFlowDetail
     { ifdFlow :: BiosphereFlow -- Inventory flows are always biosphere
@@ -183,7 +247,7 @@ data InventoryFlowDetail = InventoryFlowDetail
     , ifdCategory :: Text -- Flow category for grouping
     }
     deriving (Generic)
-    deriving (ToJSON) via (Stripped InventoryFlowDetail)
+    deriving (ToJSON, ToSchema) via (Stripped InventoryFlowDetail)
 
 data InventoryStatistics = InventoryStatistics
     { isTotalQuantity :: Double -- Sum of absolute values
@@ -192,7 +256,7 @@ data InventoryStatistics = InventoryStatistics
     , isTopCategories :: [(Text, Int)] -- Top flow categories by count
     }
     deriving (Generic)
-    deriving (ToJSON) via (Stripped InventoryStatistics)
+    deriving (ToJSON, ToSchema) via (Stripped InventoryStatistics)
 
 -- | Tree export data structures for visualization
 data TreeExport = TreeExport
@@ -201,7 +265,7 @@ data TreeExport = TreeExport
     , teEdges :: [TreeEdge]
     }
     deriving (Generic)
-    deriving (ToJSON) via (Stripped TreeExport)
+    deriving (ToJSON, ToSchema) via (Stripped TreeExport)
 
 data TreeMetadata = TreeMetadata
     { tmRootId :: Text -- Changed to Text (ProcessId format)
@@ -212,7 +276,7 @@ data TreeMetadata = TreeMetadata
     , tmExpandableNodes :: Int -- Nodes that could expand further
     }
     deriving (Generic)
-    deriving (ToJSON) via (Stripped TreeMetadata)
+    deriving (ToJSON, ToSchema) via (Stripped TreeMetadata)
 
 data ExportNode = ExportNode
     { enId :: Text -- Changed to Text (ProcessId format)
@@ -228,13 +292,15 @@ data ExportNode = ExportNode
     , enCompartment :: Maybe Text -- Biosphere compartment (air/water/soil), only for BiosphereNodes
     }
     deriving (Generic)
-    deriving (ToJSON) via (Stripped ExportNode)
+    deriving (ToJSON, ToSchema) via (Stripped ExportNode)
 
 data NodeType = ActivityNode | LoopNode | BiosphereEmissionNode | BiosphereResourceNode
     deriving (Eq, Show, Generic)
+    deriving anyclass (ToSchema)
 
 data EdgeType = TechnosphereEdge | BiosphereEmissionEdge | BiosphereResourceEdge
     deriving (Eq, Show, Generic)
+    deriving anyclass (ToSchema)
 
 data TreeEdge = TreeEdge
     { teFrom :: Text -- Changed to Text (ProcessId format)
@@ -245,7 +311,7 @@ data TreeEdge = TreeEdge
     , teEdgeType :: EdgeType -- Type of edge (technosphere or biosphere)
     }
     deriving (Generic)
-    deriving (ToJSON) via (Stripped TreeEdge)
+    deriving (ToJSON, ToSchema) via (Stripped TreeEdge)
 
 data FlowInfo = FlowInfo
     { fiId :: UUID
@@ -253,7 +319,7 @@ data FlowInfo = FlowInfo
     , fiCategory :: Text
     }
     deriving (Generic)
-    deriving (ToJSON) via (Stripped FlowInfo)
+    deriving (ToJSON, ToSchema) via (Stripped FlowInfo)
 
 -- | Graph export data structures for network visualization
 data GraphExport = GraphExport
@@ -262,7 +328,7 @@ data GraphExport = GraphExport
     , geUnitGroups :: M.Map Text Text -- Unit to unit group mapping
     }
     deriving (Generic)
-    deriving (ToJSON) via (Stripped GraphExport)
+    deriving (ToJSON, ToSchema) via (Stripped GraphExport)
 
 data GraphNode = GraphNode
     { gnNodeId :: Int -- Numeric ID for efficient frontend processing
@@ -273,7 +339,7 @@ data GraphNode = GraphNode
     , gnLocation :: Text -- Geography
     }
     deriving (Generic)
-    deriving (ToJSON) via (Stripped GraphNode)
+    deriving (ToJSON, ToSchema) via (Stripped GraphNode)
 
 data GraphEdge = GraphEdge
     { geSource :: Int -- Source node ID
@@ -283,7 +349,7 @@ data GraphEdge = GraphEdge
     , geFlowName :: Text -- Name of the flow
     }
     deriving (Generic)
-    deriving (ToJSON) via (Stripped GraphEdge)
+    deriving (ToJSON, ToSchema) via (Stripped GraphEdge)
 
 {- | Lightweight flow information for lists. Carries either a tech or a bio
 flow; the @ApiFlow@ tag is the wire discriminator.
@@ -295,11 +361,12 @@ data FlowSummary = FlowSummary
     , fsRole :: FlowRole -- Role in this specific activity
     }
     deriving (Generic)
-    deriving (ToJSON) via (Stripped FlowSummary)
+    deriving (ToJSON, ToSchema) via (Stripped FlowSummary)
 
 -- | Role of a flow in a specific activity context
 data FlowRole = InputFlow | OutputFlow | ReferenceProductFlow
     deriving (Show, Generic)
+    deriving anyclass (ToSchema)
 
 -- Synonym types removed - synonyms are now included directly in flow responses
 
@@ -313,14 +380,14 @@ data MethodSummary = MethodSummary
     , msmCollection :: Text -- Parent collection name (e.g., "ef-31")
     }
     deriving (Generic)
-    deriving (ToJSON) via (Stripped MethodSummary)
+    deriving (ToJSON, ToSchema) via (Stripped MethodSummary)
 
 -- | Method collection list response
 newtype MethodCollectionListResponse = MethodCollectionListResponse
     { mclMethods :: [MethodCollectionStatusAPI]
     }
     deriving (Generic)
-    deriving (ToJSON, FromJSON) via (Stripped MethodCollectionListResponse)
+    deriving (ToJSON, FromJSON, ToSchema) via (Stripped MethodCollectionListResponse)
 
 -- | Method collection status for API responses
 data MethodCollectionStatusAPI = MethodCollectionStatusAPI
@@ -334,14 +401,14 @@ data MethodCollectionStatusAPI = MethodCollectionStatusAPI
     , mcaFormat :: Maybe Text -- Format (e.g. "ILCD")
     }
     deriving (Generic)
-    deriving (ToJSON, FromJSON) via (Stripped MethodCollectionStatusAPI)
+    deriving (ToJSON, FromJSON, ToSchema) via (Stripped MethodCollectionStatusAPI)
 
 -- | Reference data list response (flow synonyms, compartment mappings, units)
 newtype RefDataListResponse = RefDataListResponse
     { rdlItems :: [RefDataStatusAPI]
     }
     deriving (Generic)
-    deriving (ToJSON, FromJSON) via (Stripped RefDataListResponse)
+    deriving (ToJSON, FromJSON, ToSchema) via (Stripped RefDataListResponse)
 
 -- | Reference data status for API responses
 data RefDataStatusAPI = RefDataStatusAPI
@@ -354,14 +421,14 @@ data RefDataStatusAPI = RefDataStatusAPI
     , rdaEntryCount :: Int
     }
     deriving (Generic)
-    deriving (ToJSON, FromJSON) via (Stripped RefDataStatusAPI)
+    deriving (ToJSON, FromJSON, ToSchema) via (Stripped RefDataStatusAPI)
 
 -- | Synonym groups response
 newtype SynonymGroupsResponse = SynonymGroupsResponse
     { sgrGroups :: [[Text]]
     }
     deriving (Generic)
-    deriving (ToJSON, FromJSON) via (Stripped SynonymGroupsResponse)
+    deriving (ToJSON, FromJSON, ToSchema) via (Stripped SynonymGroupsResponse)
 
 -- | Full method details
 data MethodDetail = MethodDetail
@@ -374,7 +441,7 @@ data MethodDetail = MethodDetail
     , mdFactorCount :: Int
     }
     deriving (Generic)
-    deriving (ToJSON) via (Stripped MethodDetail)
+    deriving (ToJSON, ToSchema) via (Stripped MethodDetail)
 
 -- | Characterization factor for API response
 data MethodFactorAPI = MethodFactorAPI
@@ -384,7 +451,7 @@ data MethodFactorAPI = MethodFactorAPI
     , mfaValue :: Double -- CF value
     }
     deriving (Generic)
-    deriving (ToJSON) via (Stripped MethodFactorAPI)
+    deriving (ToJSON, ToSchema) via (Stripped MethodFactorAPI)
 
 -- | A single flow's contribution to an LCIA score
 data FlowContributionEntry = FlowContributionEntry
@@ -397,7 +464,7 @@ data FlowContributionEntry = FlowContributionEntry
     , fcoCfValue :: Double -- Raw characterization factor value
     }
     deriving (Generic)
-    deriving (ToJSON) via (Stripped FlowContributionEntry)
+    deriving (ToJSON, ToSchema) via (Stripped FlowContributionEntry)
 
 -- | LCIA result for a single impact category
 data LCIAResult = LCIAResult
@@ -414,7 +481,7 @@ data LCIAResult = LCIAResult
     , lrTopContributors :: [FlowContributionEntry] -- Top contributing elementary flows
     }
     deriving (Generic)
-    deriving (ToJSON) via (Stripped LCIAResult)
+    deriving (ToJSON, ToSchema) via (Stripped LCIAResult)
 
 -- | Contributing flows result: top elementary flows for a specific impact category
 data ContributingFlowsResult = ContributingFlowsResult
@@ -424,7 +491,7 @@ data ContributingFlowsResult = ContributingFlowsResult
     , cfrTopFlows :: [FlowContributionEntry]
     }
     deriving (Generic)
-    deriving (ToJSON) via (Stripped ContributingFlowsResult)
+    deriving (ToJSON, ToSchema) via (Stripped ContributingFlowsResult)
 
 -- | A single activity's contribution to an LCIA score
 data ActivityContribution = ActivityContribution
@@ -436,7 +503,7 @@ data ActivityContribution = ActivityContribution
     , acSharePct :: Double -- Percentage of total score (0-100)
     }
     deriving (Generic)
-    deriving (ToJSON) via (Stripped ActivityContribution)
+    deriving (ToJSON, ToSchema) via (Stripped ActivityContribution)
 
 -- | Contributing activities result: top upstream activities for a specific impact category
 data ContributingActivitiesResult = ContributingActivitiesResult
@@ -446,14 +513,14 @@ data ContributingActivitiesResult = ContributingActivitiesResult
     , carActivities :: [ActivityContribution]
     }
     deriving (Generic)
-    deriving (ToJSON) via (Stripped ContributingActivitiesResult)
+    deriving (ToJSON, ToSchema) via (Stripped ContributingActivitiesResult)
 
 -- | Batch impacts request: compute LCIA for every process in one call.
 newtype BatchImpactsRequest = BatchImpactsRequest
     { birProcessIds :: [Text]
     }
     deriving (Generic)
-    deriving (FromJSON) via (Stripped BatchImpactsRequest)
+    deriving (FromJSON, ToSchema) via (Stripped BatchImpactsRequest)
 
 -- | One entry of a batch impacts response.
 data BatchImpactsEntry = BatchImpactsEntry
@@ -462,7 +529,7 @@ data BatchImpactsEntry = BatchImpactsEntry
     , bieImpacts :: LCIABatchResult
     }
     deriving (Generic)
-    deriving (ToJSON) via (Stripped BatchImpactsEntry)
+    deriving (ToJSON, ToSchema) via (Stripped BatchImpactsEntry)
 
 {- | Batch impacts response: one entry per successfully computed process,
 plus lists of process ids that could not be resolved.
@@ -473,7 +540,7 @@ data BatchImpactsResponse = BatchImpactsResponse
     , birInvalid :: [Text]
     }
     deriving (Generic)
-    deriving (ToJSON) via (Stripped BatchImpactsResponse)
+    deriving (ToJSON, ToSchema) via (Stripped BatchImpactsResponse)
 
 {- | A single scoring-set indicator: the per-variable normalized-weighted value
 plus the impact category it came from. Value is pre-multiplied by the
@@ -484,7 +551,7 @@ data ScoringIndicator = ScoringIndicator
     , siValue :: Double
     }
     deriving (Generic)
-    deriving (ToJSON) via (Stripped ScoringIndicator)
+    deriving (ToJSON, ToSchema) via (Stripped ScoringIndicator)
 
 -- | Batch LCIA result with optional single score
 data LCIABatchResult = LCIABatchResult
@@ -501,7 +568,7 @@ data LCIABatchResult = LCIABatchResult
     -- ^ Scoring set name → (variable name → indicator). One row per scoring variable.
     }
     deriving (Generic)
-    deriving (ToJSON) via (Stripped LCIABatchResult)
+    deriving (ToJSON, ToSchema) via (Stripped LCIABatchResult)
 
 -- | Flow mapping status for a method
 data MappingStatus = MappingStatus
@@ -519,7 +586,7 @@ data MappingStatus = MappingStatus
     , mstUnmappedFlows :: [UnmappedFlowAPI] -- Details of unmapped flows
     }
     deriving (Generic)
-    deriving (ToJSON) via (Stripped MappingStatus)
+    deriving (ToJSON, ToSchema) via (Stripped MappingStatus)
 
 -- | Details about an unmapped flow
 data UnmappedFlowAPI = UnmappedFlowAPI
@@ -528,7 +595,7 @@ data UnmappedFlowAPI = UnmappedFlowAPI
     , ufaDirection :: Text -- "Input" or "Output"
     }
     deriving (Generic)
-    deriving (ToJSON) via (Stripped UnmappedFlowAPI)
+    deriving (ToJSON, ToSchema) via (Stripped UnmappedFlowAPI)
 
 -- | DB-flow-centric mapping: all biosphere flows with their CF assignments
 data FlowCFMapping = FlowCFMapping
@@ -539,7 +606,7 @@ data FlowCFMapping = FlowCFMapping
     , fcmFlows :: [FlowCFEntry]
     }
     deriving (Generic)
-    deriving (ToJSON) via (Stripped FlowCFMapping)
+    deriving (ToJSON, ToSchema) via (Stripped FlowCFMapping)
 
 -- | A single DB biosphere flow with its CF assignment (if any)
 data FlowCFEntry = FlowCFEntry
@@ -551,7 +618,7 @@ data FlowCFEntry = FlowCFEntry
     , fceMatchStrategy :: Maybe Text -- "uuid" | "name" | "synonym"
     }
     deriving (Generic)
-    deriving (ToJSON) via (Stripped FlowCFEntry)
+    deriving (ToJSON, ToSchema) via (Stripped FlowCFEntry)
 
 -- | Characterization result: matched CFs for a method in a database
 data CharacterizationResult = CharacterizationResult
@@ -562,7 +629,7 @@ data CharacterizationResult = CharacterizationResult
     , chrFactors :: [CharacterizationEntry]
     }
     deriving (Generic)
-    deriving (ToJSON) via (Stripped CharacterizationResult)
+    deriving (ToJSON, ToSchema) via (Stripped CharacterizationResult)
 
 -- | A single matched characterization factor
 data CharacterizationEntry = CharacterizationEntry
@@ -578,14 +645,14 @@ data CharacterizationEntry = CharacterizationEntry
     , cheMatchStrategy :: Text -- "uuid", "cas", "name", "synonym", "fuzzy"
     }
     deriving (Generic)
-    deriving (ToJSON) via (Stripped CharacterizationEntry)
+    deriving (ToJSON, ToSchema) via (Stripped CharacterizationEntry)
 
 -- | Database list response
 newtype DatabaseListResponse = DatabaseListResponse
     { dlrDatabases :: [DatabaseStatusAPI] -- All available databases
     }
     deriving (Generic)
-    deriving (ToJSON, FromJSON) via (Stripped DatabaseListResponse)
+    deriving (ToJSON, FromJSON, ToSchema) via (Stripped DatabaseListResponse)
 
 -- | Database status for API responses
 data DatabaseStatusAPI = DatabaseStatusAPI
@@ -601,7 +668,7 @@ data DatabaseStatusAPI = DatabaseStatusAPI
     , dsaDependsOn :: [Text] -- Names of databases this one depends on (for cross-DB linking)
     }
     deriving (Generic)
-    deriving (ToJSON, FromJSON) via (Stripped DatabaseStatusAPI)
+    deriving (ToJSON, FromJSON, ToSchema) via (Stripped DatabaseStatusAPI)
 
 -- | Response for database activation
 data ActivateResponse = ActivateResponse
@@ -610,7 +677,7 @@ data ActivateResponse = ActivateResponse
     , arDatabase :: Maybe DatabaseStatusAPI
     }
     deriving (Generic)
-    deriving (ToJSON, FromJSON) via (Stripped ActivateResponse)
+    deriving (ToJSON, FromJSON, ToSchema) via (Stripped ActivateResponse)
 
 {- | Response for the re-link endpoint: fresh cross-DB link stats after a
 second-pass linking against the currently-loaded databases.
@@ -623,21 +690,21 @@ data RelinkResponse = RelinkResponse
     , rrDependsOn :: [Text]
     }
     deriving (Generic)
-    deriving (ToJSON, FromJSON) via (Stripped RelinkResponse)
+    deriving (ToJSON, FromJSON, ToSchema) via (Stripped RelinkResponse)
 
 -- | Result of auto-loading a single dependency
 data DepLoadResult
     = DepLoaded {dlrName :: Text}
     | DepLoadFailed {dlfName :: Text, dlfError :: Text}
     deriving (Generic)
-    deriving (ToJSON, FromJSON) via (Stripped DepLoadResult)
+    deriving (ToJSON, FromJSON, ToSchema) via (Stripped DepLoadResult)
 
 -- | Response for the load database endpoint
 data LoadDatabaseResponse
     = LoadFailed {ldrError :: Text}
     | LoadSucceeded {ldrDatabase :: DatabaseStatusAPI, ldrDeps :: [DepLoadResult]}
     deriving (Generic)
-    deriving (ToJSON, FromJSON) via (Stripped LoadDatabaseResponse)
+    deriving (ToJSON, FromJSON, ToSchema) via (Stripped LoadDatabaseResponse)
 
 -- | Request for database upload (base64-encoded ZIP)
 data UploadRequest = UploadRequest
@@ -646,7 +713,7 @@ data UploadRequest = UploadRequest
     , urFileData :: Text -- Base64-encoded ZIP file content
     }
     deriving (Generic)
-    deriving (ToJSON, FromJSON) via (Stripped UploadRequest)
+    deriving (ToJSON, FromJSON, ToSchema) via (Stripped UploadRequest)
 
 -- | Response for database upload
 data UploadResponse = UploadResponse
@@ -656,7 +723,7 @@ data UploadResponse = UploadResponse
     , uprFormat :: Maybe Text -- Detected format (if successful)
     }
     deriving (Generic)
-    deriving (ToJSON, FromJSON) via (Stripped UploadResponse)
+    deriving (ToJSON, FromJSON, ToSchema) via (Stripped UploadResponse)
 
 -- | Supply chain response — all upstream activities with scaling factors
 data SupplyChainResponse = SupplyChainResponse
@@ -667,7 +734,7 @@ data SupplyChainResponse = SupplyChainResponse
     , scrEdges :: [SupplyChainEdge]
     }
     deriving (Generic)
-    deriving (ToJSON) via (Stripped SupplyChainResponse)
+    deriving (ToJSON, ToSchema) via (Stripped SupplyChainResponse)
 
 {- | A single entry in the supply chain. @sceProcessId@ is bare for entries
 from the root DB and qualified (@"dbName::pid"@) for entries reached via
@@ -687,7 +754,7 @@ data SupplyChainEntry = SupplyChainEntry
     , sceUpstreamCount :: Int -- number of unique upstream activities reachable from this one
     }
     deriving (Generic)
-    deriving (ToJSON) via (Stripped SupplyChainEntry)
+    deriving (ToJSON, ToSchema) via (Stripped SupplyChainEntry)
 
 -- | An edge in the upstream supply chain subgraph
 data SupplyChainEdge = SupplyChainEdge
@@ -698,7 +765,7 @@ data SupplyChainEdge = SupplyChainEdge
     , sceEdgeAmount :: Double -- technosphere coefficient
     }
     deriving (Generic)
-    deriving (ToJSON, FromJSON) via (Stripped SupplyChainEdge)
+    deriving (ToJSON, FromJSON, ToSchema) via (Stripped SupplyChainEdge)
 
 {- | Request body for POST endpoints that accept substitutions.
 Substitutions modify the scaling vector via Sherman-Morrison rank-1 updates.
@@ -707,7 +774,7 @@ newtype SubstitutionRequest = SubstitutionRequest
     { srSubstitutions :: [Substitution]
     }
     deriving (Generic)
-    deriving (FromJSON) via (Stripped SubstitutionRequest)
+    deriving (FromJSON, ToSchema) via (Stripped SubstitutionRequest)
 
 {- | A single supplier substitution.
 
@@ -724,7 +791,7 @@ data Substitution = Substitution
     , subConsumer :: Text -- Consumer activity ProcessId (bare or dbName::pid)
     }
     deriving (Generic)
-    deriving (FromJSON) via (Stripped Substitution)
+    deriving (FromJSON, ToSchema) via (Stripped Substitution)
 
 {- | A single rank-1 perturbation of a technosphere coefficient @A_ij@.
 
@@ -739,14 +806,14 @@ data Perturbation = Perturbation
     , perLabel :: Maybe Text -- Optional label for response correlation
     }
     deriving (Generic)
-    deriving (ToJSON, FromJSON) via (Stripped Perturbation)
+    deriving (ToJSON, FromJSON, ToSchema) via (Stripped Perturbation)
 
 -- | Request body for POST sensitivity endpoints. Flat list, V1.
 newtype SensitivityRequest = SensitivityRequest
     { srPerturbations :: [Perturbation]
     }
     deriving (Generic)
-    deriving (FromJSON) via (Stripped SensitivityRequest)
+    deriving (FromJSON, ToSchema) via (Stripped SensitivityRequest)
 
 {- | One result entry per perturbation. The 'peResult' carries either an
 error message ('Left') or the (impact, deltaImpact) pair ('Right'). The
@@ -761,13 +828,36 @@ data PerturbedEntry = PerturbedEntry
     }
     deriving (Generic)
 
+-- | Manual schema for PerturbedEntry: the Either inside is flattened by ToJSON
+-- to {perturbation, impact, deltaImpact} on success and {perturbation, error}
+-- on failure. The Generic-derived schema would expose the Haskell shape
+-- (a oneOf wrapper around the Either) instead of the flat wire format.
+instance ToSchema PerturbedEntry where
+    declareNamedSchema _ = do
+        pertRef <- declareSchemaRef (Proxy :: Proxy Perturbation)
+        lciaRef <- declareSchemaRef (Proxy :: Proxy LCIAResult)
+        doubleRef <- declareSchemaRef (Proxy :: Proxy Double)
+        textRef <- declareSchemaRef (Proxy :: Proxy Text)
+        pure $
+            NamedSchema (Just "PerturbedEntry") $
+                mempty
+                    & type_ ?~ OpenApiObject
+                    & properties
+                        .~ InsOrdHashMap.fromList
+                            [ ("perturbation", pertRef)
+                            , ("impact", lciaRef)
+                            , ("deltaImpact", doubleRef)
+                            , ("error", textRef)
+                            ]
+                    & required .~ ["perturbation"]
+
 -- | Sensitivity response: baseline LCIA + one entry per perturbation (in order).
 data SensitivityResponse = SensitivityResponse
     { srBaseline :: LCIAResult
     , srPerturbed :: [PerturbedEntry]
     }
     deriving (Generic)
-    deriving (ToJSON) via (Stripped SensitivityResponse)
+    deriving (ToJSON, ToSchema) via (Stripped SensitivityResponse)
 
 {- | Name of the request-level "root" database — the DB extracted from the
 URL path and the implicit target of any bare 'ProcessId' (one without the
@@ -817,7 +907,7 @@ data ExchangeWithUnit = ExchangeWithUnit
     , ewuPedigree :: Maybe Pedigree -- LCA data-quality scores when available (mirrors exchangePedigree)
     }
     deriving (Generic)
-    deriving (ToJSON, FromJSON) via (Stripped ExchangeWithUnit)
+    deriving (ToJSON, FromJSON, ToSchema) via (Stripped ExchangeWithUnit)
 
 -- | Activity information optimized for API responses
 data ActivityForAPI = ActivityForAPI
@@ -835,7 +925,7 @@ data ActivityForAPI = ActivityForAPI
     , pfaExchanges :: [ExchangeWithUnit] -- Exchanges with unit names
     }
     deriving (Generic)
-    deriving (ToJSON, FromJSON) via (Stripped ActivityForAPI)
+    deriving (ToJSON, FromJSON, ToSchema) via (Stripped ActivityForAPI)
 
 -- | Streamlined activity information - core data only
 data ActivityInfo = ActivityInfo
@@ -845,7 +935,7 @@ data ActivityInfo = ActivityInfo
     , piLinks :: ActivityLinks -- Links to sub-resources
     }
     deriving (Generic)
-    deriving (ToJSON, FromJSON) via (Stripped ActivityInfo)
+    deriving (ToJSON, FromJSON, ToSchema) via (Stripped ActivityInfo)
 
 -- | Extended activity metadata
 data ActivityMetadata = ActivityMetadata
@@ -857,7 +947,7 @@ data ActivityMetadata = ActivityMetadata
     , pmReferenceProductFlow :: Maybe UUID -- Flow ID of reference product
     }
     deriving (Generic)
-    deriving (ToJSON, FromJSON) via (Stripped ActivityMetadata)
+    deriving (ToJSON, FromJSON, ToSchema) via (Stripped ActivityMetadata)
 
 -- | Links to related resources
 data ActivityLinks = ActivityLinks
@@ -867,7 +957,7 @@ data ActivityLinks = ActivityLinks
     , plReferenceProductUrl :: Maybe Text -- URL to reference product (if exists)
     }
     deriving (Generic)
-    deriving (ToJSON, FromJSON) via (Stripped ActivityLinks)
+    deriving (ToJSON, FromJSON, ToSchema) via (Stripped ActivityLinks)
 
 -- | Activity statistics
 data ActivityStats = ActivityStats
@@ -877,7 +967,7 @@ data ActivityStats = ActivityStats
     , psLocation :: Text
     }
     deriving (Generic)
-    deriving (ToJSON, FromJSON) via (Stripped ActivityStats)
+    deriving (ToJSON, FromJSON, ToSchema) via (Stripped ActivityStats)
 
 -- | Flow with additional metadata. Carries either a tech or bio flow.
 data FlowDetail = FlowDetail
@@ -886,7 +976,7 @@ data FlowDetail = FlowDetail
     , fdUsageCount :: Int -- How many activities use this flow
     }
     deriving (Generic)
-    deriving (ToJSON) via (Stripped FlowDetail)
+    deriving (ToJSON, ToSchema) via (Stripped FlowDetail)
 
 {- | Exchange with flow, unit, and target activity information.
 The carried flow's variant lines up with the Exchange variant.
@@ -900,7 +990,7 @@ data ExchangeDetail = ExchangeDetail
     , edTargetActivity :: Maybe ActivitySummary -- Target activity for technosphere inputs
     }
     deriving (Generic)
-    deriving (ToJSON) via (Stripped ExchangeDetail)
+    deriving (ToJSON, ToSchema) via (Stripped ExchangeDetail)
 
 -- | A single filter entry returned in a preset
 data ClassificationEntryInfo = ClassificationEntryInfo
@@ -909,7 +999,7 @@ data ClassificationEntryInfo = ClassificationEntryInfo
     , ceiMode :: !Text -- "exact" or "contains"
     }
     deriving (Show, Eq, Generic)
-    deriving (ToJSON) via (Stripped ClassificationEntryInfo)
+    deriving (ToJSON, ToSchema) via (Stripped ClassificationEntryInfo)
 
 -- | A named filter preset (from TOML config)
 data ClassificationPresetInfo = ClassificationPresetInfo
@@ -919,7 +1009,7 @@ data ClassificationPresetInfo = ClassificationPresetInfo
     , cpiFilters :: ![ClassificationEntryInfo]
     }
     deriving (Show, Eq, Generic)
-    deriving (ToJSON) via (Stripped ClassificationPresetInfo)
+    deriving (ToJSON, ToSchema) via (Stripped ClassificationPresetInfo)
 
 -- | Classification system with its values for browsing/filtering
 data ClassificationSystem = ClassificationSystem
@@ -928,7 +1018,7 @@ data ClassificationSystem = ClassificationSystem
     , csActivityCount :: Int -- How many activities have this system
     }
     deriving (Generic)
-    deriving (ToJSON) via (Stripped ClassificationSystem)
+    deriving (ToJSON, ToSchema) via (Stripped ClassificationSystem)
 
 {- | Result of an /activity/{pid}/aggregate call.
 
@@ -943,7 +1033,7 @@ data Aggregation = Aggregation
     , aggGroups :: [AggregationGroup] -- one entry per group_by bucket (empty when group_by omitted)
     }
     deriving (Generic)
-    deriving (ToJSON) via (Stripped Aggregation)
+    deriving (ToJSON, ToSchema) via (Stripped Aggregation)
 
 -- | One bucket in an aggregation result.
 data AggregationGroup = AggregationGroup
@@ -954,16 +1044,18 @@ data AggregationGroup = AggregationGroup
     , aggCount :: Int
     }
     deriving (Generic)
-    deriving (ToJSON) via (Stripped AggregationGroup)
+    deriving (ToJSON, ToSchema) via (Stripped AggregationGroup)
 
 -- JSON instances. All record types use API.JsonOptions.stripLowerPrefix
 -- via the strippedToJSON/strippedToEncoding/strippedParseJSON helpers.
 -- Sum-only types (NodeType, EdgeType, FlowRole) keep default derivation.
-instance (ToJSON a) => ToJSON (SearchResults a) where toJSON = strippedToJSON; toEncoding = strippedToEncoding
+-- ToJSON / FromJSON / ToSchema for SearchResults a are standalone-derived
+-- alongside the data declaration above (line ~169).
 instance ToJSON NodeType
 instance ToJSON EdgeType
 instance ToJSON FlowRole
-instance ToJSON Unit where toJSON = strippedToJSON; toEncoding = strippedToEncoding
+-- ToJSON / FromJSON / ToSchema for Unit are derived via Stripped alongside
+-- its data declaration in src/Types.hs.
 
 -- Custom ToJSON for PerturbedEntry: flatten the Either so success entries
 -- have impact+deltaImpact and error entries have error.
@@ -975,7 +1067,7 @@ instance ToJSON PerturbedEntry where
                 Right (lcia, d) -> ["impact" .= lcia, "deltaImpact" .= d]
 
 -- FromJSON instances needed for API conversion
-instance (FromJSON a) => FromJSON (SearchResults a) where parseJSON = strippedParseJSON
+-- (FromJSON (SearchResults a) above)
 
 -- openapi3 cannot derive ToSchema for BSL.ByteString directly
 newtype BinaryContent = BinaryContent BSL.ByteString
diff --git a/src/Types.hs b/src/Types.hs
index 47ddec62..b054886e 100644
--- a/src/Types.hs
+++ b/src/Types.hs
@@ -36,6 +36,7 @@ import Data.List (nub)
 import Search.BM25.Types (BM25Index)
 import SynonymDB (normalizeName)
 import SynonymDB.Types (SynonymDB)
+import Data.OpenApi (ToSchema)
 
 -- | Orphan Store instance for UUID (16 bytes, host-native word order)
 instance Store UUID where
@@ -65,7 +66,7 @@ data Compartment = Compartment
     , compartmentSub :: !(Maybe Text) -- "high. pop.", "river water", …
     }
     deriving (Eq, Show, Generic, NFData, Store)
-    deriving (ToJSON, FromJSON) via (Stripped Compartment)
+    deriving (ToJSON, FromJSON, ToSchema) via (Stripped Compartment)
 
 {- | The biosphere flow's medium (air | water | soil | …), or @""@ when the
 source dataset omitted the compartment. Use 'bfCompartment' directly when
@@ -90,6 +91,7 @@ biosphere side also gets named variants instead of a load-bearing 'Bool'.
 -}
 data BioDirection = Resource | Emission
     deriving (Eq, Show, Generic, NFData, Store)
+    deriving anyclass (ToSchema)
 
 {- | Role of a technosphere exchange within its host activity. Names the four
 valid combinations of (input?, reference?). `ReferenceInput` is the
@@ -102,6 +104,7 @@ data TechRole
     | ReferenceInput -- main input of a treatment process
     | Input -- ordinary technosphere input
     deriving (Eq, Show, Generic, NFData, Store)
+    deriving anyclass (ToSchema)
 
 -- | Unit representation (kg, MJ, m³, etc.)
 data Unit = Unit
@@ -111,6 +114,7 @@ data Unit = Unit
     , unitComment :: !Text -- Description/comment
     }
     deriving (Generic, NFData, Store)
+    deriving (ToJSON, FromJSON, ToSchema) via (Stripped Unit)
 
 {- | Substance - groups flows with the same chemical identity across databases
 Used for flow matching between different LCA databases (ecoinvent, ILCD, SimaPro)
@@ -137,7 +141,7 @@ data TechnosphereFlow = TechnosphereFlow
     , tfSubstanceId :: !(Maybe Int)
     }
     deriving (Generic, NFData, Store)
-    deriving (ToJSON, FromJSON) via (Stripped TechnosphereFlow)
+    deriving (ToJSON, FromJSON, ToSchema) via (Stripped TechnosphereFlow)
 
 {- | A biosphere flow — an environmental exchange (resource extraction or
 emission). Always carries a `Compartment` identifying the medium.
@@ -157,7 +161,7 @@ data BiosphereFlow = BiosphereFlow
     -}
     }
     deriving (Generic, NFData, Store)
-    deriving (ToJSON, FromJSON) via (Stripped BiosphereFlow)
+    deriving (ToJSON, FromJSON, ToSchema) via (Stripped BiosphereFlow)
 
 {- | A waste flow — a residual output that a process generates and which a
 treatment activity may consume as its reference input. Sister type to
@@ -179,7 +183,7 @@ data WasteFlow = WasteFlow
     , wfSubstanceId :: !(Maybe Int)
     }
     deriving (Generic, NFData, Store)
-    deriving (ToJSON, FromJSON) via (Stripped WasteFlow)
+    deriving (ToJSON, FromJSON, ToSchema) via (Stripped WasteFlow)
 
 {- | Pedigree matrix (Weidema & Wesnæs 1996) — five LCA data-quality scores
 each in 1..5 (1 = best, 5 = worst). SimaPro CSV encodes it as a prefix in the
@@ -193,7 +197,7 @@ data Pedigree = Pedigree
     , pedTechnological :: !Int -- 1..5
     }
     deriving (Eq, Show, Generic, NFData, Store)
-    deriving (ToJSON, FromJSON) via (Stripped Pedigree)
+    deriving (ToJSON, FromJSON, ToSchema) via (Stripped Pedigree)
 
 {- | Smart constructor: rejects out-of-range values (anything not in 1..5)
 by returning Nothing. Callers should treat Nothing as "no pedigree
@@ -243,7 +247,7 @@ data Exchange
         , waPedigree :: !(Maybe Pedigree) -- LCA data-quality scores when available
         }
     deriving (Generic, NFData, Store)
-    deriving (ToJSON, FromJSON) via (Stripped Exchange)
+    deriving (ToJSON, FromJSON, ToSchema) via (Stripped Exchange)
 
 -- | Helper functions for Exchange variants
 exchangeFlowId :: Exchange -> UUID

From 7fdb56570f307f91dd83e0516cbddeadc18bee70 Mon Sep 17 00:00:00 2001
From: Christophe Combelles <ccomb@free.fr>
Date: Tue, 26 May 2026 11:09:59 +0200
Subject: [PATCH 12/43] refactor(api): loadCollection and crossDBSolutionFor
 read env, not args
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Drop the explicit DatabaseManager parameter from loadCollection and
crossDBSolutionFor; both now `asks getDatabaseManager` inside their
AppM body. Call sites lose the corresponding closure-captured arg.

Net delta is small (+2 LOC) — the body gains the `asks` line but each
call site loses one word. The win is architectural: every helper that
needs the database manager now declares it as a capability requirement
on the monad, not as a positional parameter on every signature, which
is the Has-pattern (capability-class projection) the prior commits
laid the foundation for.

The remaining helpers that still take DatabaseManager (prepMethodCtx,
buildPerDbSetTables, batchedScoresFor, computeCategoryResult,
mkMcpCrossDBEntry) run in IO, not AppM, so a similar conversion would
require either widening them to AppM (cascading change) or threading
a Reader instance through IO. Both belong to follow-ups (notably the
MCP Freer refactor, which would naturally bring the MCP-side helpers
into AppM as well).
---
 src/API/Routes.hs | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/src/API/Routes.hs b/src/API/Routes.hs
index 3bcfc67f..cbeeb53c 100644
--- a/src/API/Routes.hs
+++ b/src/API/Routes.hs
@@ -509,8 +509,9 @@ throwServiceError (Service.InvalidUUID _) = throwError err500{errBody = "Interna
 throwServiceError (Service.FlowNotFound _) = throwError err500{errBody = "Internal server error"}
 
 -- | Load a method collection by name from the live DatabaseManager state.
-loadCollection :: DatabaseManager -> Text -> AppM ([Method], [DamageCategory], [NormWeightSet], [ScoringSet])
-loadCollection dbManager collectionName = do
+loadCollection :: Text -> AppM ([Method], [DamageCategory], [NormWeightSet], [ScoringSet])
+loadCollection collectionName = do
+    dbManager <- asks getDatabaseManager
     loadedCollections <- liftIO $ readTVarIO (dmLoadedMethods dbManager)
     case M.lookup collectionName loadedCollections of
         Just mc -> return (mcMethods mc, mcDamageCategories mc, mcNormWeightSets mc, mcScoringSets mc)
@@ -520,10 +521,11 @@ loadCollection dbManager collectionName = do
 no-substitution path ('requireFullyLinked' runs inside 'solutionWithDeps');
 'Just' applies the substitutions through the uncached path.
 -}
-crossDBSolutionFor :: DatabaseManager -> Text -> Database -> SharedSolver -> ProcessId -> Maybe SubstitutionRequest -> AppM SharedSolver.CrossDBSolution
-crossDBSolutionFor dbManager dbName db solver pid mSub = case mSub of
+crossDBSolutionFor :: Text -> Database -> SharedSolver -> ProcessId -> Maybe SubstitutionRequest -> AppM SharedSolver.CrossDBSolution
+crossDBSolutionFor dbName db solver pid mSub = case mSub of
     Nothing -> solutionWithDeps dbName db solver pid
     Just subReq -> do
+        dbManager <- asks getDatabaseManager
         requireFullyLinked dbName db
         unitCfg <- liftIO $ getMergedUnitConfig dbManager
         eSol <-
@@ -790,11 +792,11 @@ activityLCIABatchH dbName processIdText collectionName mSub = do
     dbManager <- asks getDatabaseManager
     (db, sharedSolver) <- requireDatabaseByName dbName
     (actProcessId, activity) <- resolveOrThrow db processIdText
-    (methods, damageCats, nwSets, scoringSets) <- loadCollection dbManager collectionName
+    (methods, damageCats, nwSets, scoringSets) <- loadCollection collectionName
     let dcLookup = M.fromList [(subName, dcName dc) | dc <- damageCats, (subName, _) <- dcImpacts dc]
         mNW = case nwSets of (nw : _) -> Just nw; [] -> Nothing
     t0 <- liftIO getCurrentTime
-    sol <- crossDBSolutionFor dbManager dbName db sharedSolver actProcessId mSub
+    sol <- crossDBSolutionFor dbName db sharedSolver actProcessId mSub
     t1 <- liftIO getCurrentTime
     let inventory = SharedSolver.csInventory sol
         !invSize = M.size inventory
@@ -1162,7 +1164,7 @@ lcaServer env =
     activityInventoryCore dbName processIdText mSub = do
         (db, sharedSolver) <- requireDatabaseByName dbName
         (processId, activity) <- resolveOrThrow db processIdText
-        sol <- crossDBSolutionFor dbManager dbName db sharedSolver processId mSub
+        sol <- crossDBSolutionFor dbName db sharedSolver processId mSub
         (mFlows, mUnits) <- liftIO $ DM.getMergedFlowMetadata dbManager
         pure $ Service.convertToInventoryExport db mFlows mUnits processId activity (SharedSolver.csInventory sol)
 
@@ -1392,7 +1394,7 @@ lcaServer env =
         (db, sharedSolver) <- requireDatabaseByName dbName
         method <- loadMethodByUUID methodIdText
         (processId, activity) <- resolveOrThrow db processIdText
-        sol <- crossDBSolutionFor dbManager dbName db sharedSolver processId mSub
+        sol <- crossDBSolutionFor dbName db sharedSolver processId mSub
         result <- liftIO $ computeCategoryResult dbManager dbName db sol activity (fromMaybe 5 topFlowsParam) Nothing method
         when (isNothing mSub) $ liftIO $ logLCIAResult result method
         pure result

From 73e195089da3e937aa29c9a779000bf6a97660b1 Mon Sep 17 00:00:00 2001
From: Christophe Combelles <ccomb@free.fr>
Date: Tue, 26 May 2026 11:13:41 +0200
Subject: [PATCH 13/43] =?UTF-8?q?refactor(dbhandlers):=20factor=20IO/Eithe?=
 =?UTF-8?q?r=E2=86=92400=20ladder=20into=20ioEither400?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Three handlers (addDependencyHandler, removeDependencyHandler,
setDataPathHandler) all had the same shape:

    result <- liftIO $ X
    case result of
        Left err -> throwError $ err400{errBody = BSL.fromStrict $ T.encodeUtf8 err}
        Right v -> return v

Extract that pattern as @ioEither400 :: IO (Either Text a) -> AppM a@
next to @simpleAction@. Each handler body shrinks from a 5-line
case-ladder to a single ioEither400 call.

The other Left→throwError sites in this file use different status codes
(404, custom response shapes like LoadFailed / RelinkResponse) and
don't fit a single helper, so they're left alone — there's no point
forcing a Procrustean abstraction.

LOC roughly break-even — the helper costs lines its callers save —
but the pattern is now named and reusable.
---
 src/API/DatabaseHandlers.hs | 34 ++++++++++++++++++----------------
 1 file changed, 18 insertions(+), 16 deletions(-)

diff --git a/src/API/DatabaseHandlers.hs b/src/API/DatabaseHandlers.hs
index ae990c6e..aa011473 100644
--- a/src/API/DatabaseHandlers.hs
+++ b/src/API/DatabaseHandlers.hs
@@ -337,27 +337,21 @@ getDatabaseSetupHandler dbName= do
 Runs cross-DB linking and returns updated setup info
 -}
 addDependencyHandler :: Text -> Text -> AppM DatabaseSetupInfo
-addDependencyHandler dbName depName= do
+addDependencyHandler dbName depName = do
     dbManager <- asks getDatabaseManager
-    result <- liftIO $ addDependencyToStaged dbManager dbName depName
-    case result of
-        Left err -> throwError $ err400{errBody = BSL.fromStrict $ T.encodeUtf8 err}
-        Right setupInfo -> return setupInfo
+    ioEither400 (addDependencyToStaged dbManager dbName depName)
 
 {- | Remove a dependency from a staged database
 Re-runs cross-DB linking and returns updated setup info
 -}
 removeDependencyHandler :: Text -> Text -> AppM DatabaseSetupInfo
-removeDependencyHandler dbName depName= do
+removeDependencyHandler dbName depName = do
     dbManager <- asks getDatabaseManager
-    result <- liftIO $ removeDependencyFromStaged dbManager dbName depName
-    case result of
-        Left err -> throwError $ err400{errBody = BSL.fromStrict $ T.encodeUtf8 err}
-        Right setupInfo -> return setupInfo
+    ioEither400 (removeDependencyFromStaged dbManager dbName depName)
 
 -- | Change the data path for an uploaded (staged) database
 setDataPathHandler :: Text -> Value -> AppM DatabaseSetupInfo
-setDataPathHandler dbName body= do
+setDataPathHandler dbName body = do
     dbManager <- asks getDatabaseManager
     -- Extract "path" from JSON body
     let mPath = case body of
@@ -367,11 +361,7 @@ setDataPathHandler dbName body= do
             _ -> Nothing
     case mPath of
         Nothing -> throwError $ err400{errBody = "Missing \"path\" field in request body"}
-        Just newPath -> do
-            result <- liftIO $ setDataPath dbManager dbName newPath
-            case result of
-                Left err -> throwError $ err400{errBody = BSL.fromStrict $ T.encodeUtf8 err}
-                Right setupInfo -> return setupInfo
+        Just newPath -> ioEither400 (setDataPath dbManager dbName newPath)
 
 {- | Finalize a staged database
 Builds matrices and makes it ready for queries
@@ -460,6 +450,18 @@ simpleAction action successMsg = do
         Left err -> ActivateResponse False err Nothing
         Right () -> ActivateResponse True successMsg Nothing
 
+{- | @ioEither400 m@ runs an IO action that returns @Either Text a@; on
+@Left@ throws a 400 with the message body, on @Right@ propagates. Used
+to compress the @do result <- liftIO ...; case result of Left … Right …@
+ladder that recurs across every handler returning a typed payload.
+-}
+ioEither400 :: IO (Either Text a) -> AppM a
+ioEither400 action = do
+    result <- liftIO action
+    case result of
+        Left err -> throwError err400{errBody = BSL.fromStrict $ T.encodeUtf8 err}
+        Right v -> return v
+
 --------------------------------------------------------------------------------
 -- Reference Data Handlers (flow synonyms, compartment mappings, units)
 --------------------------------------------------------------------------------

From 679b570860544858dc77798485ea6a33a9875f6d Mon Sep 17 00:00:00 2001
From: Christophe Combelles <ccomb@free.fr>
Date: Tue, 26 May 2026 12:11:32 +0200
Subject: [PATCH 14/43] refactor(schema): LocationFallback/LocationUnresolved
 self-encode via Stripped
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Give LocationKind a stable ToJSON (lowercase wire codes via the existing
locationKindCode) and migrate LocationFallback and LocationUnresolved to
attached `deriving (ToJSON, FromJSON) via (Stripped X)`. The custom
encodeFallback / encodeUnresolved helpers inside DatabaseSetupInfo's
manual ToJSON disappear — the parent now just serializes the field
directly and the structured serialization comes from the field type.

Categorically: pushing the ToJSON dictionary down to the leaf types
gives us the same property as `cata` in recursion schemes — the
encoder for a structure is the composition of encoders for its parts.
Aeson's deriving machinery delivers exactly that composition through
the Generic Rep.

Confirmed byte-identical at runtime: aeson preserves field-declaration
order in its Object; Stripped's stripLowerPrefix turns
{lfProduct, lfRequested, lfActual, lfKind} into
{product, requested, actual, kind} — the same keys the manual encoder
emitted, in the same order. openapi.json still byte-identical (it's
generated from the type-level schema, which was already custom for
LocationKind and unchanged for the records). Hspec 1052/1052 green.
---
 src/Database/Manager.hs | 19 ++++---------------
 src/Types.hs            | 18 ++++++++++++++++++
 2 files changed, 22 insertions(+), 15 deletions(-)

diff --git a/src/Database/Manager.hs b/src/Database/Manager.hs
index 2a7ffb97..8140c7d3 100644
--- a/src/Database/Manager.hs
+++ b/src/Database/Manager.hs
@@ -350,26 +350,15 @@ instance ToJSON DatabaseSetupInfo where
             , "dependencies" .= dsiDependencies
             , "isReady" .= dsiIsReady
             , "unknownUnits" .= dsiUnknownUnits
-            , "locationFallbacks" .= map encodeFallback dsiLocationFallbacks
-            , "locationUnresolved" .= map encodeUnresolved dsiLocationUnresolved
+            , -- LocationFallback / LocationUnresolved carry their own
+              -- Stripped-derived ToJSON now; no per-site encoder needed.
+              "locationFallbacks" .= dsiLocationFallbacks
+            , "locationUnresolved" .= dsiLocationUnresolved
             , "dataPath" .= dsiDataPath
             , "availablePaths" .= map encodeCandidate dsiAvailablePaths
             , "isLoaded" .= dsiIsLoaded
             ]
       where
-        encodeFallback LocationFallback{lfProduct, lfRequested, lfActual, lfKind} =
-            A.object
-                [ "product" .= lfProduct
-                , "requested" .= lfRequested
-                , "actual" .= lfActual
-                , "kind" .= locationKindCode lfKind
-                ]
-        encodeUnresolved LocationUnresolved{luProduct, luRequested, luReason} =
-            A.object
-                [ "product" .= luProduct
-                , "requested" .= luRequested
-                , "reason" .= luReason
-                ]
         encodeCandidate (path, fmt, cnt) =
             A.object
                 ["path" .= path, "format" .= fmt, "fileCount" .= cnt]
diff --git a/src/Types.hs b/src/Types.hs
index b054886e..b3681e94 100644
--- a/src/Types.hs
+++ b/src/Types.hs
@@ -1044,6 +1044,22 @@ locationKindCode ParentLoc = "parent"
 locationKindCode GlobalLoc = "global"
 locationKindCode UnrelatedLoc = "unrelated"
 
+-- | Lowercase-wire-code ToJSON for 'LocationKind'. Stays in lock-step with
+-- 'locationKindCode' so the JSON output and the rejection-reason text can
+-- never drift apart.
+instance ToJSON LocationKind where
+    toJSON = toJSON . locationKindCode
+
+instance FromJSON LocationKind where
+    parseJSON v = do
+        s <- parseJSON v
+        case (s :: Text) of
+            "exact" -> pure ExactLoc
+            "parent" -> pure ParentLoc
+            "global" -> pure GlobalLoc
+            "unrelated" -> pure UnrelatedLoc
+            other -> fail $ "Invalid LocationKind: " <> T.unpack other
+
 -- | A product whose supplier was found at a wider geography than requested.
 data LocationFallback = LocationFallback
     { lfProduct :: !Text
@@ -1052,6 +1068,7 @@ data LocationFallback = LocationFallback
     , lfKind :: !LocationKind
     }
     deriving (Show, Eq, Generic, NFData, Store)
+    deriving (ToJSON, FromJSON) via (Stripped LocationFallback)
 
 {- | A product whose supplier could not be linked — either because no candidate
 matched the name/unit, or because every geographic candidate was rejected by
@@ -1063,6 +1080,7 @@ data LocationUnresolved = LocationUnresolved
     , luReason :: !Text
     }
     deriving (Show, Eq, Generic, NFData, Store)
+    deriving (ToJSON, FromJSON) via (Stripped LocationUnresolved)
 
 {- | Statistics from cross-database linking
 Only essential state is stored; counts are derived via accessor functions.

From f90ba14acf5e6a8088bc2a9ed13f04419dcab02d Mon Sep 17 00:00:00 2001
From: Christophe Combelles <ccomb@free.fr>
Date: Tue, 26 May 2026 12:15:23 +0200
Subject: [PATCH 15/43] refactor(schema): co-locate ToSchema LocationKind with
 its data decl
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The string-enum ToSchema for LocationKind (lowercase wire codes matching
locationKindCode) joins its ToJSON/FromJSON instances next to the data
declaration in src/Types.hs. Unblocks `deriving ToSchema via Stripped`
on LocationFallback and LocationUnresolved — the dependent schema lookup
now finds the LocationKind instance directly, not through an orphan in
API/OpenApi.hs that downstream modules can't see.

src/API/OpenApi.hs continues to shrink toward its real role: the
enrichWithResources post-processor plus a handful of genuinely-bespoke
schema instances (Value, BinaryContent, ApiFlow, the Database.Manager
domain types). Every "type with a generic stripped schema" now lives
next to its data declaration — the schema, the JSON encoder, and the
data definition share one source location.
---
 src/API/OpenApi.hs | 22 +++-------------------
 src/Types.hs       | 21 ++++++++++++++++++---
 2 files changed, 21 insertions(+), 22 deletions(-)

diff --git a/src/API/OpenApi.hs b/src/API/OpenApi.hs
index febd7612..3ad6ac58 100644
--- a/src/API/OpenApi.hs
+++ b/src/API/OpenApi.hs
@@ -47,25 +47,9 @@ instance ToSchema MissingSupplier
 instance ToSchema DependencyStatus
 instance ToSchema DependencyChoice
 
--- Manual schema for LocationKind: ToJSON in Database.Manager.encodeFallback
--- emits lowercase wire codes (exact / parent / global / unrelated) via
--- Types.locationKindCode. The generic schema would otherwise advertise the
--- raw Haskell constructor names.
-instance ToSchema LocationKind where
-    declareNamedSchema _ =
-        pure $
-            NamedSchema (Just "LocationKind") $
-                mempty
-                    & type_ ?~ OpenApiString
-                    & enum_
-                        ?~ [ toJSON ("exact" :: Text)
-                           , toJSON ("parent" :: Text)
-                           , toJSON ("global" :: Text)
-                           , toJSON ("unrelated" :: Text)
-                           ]
-
-instance ToSchema LocationFallback where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions
-instance ToSchema LocationUnresolved where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions
+-- ToSchema for LocationKind / LocationFallback / LocationUnresolved derived
+-- alongside their data declarations in src/Types.hs (LocationKind as a string
+-- enum matching the lowercase wire codes; the records via Stripped).
 instance ToSchema DatabaseSetupInfo where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions
 
 -- API.Types — every record type uses strippedSchemaOptions so the generated
diff --git a/src/Types.hs b/src/Types.hs
index b3681e94..1c180256 100644
--- a/src/Types.hs
+++ b/src/Types.hs
@@ -36,7 +36,8 @@ import Data.List (nub)
 import Search.BM25.Types (BM25Index)
 import SynonymDB (normalizeName)
 import SynonymDB.Types (SynonymDB)
-import Data.OpenApi (ToSchema)
+import Control.Lens ((&), (?~))
+import Data.OpenApi (NamedSchema (..), OpenApiType (..), ToSchema (..), enum_, type_)
 
 -- | Orphan Store instance for UUID (16 bytes, host-native word order)
 instance Store UUID where
@@ -1060,6 +1061,20 @@ instance FromJSON LocationKind where
             "unrelated" -> pure UnrelatedLoc
             other -> fail $ "Invalid LocationKind: " <> T.unpack other
 
+-- | OpenAPI schema for 'LocationKind' as a string-enum matching the wire codes
+-- produced by 'locationKindCode'. The generic schema would expose the raw
+-- Haskell constructor names; this keeps the spec in sync with the ToJSON.
+instance ToSchema LocationKind where
+    declareNamedSchema _ =
+        pure $
+            NamedSchema (Just "LocationKind") $
+                mempty
+                    & type_ ?~ OpenApiString
+                    & enum_
+                        ?~ [ toJSON (c :: Text)
+                           | c <- ["exact", "parent", "global", "unrelated"]
+                           ]
+
 -- | A product whose supplier was found at a wider geography than requested.
 data LocationFallback = LocationFallback
     { lfProduct :: !Text
@@ -1068,7 +1083,7 @@ data LocationFallback = LocationFallback
     , lfKind :: !LocationKind
     }
     deriving (Show, Eq, Generic, NFData, Store)
-    deriving (ToJSON, FromJSON) via (Stripped LocationFallback)
+    deriving (ToJSON, FromJSON, ToSchema) via (Stripped LocationFallback)
 
 {- | A product whose supplier could not be linked — either because no candidate
 matched the name/unit, or because every geographic candidate was rejected by
@@ -1080,7 +1095,7 @@ data LocationUnresolved = LocationUnresolved
     , luReason :: !Text
     }
     deriving (Show, Eq, Generic, NFData, Store)
-    deriving (ToJSON, FromJSON) via (Stripped LocationUnresolved)
+    deriving (ToJSON, FromJSON, ToSchema) via (Stripped LocationUnresolved)
 
 {- | Statistics from cross-database linking
 Only essential state is stored; counts are derived via accessor functions.

From 323d87cc62152203b8c7d37db99953a6fd48625b Mon Sep 17 00:00:00 2001
From: Christophe Combelles <ccomb@free.fr>
Date: Tue, 26 May 2026 12:51:54 +0200
Subject: [PATCH 16/43] refactor(schema):
 MissingSupplier/DependencyChoice/DatabaseSetupInfo via Stripped
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Migrate the three remaining hand-rolled ToJSON instances in
Database.Manager onto the Stripped DerivingVia carrier:

  - MissingSupplier: 9 lines of manual `A.object [...]` → 1 deriving
    clause. Wire format unchanged (msProductName → productName, etc.).
  - DependencyChoice: 7 lines → 1 deriving clause. Wire format
    unchanged.
  - DatabaseSetupInfo: 25 lines (with inline encodeFallback /
    encodeUnresolved / encodeCandidate helpers) → 1 deriving clause.

To make the last one work, the bespoke 3-tuple
@dsiAvailablePaths :: ![(Text, Text, Int)]@ is promoted to a proper
record @data PathCandidate = PathCandidate { pcPath, pcFormat,
pcFileCount }@, which also derives via Stripped. The inline
encodeCandidate helper inside DatabaseSetupInfo's ToJSON disappears
along with the rest.

Runtime JSON: byte-identical. aeson's Generic ToJSON preserves field
declaration order, which matches the order the manual instances used.
Key names match (stripLowerPrefix on the prefixed Haskell field names
produces the same JSON keys the manual code emitted).

OpenAPI schema: changes deliberately. Before, `availablePaths` was a
positional 3-tuple — `[[string, string, int]]` — which is awkward in
generated clients and was already at odds with what the wire actually
emitted (an array of objects). After: the schema reflects the real
wire shape — `array<PathCandidate>` with named `path`, `format`,
`fileCount` properties. A new `PathCandidate` schema component is
added. The pre-existing inconsistency (schema said tuple, wire said
object) is now resolved in favor of the wire.

Categorically, this is the generic/specific decomposition the prior
commits established: the type-level Stripped carrier is the *generic*
machinery (one definition, one piece of code that handles
field-name-stripping for every record), and what each type declares
is purely *specific* — "I am a record with these fields". The
serialization, schema, and shape now share one source location per
type; the JSON encoder, the OpenAPI schema generator, and the data
declaration cannot drift apart.

Hspec 1052/1052 green.
---
 src/Database/Manager.hs | 79 ++++++++++++++---------------------------
 1 file changed, 27 insertions(+), 52 deletions(-)

diff --git a/src/Database/Manager.hs b/src/Database/Manager.hs
index 8140c7d3..ac947cc4 100644
--- a/src/Database/Manager.hs
+++ b/src/Database/Manager.hs
@@ -1,7 +1,8 @@
 {-# LANGUAGE BangPatterns #-}
+{-# LANGUAGE DeriveAnyClass #-}
 {-# LANGUAGE DeriveGeneric #-}
+{-# LANGUAGE DerivingVia #-}
 {-# LANGUAGE LambdaCase #-}
-{-# LANGUAGE NamedFieldPuns #-}
 {-# LANGUAGE OverloadedStrings #-}
 {-# LANGUAGE RecordWildCards #-}
 {-# LANGUAGE ScopedTypeVariables #-}
@@ -104,7 +105,9 @@ import Control.Concurrent.STM
 import Control.Exception (SomeException, try)
 import qualified Control.Exception
 import Control.Monad (forM, forM_, unless, when)
+import API.JsonOptions (Stripped (..))
 import Data.Aeson (FromJSON (..), ToJSON (..), (.:), (.:?), (.=))
+import Data.OpenApi (ToSchema)
 import qualified Data.Aeson as A
 import Data.Bifunctor (first)
 import Data.Char (toLower)
@@ -254,16 +257,7 @@ data MissingSupplier = MissingSupplier
     -- ^ e.g. "kg vs ton", "FR not available"
     }
     deriving (Show, Eq, Generic)
-
-instance ToJSON MissingSupplier where
-    toJSON MissingSupplier{..} =
-        A.object
-            [ "productName" .= msProductName
-            , "count" .= msCount
-            , "location" .= msLocation
-            , "reason" .= msReason
-            , "detail" .= msDetail
-            ]
+    deriving (ToJSON) via (Stripped MissingSupplier)
 
 {- | Whether a candidate dependency is currently selected, merely available,
 or redundant under the minimal cover (matches links but every link it wins
@@ -285,15 +279,23 @@ data DependencyChoice = DependencyChoice
     , dchMatchCount :: !Int
     }
     deriving (Show, Eq, Generic)
+    deriving (ToJSON) via (Stripped DependencyChoice)
 
-instance ToJSON DependencyChoice where
-    toJSON DependencyChoice{..} =
-        A.object
-            [ "status" .= dchStatus
-            , "databaseName" .= dchDatabaseName
-            , "displayName" .= dchDisplayName
-            , "matchCount" .= dchMatchCount
-            ]
+{- | One of the candidate data directories inside an uploaded database's
+upload root. Surfaces in @DatabaseSetupInfo.dsiAvailablePaths@ so the UI
+can present a picker. The schema is now a proper named object instead
+of a positional 3-tuple.
+-}
+data PathCandidate = PathCandidate
+    { pcPath :: !Text
+    -- ^ Relative path under the upload root
+    , pcFormat :: !Text
+    -- ^ Format label (e.g. "EcoSpold 2", "SimaPro CSV", "Unknown")
+    , pcFileCount :: !Int
+    -- ^ Number of data files detected in this directory
+    }
+    deriving (Show, Eq, Generic)
+    deriving (ToJSON, ToSchema) via (Stripped PathCandidate)
 
 -- | Setup info for a database (for the setup page)
 data DatabaseSetupInfo = DatabaseSetupInfo
@@ -328,40 +330,13 @@ data DatabaseSetupInfo = DatabaseSetupInfo
     -}
     , dsiDataPath :: !Text
     -- ^ Current selected data path (relative)
-    , dsiAvailablePaths :: ![(Text, Text, Int)]
-    -- ^ (relativePath, formatLabel, fileCount)
+    , dsiAvailablePaths :: ![PathCandidate]
+    -- ^ Candidate data directories within the upload root
     , dsiIsLoaded :: !Bool
     -- ^ True if database is already loaded (read-only info)
     }
     deriving (Show, Eq, Generic)
-
-instance ToJSON DatabaseSetupInfo where
-    toJSON DatabaseSetupInfo{..} =
-        A.object
-            [ "name" .= dsiName
-            , "displayName" .= dsiDisplayName
-            , "activityCount" .= dsiActivityCount
-            , "inputCount" .= dsiInputCount
-            , "completeness" .= dsiCompleteness
-            , "internalLinks" .= dsiInternalLinks
-            , "crossDBLinks" .= dsiCrossDBLinks
-            , "unresolvedLinks" .= dsiUnresolvedLinks
-            , "missingSuppliers" .= dsiMissingSuppliers
-            , "dependencies" .= dsiDependencies
-            , "isReady" .= dsiIsReady
-            , "unknownUnits" .= dsiUnknownUnits
-            , -- LocationFallback / LocationUnresolved carry their own
-              -- Stripped-derived ToJSON now; no per-site encoder needed.
-              "locationFallbacks" .= dsiLocationFallbacks
-            , "locationUnresolved" .= dsiLocationUnresolved
-            , "dataPath" .= dsiDataPath
-            , "availablePaths" .= map encodeCandidate dsiAvailablePaths
-            , "isLoaded" .= dsiIsLoaded
-            ]
-      where
-        encodeCandidate (path, fmt, cnt) =
-            A.object
-                ["path" .= path, "format" .= fmt, "fileCount" .= cnt]
+    deriving (ToJSON) via (Stripped DatabaseSetupInfo)
 
 -- | Errors from getDatabaseSetupInfo
 data SetupError = SetupNotFound Text | SetupFailed Text
@@ -2051,9 +2026,9 @@ buildLoadedSetupInfo config db configs indexedDbs =
             }
 
 {- | Discover candidate data paths within an uploaded database's root directory.
-Returns (relativePath, formatLabel, fileCount) for each candidate.
+Returns one 'PathCandidate' per candidate directory.
 -}
-discoverCandidatePaths :: DatabaseConfig -> IO [(Text, Text, Int)]
+discoverCandidatePaths :: DatabaseConfig -> IO [PathCandidate]
 discoverCandidatePaths dbConfig = do
     uploadsDir <- UploadedDB.getDatabaseUploadsDir
     let uploadRoot = uploadsDir </> T.unpack (dcName dbConfig)
@@ -2069,7 +2044,7 @@ discoverCandidatePaths dbConfig = do
                 Upload.ILCDProcess -> "ILCD"
                 Upload.OpenLcaJsonLd -> "openLCA JSON-LD"
                 Upload.UnknownFormat -> "Unknown"
-        return (T.pack rel, label, count)
+        return PathCandidate{pcPath = T.pack rel, pcFormat = label, pcFileCount = count}
   where
     -- Simple relative path: strip upload root prefix
     makeRelativePath base path

From fbbe23b870459edc3de413367fa290825aaeadf2 Mon Sep 17 00:00:00 2001
From: Christophe Combelles <ccomb@free.fr>
Date: Tue, 26 May 2026 12:54:47 +0200
Subject: [PATCH 17/43] fix(schema): align openapi for MissingSupplier /
 DependencyChoice / DependencyStatus
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Three pre-existing schema/wire mismatches in Database.Manager — now
that the JSON instances are Stripped-derived, fix the schemas too:

  - MissingSupplier: schema previously advertised the Haskell field
    names (msCount, msProductName, msLocation, msReason, msDetail);
    the actual JSON has always emitted the Stripped names (count,
    productName, location, reason, detail). Schema now matches.
    Achieved by deriving (ToJSON, ToSchema) via (Stripped X) so the
    two share one source of truth.
  - DependencyChoice: same situation — schema said dchStatus etc.,
    wire emits status etc. Same fix.
  - DependencyStatus: schema previously declared the enum as
    [SelectedDep, AvailableDep, RedundantDep] (raw constructor names);
    the wire has always emitted [selected, available, redundant]
    (lowercase, via the hand-rolled ToJSON). Schema now declared
    explicitly as the lowercase string-enum that matches.

Wire JSON unchanged at every endpoint (the ToJSON instances produce
the same bytes as before; Stripped's stripLowerPrefix is bit-equivalent
to the manual A.object [...] form, in the same field order).

OpenAPI changes are deliberate corrections. Any tooling that read the
openapi.json to generate clients was previously broken for these three
types (it would expect Haskell-named fields that the server never
emits). The categorical pattern that makes this fix one-line-per-type
is the same isomorphism transport via Coercible the prior commits
established — pushing the wire contract into the type's Generic Rep
keeps the encoder, the decoder, and the schema in lock-step.
---
 src/API/OpenApi.hs      |  7 +++----
 src/Database/Manager.hs | 19 ++++++++++++++++---
 2 files changed, 19 insertions(+), 7 deletions(-)

diff --git a/src/API/OpenApi.hs b/src/API/OpenApi.hs
index 3ad6ac58..6039176a 100644
--- a/src/API/OpenApi.hs
+++ b/src/API/OpenApi.hs
@@ -42,10 +42,9 @@ instance ToSchema Value where
 -- Domain types: TechRole, BioDirection, Unit now derive ToSchema next to
 -- their data declarations in src/Types.hs (via anyclass / DerivingVia).
 
--- Database.Manager types
-instance ToSchema MissingSupplier
-instance ToSchema DependencyStatus
-instance ToSchema DependencyChoice
+-- Database.Manager ToSchema instances (MissingSupplier, DependencyChoice via
+-- Stripped; DependencyStatus as a lowercase string-enum) now live next to
+-- their data declarations in src/Database/Manager.hs.
 
 -- ToSchema for LocationKind / LocationFallback / LocationUnresolved derived
 -- alongside their data declarations in src/Types.hs (LocationKind as a string
diff --git a/src/Database/Manager.hs b/src/Database/Manager.hs
index ac947cc4..09632cbe 100644
--- a/src/Database/Manager.hs
+++ b/src/Database/Manager.hs
@@ -107,7 +107,8 @@ import qualified Control.Exception
 import Control.Monad (forM, forM_, unless, when)
 import API.JsonOptions (Stripped (..))
 import Data.Aeson (FromJSON (..), ToJSON (..), (.:), (.:?), (.=))
-import Data.OpenApi (ToSchema)
+import Control.Lens ((&), (?~))
+import Data.OpenApi (NamedSchema (..), OpenApiType (..), ToSchema (..), enum_, type_)
 import qualified Data.Aeson as A
 import Data.Bifunctor (first)
 import Data.Char (toLower)
@@ -257,7 +258,7 @@ data MissingSupplier = MissingSupplier
     -- ^ e.g. "kg vs ton", "FR not available"
     }
     deriving (Show, Eq, Generic)
-    deriving (ToJSON) via (Stripped MissingSupplier)
+    deriving (ToJSON, ToSchema) via (Stripped MissingSupplier)
 
 {- | Whether a candidate dependency is currently selected, merely available,
 or redundant under the minimal cover (matches links but every link it wins
@@ -271,6 +272,18 @@ instance ToJSON DependencyStatus where
     toJSON AvailableDep = A.String "available"
     toJSON RedundantDep = A.String "redundant"
 
+-- | String-enum schema matching the lowercase wire codes from ToJSON above.
+-- The previous default-Generic schema advertised the raw Haskell constructor
+-- names (SelectedDep / AvailableDep / RedundantDep), which is what the schema
+-- said but never what the wire emitted.
+instance ToSchema DependencyStatus where
+    declareNamedSchema _ =
+        pure $
+            NamedSchema (Just "DependencyStatus") $
+                mempty
+                    & type_ ?~ OpenApiString
+                    & enum_ ?~ [toJSON (c :: Text) | c <- ["selected", "available", "redundant"]]
+
 -- | A candidate dependency database in one of three states
 data DependencyChoice = DependencyChoice
     { dchStatus :: !DependencyStatus
@@ -279,7 +292,7 @@ data DependencyChoice = DependencyChoice
     , dchMatchCount :: !Int
     }
     deriving (Show, Eq, Generic)
-    deriving (ToJSON) via (Stripped DependencyChoice)
+    deriving (ToJSON, ToSchema) via (Stripped DependencyChoice)
 
 {- | One of the candidate data directories inside an uploaded database's
 upload root. Surfaces in @DatabaseSetupInfo.dsiAvailablePaths@ so the UI

From 10094dfbf882a93cf080f97b003afa8a0ea3e184 Mon Sep 17 00:00:00 2001
From: Christophe Combelles <ccomb@free.fr>
Date: Tue, 26 May 2026 17:30:47 +0200
Subject: [PATCH 18/43] refactor(stats): CrossDBLinkingStats and TreeStats
 become Monoids
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Replace the bespoke @mergeCrossDBStats@ / @emptyCrossDBLinkingStats@ pair
and the @combineStats@ helper with proper @Semigroup@ / @Monoid@
instances. All call sites now speak the same vocabulary every other
stats accumulator in the codebase uses:

  - @foldr mergeCrossDBStats emptyCrossDBLinkingStats xs@  →  @mconcat xs@
  - @combineStats a b@                                       →  @a <> b@
  - @emptyCrossDBLinkingStats@                              →  @mempty@
  - @mergeCrossDBStats a b@                                 →  @a <> b@

Categorically: both records are the *product of monoids in the category
of types*, derived componentwise from each field's monoid. Hand-written
rather than @via Generically@ because bare @Int@ has no canonical
'Monoid' (Sum vs Product is ambiguous) and @(Int, LinkBlocker)@ as a
map-value mixes a monoid with a non-monoid — the explicit instance
keeps the public field types ergonomic (still @Int@, not @Sum Int@) at
the cost of a few lines of mechanical derivation.

This is the same pattern the prior commit applied to 'UnlinkedSummary',
applied now to the two other genuine stats accumulators in the codebase.
What remains as named @merge*@ functions are operations that are not
pure product-of-monoids: 'mergeUnitConfigs' has a "first-wins" field
mixed with last-wins maps; 'mergeSynonymDBs' renumbers group IDs;
'mergeTechFlows' / 'mergeBioFlows' are left-biased with one unioned
field. Those keep their named functions because the asymmetry would
be confusing under the @<>@ vocabulary.

Hspec 1052/1052 green.
---
 src/Database.hs                    |  2 +-
 src/Database/Loader.hs             | 20 ++++++++--------
 src/Service.hs                     | 12 +++++++---
 src/Types.hs                       | 38 +++++++++++++++++-------------
 test/CrossDBRegionalLCIAFixture.hs |  2 +-
 test/RegionalLCIASpec.hs           |  3 +--
 6 files changed, 43 insertions(+), 34 deletions(-)

diff --git a/src/Database.hs b/src/Database.hs
index 40365f83..ff0d55d1 100644
--- a/src/Database.hs
+++ b/src/Database.hs
@@ -256,7 +256,7 @@ buildDatabaseWithMatrices unitConfig activityMap techFlowDB bioFlowDB wasteFlowD
                         , dbBiosphereCount = bioFlowCount
                         , dbCrossDBLinks = []
                         , dbDependsOn = []
-                        , dbLinkingStats = emptyCrossDBLinkingStats
+                        , dbLinkingStats = mempty
                         , dbSynonymDB = Nothing
                         , dbFlowsByName = M.empty
                         , dbFlowsByCAS = M.empty
diff --git a/src/Database/Loader.hs b/src/Database/Loader.hs
index 405a240d..819cf06e 100644
--- a/src/Database/Loader.hs
+++ b/src/Database/Loader.hs
@@ -45,7 +45,7 @@ module Database.Loader (
     fixActivityLinksWithCrossDB,
     findAllCrossDBLinks,
     CrossDBLinkingStats (..),
-    emptyCrossDBLinkingStats,
+    mempty,
     crossDBLinksCount,
     unresolvedCount,
     crossDBBySource,
@@ -1055,7 +1055,7 @@ saveCachedDatabaseWithMatrices dbName dataDir db = do
 -- Cross-Database Linking
 --------------------------------------------------------------------------------
 
-{- | CrossDBLinkingStats, emptyCrossDBLinkingStats, mergeCrossDBStats,
+{- | CrossDBLinkingStats, mempty, (<>),
   crossDBLinksCount, unresolvedCount, crossDBBySource
   are now defined in Types and re-exported from this module.
 -}
@@ -1113,7 +1113,7 @@ loadDatabaseWithCrossDBLinking locationAliases otherIndexes synonymDB unitConfig
             if null otherIndexes
                 then do
                     -- No cross-DB linking needed
-                    let !stats = emptyCrossDBLinkingStats{cdlUnknownUnits = unknownUnits, cdlTotalInputs = totalInputs}
+                    let !stats = mempty{cdlUnknownUnits = unknownUnits, cdlTotalInputs = totalInputs}
                     reportCrossDBLinkingStats (M.size (sdbActivities simpleDb)) stats
                     return $ Right (simpleDb, stats)
                 else do
@@ -1166,7 +1166,7 @@ fixActivityLinksWithCrossDB indexedDbs synonymDB unitConfig locationHier policy
     if unlinkedBefore == 0
         then do
             reportProgress Info "No unlinked exchanges to resolve via cross-DB linking"
-            return (db, emptyCrossDBLinkingStats{cdlTotalInputs = totalInputs})
+            return (db, mempty{cdlTotalInputs = totalInputs})
         else do
             reportProgress Info $
                 printf
@@ -1272,7 +1272,7 @@ findAllCrossDBLinks ::
     CrossDBLinkingStats
 findAllCrossDBLinks ctx techFlowDb unitDb activities =
     let results = M.mapWithKey (findActivityCrossDBLinks ctx techFlowDb unitDb) activities
-     in foldr mergeCrossDBStats emptyCrossDBLinkingStats (M.elems results)
+     in mconcat (M.elems results)
 
 -- | Find cross-database links for one activity's exchanges
 findActivityCrossDBLinks ::
@@ -1285,7 +1285,7 @@ findActivityCrossDBLinks ::
     CrossDBLinkingStats
 findActivityCrossDBLinks ctx techFlowDb unitDb (consumerActUUID, consumerProdUUID) act =
     let stats = map (findExchangeCrossDBLink ctx techFlowDb unitDb consumerActUUID consumerProdUUID) (exchanges act)
-     in foldr mergeCrossDBStats emptyCrossDBLinkingStats stats
+     in mconcat stats
 
 {- | Find cross-database link for a single exchange.
 
@@ -1304,7 +1304,7 @@ findExchangeCrossDBLink ::
 findExchangeCrossDBLink ctx techFlowDb unitDb consumerActUUID consumerProdUUID ex@TechnosphereExchange{techFlowId = fid, techAmount = amt, techActivityLinkId = linkId, techLocation = loc}
     | exchangeIsInput ex && linkId == UUID.nil =
         case M.lookup fid techFlowDb of
-            Nothing -> emptyCrossDBLinkingStats
+            Nothing -> mempty
             Just flow ->
                 let flowUnitName = maybe "" unitName (M.lookup (tfUnitId flow) unitDb)
                  in case findSupplierAcrossDatabases ctx (tfName flow) loc flowUnitName of
@@ -1344,13 +1344,13 @@ findExchangeCrossDBLink ctx techFlowDb unitDb consumerActUUID consumerProdUUID e
                                     NoNameMatch -> []
                                     UnitIncompatible _ _ -> []
                              in CrossDBLinkingStats [] (M.singleton (tfName flow) (1, blocker)) S.empty [] unresolved 0
-    | otherwise = emptyCrossDBLinkingStats
-findExchangeCrossDBLink _ _ _ _ _ BiosphereExchange{} = emptyCrossDBLinkingStats
+    | otherwise = mempty
+findExchangeCrossDBLink _ _ _ _ _ BiosphereExchange{} = mempty
 -- Cross-DB linking for waste flows is deferred: orphan waste outputs are
 -- end-of-life markers (no demand on another DB), and waste *inputs* that
 -- require a treatment supplier would need a dedicated lookup keyed on
 -- WasteFlow rather than TechnosphereFlow. Pure no-op until that path lands.
-findExchangeCrossDBLink _ _ _ _ _ WasteExchange{} = emptyCrossDBLinkingStats
+findExchangeCrossDBLink _ _ _ _ _ WasteExchange{} = mempty
 
 -- | Report cross-database linking statistics
 reportCrossDBLinkingStats :: Int -> CrossDBLinkingStats -> IO ()
diff --git a/src/Service.hs b/src/Service.hs
index fbfb07d5..3f51c653 100644
--- a/src/Service.hs
+++ b/src/Service.hs
@@ -342,10 +342,16 @@ getActivityInventoryWithSharedSolver validators sharedSolver db processIdText =
                                     return $ Right inventoryExport
 
 -- | Simple stats tracking for tree processing
+-- | Tree-traversal counters (total nodes / loop nodes / leaf nodes). The
+-- 'Semigroup' / 'Monoid' instance is the product of three Sum-Int monoids,
+-- hand-written to keep the bare 'Int' constructor positions ergonomic.
 data TreeStats = TreeStats Int Int Int -- total, loops, leaves
 
-combineStats :: TreeStats -> TreeStats -> TreeStats
-combineStats (TreeStats t1 l1 v1) (TreeStats t2 l2 v2) = TreeStats (t1 + t2) (l1 + l2) (v1 + v2)
+instance Semigroup TreeStats where
+    TreeStats t1 l1 v1 <> TreeStats t2 l2 v2 = TreeStats (t1 + t2) (l1 + l2) (v1 + v2)
+
+instance Monoid TreeStats where
+    mempty = TreeStats 0 0 0
 
 {- | Helper to find ProcessId for an activity by searching the database
 This is needed because activities don't store their own ProcessId/UUID
@@ -585,7 +591,7 @@ extractNodesAndEdges db tree depth parentId nodeAcc edgeAcc = case tree of
                             , teUnit = getUnitNameForTechFlow (dbUnits db) flow
                             , teEdgeType = TechnosphereEdge
                             }
-                    newStats = combineStats statsAcc childStats'
+                    newStats = statsAcc <> childStats'
                  in (childNodes, edge : childEdges, newStats)
             (finalNodes, finalEdges, combinedStats) = foldr processChild (nodes', edgeAcc, TreeStats 1 0 0) children
             -- Add biosphere nodes and edges only for depth 0 (root level)
diff --git a/src/Types.hs b/src/Types.hs
index 1c180256..8b3bcde6 100644
--- a/src/Types.hs
+++ b/src/Types.hs
@@ -1116,23 +1116,27 @@ data CrossDBLinkingStats = CrossDBLinkingStats
     }
     deriving (Generic, NFData, Store)
 
--- | Empty stats
-emptyCrossDBLinkingStats :: CrossDBLinkingStats
-emptyCrossDBLinkingStats = CrossDBLinkingStats [] M.empty S.empty [] [] 0
-
--- | Merge two CrossDBLinkingStats
-mergeCrossDBStats :: CrossDBLinkingStats -> CrossDBLinkingStats -> CrossDBLinkingStats
-mergeCrossDBStats s1 s2 =
-    CrossDBLinkingStats
-        { cdlLinks = cdlLinks s1 ++ cdlLinks s2
-        , cdlUnresolvedProducts = M.unionWith mergeUnresolved (cdlUnresolvedProducts s1) (cdlUnresolvedProducts s2)
-        , cdlUnknownUnits = S.union (cdlUnknownUnits s1) (cdlUnknownUnits s2)
-        , cdlLocationFallbacks = cdlLocationFallbacks s1 ++ cdlLocationFallbacks s2
-        , cdlLocationUnresolved = cdlLocationUnresolved s1 ++ cdlLocationUnresolved s2
-        , cdlTotalInputs = cdlTotalInputs s1 + cdlTotalInputs s2
-        }
-  where
-    mergeUnresolved (c1, b) (c2, _) = (c1 + c2, b)
+{- | Product of monoids, componentwise: lists concat, the count\/blocker
+map unions (counts summed, first blocker wins as a tiebreaker), the set
+unions, the @Int@ counter sums. Hand-written rather than @via Generically@
+because bare @Int@ has no canonical 'Monoid' (Sum vs Product is ambiguous)
+and the @(Int, LinkBlocker)@ map value mixes a 'Monoid' with a non-'Monoid'.
+-}
+instance Semigroup CrossDBLinkingStats where
+    s1 <> s2 =
+        CrossDBLinkingStats
+            { cdlLinks = cdlLinks s1 <> cdlLinks s2
+            , cdlUnresolvedProducts = M.unionWith mergeUnresolved (cdlUnresolvedProducts s1) (cdlUnresolvedProducts s2)
+            , cdlUnknownUnits = cdlUnknownUnits s1 <> cdlUnknownUnits s2
+            , cdlLocationFallbacks = cdlLocationFallbacks s1 <> cdlLocationFallbacks s2
+            , cdlLocationUnresolved = cdlLocationUnresolved s1 <> cdlLocationUnresolved s2
+            , cdlTotalInputs = cdlTotalInputs s1 + cdlTotalInputs s2
+            }
+      where
+        mergeUnresolved (c1, b) (c2, _) = (c1 + c2, b)
+
+instance Monoid CrossDBLinkingStats where
+    mempty = CrossDBLinkingStats [] M.empty S.empty [] [] 0
 
 -- | Deduplicate location fallbacks by (product, requestedLoc)
 deduplicateFallbacks :: [LocationFallback] -> [LocationFallback]
diff --git a/test/CrossDBRegionalLCIAFixture.hs b/test/CrossDBRegionalLCIAFixture.hs
index 5951e707..2ea2fc97 100644
--- a/test/CrossDBRegionalLCIAFixture.hs
+++ b/test/CrossDBRegionalLCIAFixture.hs
@@ -153,7 +153,7 @@ mkDB offset locs bioTriples =
             , dbBiosphereCount = 1
             , dbCrossDBLinks = []
             , dbDependsOn = []
-            , dbLinkingStats = emptyCrossDBLinkingStats
+            , dbLinkingStats = mempty
             , dbSynonymDB = Nothing
             , dbFlowsByName = M.empty
             , dbFlowsByCAS = M.empty
diff --git a/test/RegionalLCIASpec.hs b/test/RegionalLCIASpec.hs
index 945dc14f..2bc6d3ce 100644
--- a/test/RegionalLCIASpec.hs
+++ b/test/RegionalLCIASpec.hs
@@ -31,7 +31,6 @@ import Types (
     Indexes (..),
     SparseTriple (..),
     Unit (..),
-    emptyCrossDBLinkingStats,
     emptyProductIndex,
  )
 import UnitConversion (UnitConfig (..), UnitDef (..))
@@ -125,7 +124,7 @@ mkDB locsAndEmissions =
             , dbBiosphereCount = 1
             , dbCrossDBLinks = []
             , dbDependsOn = []
-            , dbLinkingStats = emptyCrossDBLinkingStats
+            , dbLinkingStats = mempty
             , dbSynonymDB = Nothing
             , dbFlowsByName = M.empty
             , dbFlowsByCAS = M.empty

From 793e317402626c14dbedd36ef57fc3231640fe95 Mon Sep 17 00:00:00 2001
From: Christophe Combelles <ccomb@free.fr>
Date: Tue, 26 May 2026 17:32:53 +0200
Subject: [PATCH 19/43] refactor(crosslinking): name the "first non-empty"
 cascade
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The supplier-search algorithm in findSupplierInIndexedDBs was a nested
if-null ladder:

    exact = ...
    syn   = if null exact then ... else []
    pref  = if null exact && null syn then ... else []
    all   = exact ++ syn ++ pref

By the if-null guard, at most one of the three lists is non-empty —
the ++ is a sleight-of-hand for "the first non-empty wins". The same
pattern recurs one level deeper inside tryPrefixes (try each prefix's
exact + synonym sub-cascade; first match wins).

Both collapse cleanly to a single helper:

    firstNonEmpty :: [[a]] -> [a]
    firstNonEmpty = fromMaybe [] . find (not . null)

which is the First monoid on Maybe [a] (lift each candidate list into
Maybe via nonEmpty, combine with <|>, drop back) — collapsed because
that's the exact shape every match-strategy cascade in the module
wants. The cascade then reads as a priority-ordered list of strategies,
and the recursive tryPrefixes becomes a simple firstNonEmpty over
per-prefix sub-cascades, with the same helper applied at both levels.

Net behaviour identical (same inputs, same outputs, no perf change);
the algorithm's structure is now declared instead of unfolded.
---
 src/Database/CrossLinking.hs | 58 ++++++++++++++++++++----------------
 1 file changed, 32 insertions(+), 26 deletions(-)

diff --git a/src/Database/CrossLinking.hs b/src/Database/CrossLinking.hs
index e4b808ef..7231c0da 100644
--- a/src/Database/CrossLinking.hs
+++ b/src/Database/CrossLinking.hs
@@ -63,6 +63,7 @@ module Database.CrossLinking (
 ) where
 
 import Data.Char (isAlpha, isUpper)
+import Data.Foldable (find)
 import Data.List (maximumBy)
 import qualified Data.Map.Strict as M
 import Data.Maybe (fromMaybe, mapMaybe)
@@ -398,21 +399,19 @@ findSupplierInIndexedDBs ::
     CrossDBLinkResult
 findSupplierInIndexedDBs LinkingContext{..} productName location unit =
     let normalizedName = normalizeText productName
-        -- Try exact match first (O(1) lookup)
-        exactCandidates = concatMap (lookupExact normalizedName) lcIndexedDatabases
-        -- Try synonym match if no exact match
-        synonymCandidates =
-            if null exactCandidates
-                then case lookupSynonymGroup lcSynonymDB (normalizeName productName) of
+        -- Three priority-ordered match strategies; we take the first non-empty
+        -- result via 'firstNonEmpty' (the "First" monoid restricted to lists).
+        --   1. Exact product-name match across all indexed DBs.
+        --   2. Synonym-group match if exact yielded nothing.
+        --   3. Prefix-splitting fallback for compound names (e.g. SimaPro).
+        allCandidates =
+            firstNonEmpty
+                [ concatMap (lookupExact normalizedName) lcIndexedDatabases
+                , case lookupSynonymGroup lcSynonymDB (normalizeName productName) of
                     Just groupId -> concatMap (lookupBySynonym groupId) lcIndexedDatabases
                     Nothing -> []
-                else []
-        -- Fallback: try prefix-based splitting for compound names (e.g. SimaPro)
-        prefixCandidates =
-            if null exactCandidates && null synonymCandidates
-                then tryPrefixes (extractProductPrefixes productName)
-                else []
-        allCandidates = exactCandidates ++ synonymCandidates ++ prefixCandidates
+                , tryPrefixes (extractProductPrefixes productName)
+                ]
         -- Effective location: if raw location is empty, try extracting from compound name
         effectiveLocation =
             if T.null location
@@ -481,20 +480,27 @@ findSupplierInIndexedDBs LinkingContext{..} productName location unit =
     lookupBySynonym groupId idb =
         [(idbName idb, entry) | entry <- fromMaybe [] (M.lookup groupId (idbBySynonymGroup idb))]
 
-    -- Try each prefix from compound name splitting, return first match
+    {- | First non-empty list in a priority order. This is the @First@ monoid
+    on @Maybe [a]@ (lift each list into @Maybe@ via 'nonEmpty', combine with
+    @<|>@, drop back), collapsed to a single helper because that's the
+    exact shape every match-strategy cascade in this module wants.
+    -}
+    firstNonEmpty :: [[a]] -> [a]
+    firstNonEmpty = fromMaybe [] . find (not . null)
+
+    -- Try each prefix from compound name splitting; for each prefix run the
+    -- same (exact, then synonym) sub-cascade; return the first prefix that
+    -- yields anything.
     tryPrefixes :: [Text] -> [(Text, SupplierEntry)]
-    tryPrefixes [] = []
-    tryPrefixes (p : ps) =
-        let normalized = normalizeText p
-            candidates = concatMap (lookupExact normalized) lcIndexedDatabases
-         in if null candidates
-                then -- Also try synonym match for this prefix
-                    case lookupSynonymGroup lcSynonymDB (normalizeName p) of
-                        Just groupId ->
-                            let synCandidates = concatMap (lookupBySynonym groupId) lcIndexedDatabases
-                             in if null synCandidates then tryPrefixes ps else synCandidates
-                        Nothing -> tryPrefixes ps
-                else candidates
+    tryPrefixes = firstNonEmpty . map candidatesFor
+      where
+        candidatesFor p =
+            let normalized = normalizeText p
+                byExact = concatMap (lookupExact normalized) lcIndexedDatabases
+                bySynonym = case lookupSynonymGroup lcSynonymDB (normalizeName p) of
+                    Just groupId -> concatMap (lookupBySynonym groupId) lcIndexedDatabases
+                    Nothing -> []
+             in firstNonEmpty [byExact, bySynonym]
 
     classifyEntry :: Text -> (Text, SupplierEntry) -> Maybe ((Text, SupplierEntry), LocationKind)
     classifyEntry queryLoc entry@(_, SupplierEntry{seLocation}) =

From b3165571cd1d94e6d229552f6863986691e7456f Mon Sep 17 00:00:00 2001
From: Christophe Combelles <ccomb@free.fr>
Date: Tue, 26 May 2026 17:36:46 +0200
Subject: [PATCH 20/43] refactor(api): route every ServiceError through
 throwServiceError
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

`throwServiceError` was already defined as the single translator from
Service.ServiceError to HTTP error responses, but six handler bodies
and three internal helpers (withValidatedActivity, withValidatedFlow,
resolveOrThrow) still inlined their own ad-hoc case ladders. The
inlined translations had diverged over time — e.g. MatrixError was
mapped to err500 in three handlers but err422 in throwServiceError
itself (the comment near throwServiceError documents why err422 is
correct: it indicates a client-submitted invariant breakage).

Each case ladder collapses to `either throwServiceError pure
(Service.X ...)`. Net −40 LOC in src/API/Routes.hs.

Behaviour changes (all corrections, all toward the documented intent):
  - GET /graph, GET /supply-chain, GET /aggregate, GET /consumers:
    MatrixError now returns 422 with the inner message instead of 500
    with the same message. Aligns these endpoints with the rest of the
    cross-DB pipeline.
  - resolveOrThrow: previously returned 500 with `show e` for
    MatrixError / InvalidUUID / FlowNotFound; now goes through
    throwServiceError (422 / 500 with the inner message, respectively).
  - withValidatedFlow: the three-way M.lookup on tech/bio/waste flows
    becomes `asum [TechKind <$> ..., BioKind <$> ..., WasteKind <$> ...]`
    — the same Alternative-on-Maybe pattern firstNonEmpty captured for
    list cascades.

Categorically: the Either→AppM bind is `either throwServiceError pure`,
which is the Kleisli arrow that lifts a pure Service.X result into the
handler monad. Now used uniformly; the divergent inlines are gone.

Hspec 1052/1052 green. No test verifies specific status codes for the
shifted endpoints (only that MatrixError is surfaced as an error value
at all).
---
 src/API/Routes.hs | 90 +++++++++++++----------------------------------
 1 file changed, 25 insertions(+), 65 deletions(-)

diff --git a/src/API/Routes.hs b/src/API/Routes.hs
index cbeeb53c..4ebd1002 100644
--- a/src/API/Routes.hs
+++ b/src/API/Routes.hs
@@ -19,6 +19,7 @@ import Control.Monad (forM, forM_, unless, when)
 import Control.Monad.IO.Class (liftIO)
 import Data.Aeson
 import qualified Data.ByteString.Lazy as BSL
+import Data.Foldable (asum)
 import Data.List (find, intercalate, sortBy, sortOn)
 import qualified Data.List.NonEmpty as NE
 import qualified Data.Validation as V
@@ -333,32 +334,24 @@ mkCrossDBContrib dbManager rootDbName _flowDB unitDB score ((depDbName, pid), c)
 
 -- | Helper function to validate ProcessId and lookup activity
 withValidatedActivity :: Database -> Text -> (Activity -> AppM a) -> AppM a
-withValidatedActivity db processId action = do
-    case Service.resolveActivityByProcessId db processId of
-        Left (Service.InvalidProcessId errorMsg) -> throwError err400{errBody = BSL.fromStrict $ T.encodeUtf8 errorMsg}
-        Left (Service.ActivityNotFound _) -> throwError err404{errBody = "Activity not found"}
-        Left _ -> throwError err400{errBody = "Invalid request"}
-        Right activity -> action activity
+withValidatedActivity db processId action =
+    either throwServiceError action (Service.resolveActivityByProcessId db processId)
 
 {- | Helper function to validate UUID and lookup flow. Returns a tagged sum
 so callers can dispatch on tech vs bio.
 -}
 withValidatedFlow :: Database -> Text -> (FlowKind -> AppM a) -> AppM a
 withValidatedFlow db uuid action = do
-    case Service.validateUUID uuid of
-        Left (Service.InvalidUUID errorMsg) -> throwError err400{errBody = BSL.fromStrict $ T.encodeUtf8 errorMsg}
-        Left _ -> throwError err400{errBody = "Invalid request"}
-        Right validUuidText ->
-            case UUID.fromText validUuidText of
-                Nothing -> throwError err400{errBody = "Invalid UUID format"}
-                Just validUuid ->
-                    case M.lookup validUuid (dbTechFlows db) of
-                        Just flow -> action (TechKind flow)
-                        Nothing -> case M.lookup validUuid (dbBioFlows db) of
-                            Just flow -> action (BioKind flow)
-                            Nothing -> case M.lookup validUuid (dbWasteFlows db) of
-                                Just flow -> action (WasteKind flow)
-                                Nothing -> throwError err404{errBody = "Flow not found"}
+    validUuidText <- either throwServiceError pure (Service.validateUUID uuid)
+    validUuid <- maybe (throwError err400{errBody = "Invalid UUID format"}) pure (UUID.fromText validUuidText)
+    let lookups =
+            [ TechKind <$> M.lookup validUuid (dbTechFlows db)
+            , BioKind <$> M.lookup validUuid (dbBioFlows db)
+            , WasteKind <$> M.lookup validUuid (dbWasteFlows db)
+            ]
+    case asum lookups of
+        Just flow -> action flow
+        Nothing -> throwError err404{errBody = "Flow not found"}
 
 -- | Login request body
 newtype LoginRequest = LoginRequest
@@ -480,20 +473,10 @@ HTTP status. Validates the resolved ProcessId against the technosphere
 matrix index too — see Service.validateProcessIdInMatrixIndex.
 -}
 resolveOrThrow :: Database -> Text -> AppM (ProcessId, Activity)
-resolveOrThrow db processIdText =
-    case Service.resolveActivityAndProcessId db processIdText of
-        Left (Service.ActivityNotFound _) -> throwError err404{errBody = "Activity not found"}
-        Left (Service.InvalidProcessId msg) -> throwError err400{errBody = BSL.fromStrict $ T.encodeUtf8 msg}
-        Left e@(Service.MatrixError _) -> internalError e
-        Left e@(Service.InvalidUUID _) -> internalError e
-        Left e@(Service.FlowNotFound _) -> internalError e
-        Right (pid, act) ->
-            case Service.validateProcessIdInMatrixIndex db pid of
-                Left e -> internalError e
-                Right () -> return (pid, act)
-  where
-    internalError :: Service.ServiceError -> AppM a
-    internalError e = throwError err500{errBody = BSL.fromStrict $ T.encodeUtf8 $ T.pack $ show e}
+resolveOrThrow db processIdText = do
+    (pid, act) <- either throwServiceError pure (Service.resolveActivityAndProcessId db processIdText)
+    either throwServiceError pure (Service.validateProcessIdInMatrixIndex db pid)
+    pure (pid, act)
 
 {- | Translate a Service-level error into the HTTP status used across the
 cross-DB LCIA paths.
@@ -1100,13 +1083,10 @@ lcaServer env =
     getActivityInfo dbName processId = do
         (db, _) <- requireDatabaseByName dbName
         unitCfg <- liftIO $ getMergedUnitConfig dbManager
-        case Service.getActivityInfo unitCfg db processId of
-            Left (Service.ActivityNotFound _) -> throwError err404{errBody = "Activity not found"}
-            Left (Service.InvalidProcessId _) -> throwError err400{errBody = "Invalid ProcessId format"}
-            Left _ -> throwError err500{errBody = "Internal server error"}
-            Right result -> case fromJSON result of
-                Success activityInfo -> return activityInfo
-                Error err -> throwError err500{errBody = BSL.fromStrict $ T.encodeUtf8 $ T.pack err}
+        result <- either throwServiceError pure (Service.getActivityInfo unitCfg db processId)
+        case fromJSON result of
+            Success activityInfo -> return activityInfo
+            Error err -> throwError err500{errBody = BSL.fromStrict $ T.encodeUtf8 $ T.pack err}
 
     -- Activity flows sub-resource
     getActivityFlows :: Text -> Text -> AppM [FlowSummary]
@@ -1177,12 +1157,7 @@ lcaServer env =
         (db, sharedSolver) <- requireDatabaseByName dbName
         let cutoffPercent = fromMaybe 1.0 maybeCutoff -- Default to 1% cutoff
         result <- liftIO $ Service.buildActivityGraph db sharedSolver processId cutoffPercent
-        case result of
-            Left (Service.ActivityNotFound _) -> throwError err404{errBody = "Activity not found"}
-            Left (Service.InvalidProcessId _) -> throwError err400{errBody = "Invalid ProcessId format"}
-            Left (Service.MatrixError msg) -> throwError err500{errBody = BSL.fromStrict $ T.encodeUtf8 msg}
-            Left _ -> throwError err500{errBody = "Internal server error"}
-            Right graphExport -> return graphExport
+        either throwServiceError pure result
 
     -- Build the supply-chain filter shared by the GET and POST handlers.
     buildSupplyChainFilter :: Maybe Text -> Maybe Int -> Maybe Double -> Maybe Int -> Maybe Int -> Maybe Text -> Maybe Text -> Maybe Text -> [Text] -> [Text] -> [Text] -> Maybe Text -> Maybe Text -> Service.SupplyChainFilter
@@ -1236,13 +1211,7 @@ lcaServer env =
             Nothing -> do
                 unitCfg <- liftIO $ DM.getMergedUnitConfig dbManager
                 result <- liftIO $ Service.getSupplyChain unitCfg (DM.mkDepSolverLookup dbManager) db dbName sharedSolver processIdText scf includeEdges
-                case result of
-                    Left (Service.ActivityNotFound _) -> throwError err404{errBody = "Activity not found"}
-                    Left (Service.InvalidProcessId _) -> throwError err400{errBody = "Invalid ProcessId format"}
-                    Left (Service.MatrixError msg) -> throwError err500{errBody = BSL.fromStrict $ T.encodeUtf8 msg}
-                    Left (Service.InvalidUUID _) -> throwError err500{errBody = "Internal server error"}
-                    Left (Service.FlowNotFound _) -> throwError err500{errBody = "Internal server error"}
-                    Right supplyChain -> return supplyChain
+                either throwServiceError pure result
             Just subReq -> do
                 (processId, _) <- resolveOrThrow db processIdText
                 -- Use the cross-DB-aware substitution resolver so qualified PIDs in
@@ -1367,12 +1336,7 @@ lcaServer env =
             unitCfg <- liftIO $ getMergedUnitConfig dbManager
             (mFlows, mUnits) <- liftIO $ DM.getMergedFlowMetadata dbManager
             result <- liftIO $ Agg.aggregate unitCfg mFlows mUnits db dbName sharedSolver (DM.mkDepSolverLookup dbManager) processId params
-            case result of
-                Left (Service.ActivityNotFound _) -> throwError err404{errBody = "Activity not found"}
-                Left (Service.InvalidProcessId _) -> throwError err400{errBody = "Invalid ProcessId format"}
-                Left (Service.MatrixError msg) -> throwError err500{errBody = BSL.fromStrict $ T.encodeUtf8 msg}
-                Left _ -> throwError err500{errBody = "Internal server error"}
-                Right agg -> return agg
+            either throwServiceError pure result
           where
             -- Parse "System=Value[:exact]" into (system, value, isExact).
             parseClassFilter :: Text -> Maybe (Text, Text, Bool)
@@ -1523,11 +1487,7 @@ lcaServer env =
                     , Service.cnfMaxDepth = maxDepthParam
                     , Service.cnfIncludeEdges = fromMaybe False includeEdgesParam
                     }
-        case Service.getConsumers db dbName processIdText cnf of
-            Left (Service.ActivityNotFound _) -> throwError err404{errBody = "Activity not found"}
-            Left (Service.InvalidProcessId msg) -> throwError err400{errBody = BSL.fromStrict $ T.encodeUtf8 msg}
-            Left err -> throwError err500{errBody = BSL.fromStrict $ T.encodeUtf8 $ T.pack $ show err}
-            Right consumers -> return consumers
+        either throwServiceError pure (Service.getConsumers db dbName processIdText cnf)
 
     -- Activity path-to endpoint (shortest supply chain path to first matching upstream activity)
     getActivityPathTo :: Text -> Text -> Maybe Text -> AppM Value

From 62de1622816e743efe98dbe02c35b487491d7840 Mon Sep 17 00:00:00 2001
From: Christophe Combelles <ccomb@free.fr>
Date: Tue, 26 May 2026 23:21:22 +0200
Subject: [PATCH 21/43] refactor(api): finish AppM migration; lift handlers to
 top level
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Every Servant handler that previously lived in `lcaServer`'s `where`-block
is now a top-level `:: AppM …` action that reads its env slice via
`asks aeDbManager` / `aeMaxTreeDepth` / `aePassword` / `aeHostingConfig`
/ `aeClassificationPresets`. `lcaServer` body collapses to
`hoistServer lcaAPI (runApp env) handlers`, with only the `:<|>` chain
left inside `where`. This unblocks non-Servant reuse (BatchImpacts etc.)
without the prior "hoist after the fact" gymnastics.

The five Has-* capability classes are deleted — only `HasDatabaseManager`
was used (at one site) and the others were dead code; `asks aeX` is
the same length without the class indirection.

Three near-identical `expandPreset + zipWith3` preset/explicit-filter
merges (consumers, supply-chain core, search) collapse to one
`mergeClassFilters` helper.

Trim categorical prose that the implementations don't exploit:
the "Coercible-witnessed iso / dictionary transport" paragraph in
`Stripped` (no coerce in the file), the Milewski reference in
`Validation` (replaced with the actual `ap = (<*>)` law that blocks
a Monad instance), the "Free Applicative over a primitive parser action"
paragraph on the optparse helpers (tautological w.r.t. the library),
the "product of four monoids" / "product of monoids, componentwise"
prose on `UnlinkedSummary` / `CrossDBLinkingStats` / `TreeStats`,
the long App.Env head doc, and the dead `mempty,` re-export from
`Database.Loader` (class methods are not exportable as symbols).

Net: 9 files, +1022/-1142. `cabal build all` clean, lca-tests
1052/1052 green, `dump-openapi` byte-identical (md5 matches PR head).
---
 src/API/DatabaseHandlers.hs |   42 +-
 src/API/JsonOptions.hs      |   18 +-
 src/API/Routes.hs           | 1962 ++++++++++++++++++-----------------
 src/App/Env.hs              |   91 +-
 src/CLI/Parser.hs           |    9 +-
 src/Data/Validation.hs      |   15 +-
 src/Database/Loader.hs      |   12 +-
 src/Service.hs              |    5 +-
 src/Types.hs                |   10 +-
 9 files changed, 1022 insertions(+), 1142 deletions(-)

diff --git a/src/API/DatabaseHandlers.hs b/src/API/DatabaseHandlers.hs
index aa011473..76f71253 100644
--- a/src/API/DatabaseHandlers.hs
+++ b/src/API/DatabaseHandlers.hs
@@ -131,13 +131,13 @@ import Database.Upload (
  )
 import qualified Database.UploadedDatabase as UploadedDB
 import Types (Database (..), GeographyPolicy (..), unresolvedCount)
-import App.Env (AppM, HasDatabaseManager (..))
+import App.Env (AppEnv (..), AppM)
 import Control.Monad.Reader (asks)
 
 -- | List all databases
 getDatabases :: AppM DatabaseListResponse
 getDatabases = do
-    dbManager <- asks getDatabaseManager
+    dbManager <- asks aeDbManager
     dbStatuses <- liftIO $ listDatabases dbManager
     let statusList = map convertDbStatus dbStatuses
     return $ DatabaseListResponse statusList
@@ -145,7 +145,7 @@ getDatabases = do
 -- | Load a database on demand
 loadDatabaseHandler :: Text -> AppM LoadDatabaseResponse
 loadDatabaseHandler dbName= do
-    dbManager <- asks getDatabaseManager
+    dbManager <- asks aeDbManager
     eitherResult <- liftIO $ try $ loadDatabase dbManager dbName
     case eitherResult of
         Left (ex :: SomeException) ->
@@ -158,7 +158,7 @@ loadDatabaseHandler dbName= do
 -- | Unload a database from memory
 unloadDatabaseHandler :: Text -> AppM ActivateResponse
 unloadDatabaseHandler dbName = do
-    dbManager <- asks getDatabaseManager
+    dbManager <- asks aeDbManager
     simpleAction (unloadDatabase dbManager dbName) ("Unloaded database: " <> dbName)
 
 {- | Re-run cross-DB linking for a loaded database against the currently-loaded
@@ -167,7 +167,7 @@ suboptimal order without reloading the whole database.
 -}
 relinkDatabaseHandler :: Text -> AppM RelinkResponse
 relinkDatabaseHandler dbName= do
-    dbManager <- asks getDatabaseManager
+    dbManager <- asks aeDbManager
     res <- liftIO $ relinkDatabase dbManager dbName
     case res of
         Left err -> throwError err404{errBody = BSL.fromStrict $ T.encodeUtf8 err}
@@ -184,13 +184,13 @@ relinkDatabaseHandler dbName= do
 -- | Delete an uploaded database (move to trash)
 deleteDatabaseHandler :: Text -> AppM ActivateResponse
 deleteDatabaseHandler dbName = do
-    dbManager <- asks getDatabaseManager
+    dbManager <- asks aeDbManager
     simpleAction (removeDatabase dbManager dbName) ("Deleted database: " <> dbName)
 
 -- | Upload a new database
 uploadDatabaseHandler :: UploadRequest -> AppM UploadResponse
 uploadDatabaseHandler req= do
-    dbManager <- asks getDatabaseManager
+    dbManager <- asks aeDbManager
     -- Decode base64 ZIP data
     let zipDataResult = B64.decode $ T.encodeUtf8 $ urFileData req
     case zipDataResult of
@@ -324,7 +324,7 @@ Returns completeness, missing suppliers, and dependency suggestions
 -}
 getDatabaseSetupHandler :: Text -> AppM DatabaseSetupInfo
 getDatabaseSetupHandler dbName= do
-    dbManager <- asks getDatabaseManager
+    dbManager <- asks aeDbManager
     eitherResult <- liftIO $ try $ getDatabaseSetupInfo dbManager dbName
     case eitherResult of
         Left (ex :: SomeException) ->
@@ -338,7 +338,7 @@ Runs cross-DB linking and returns updated setup info
 -}
 addDependencyHandler :: Text -> Text -> AppM DatabaseSetupInfo
 addDependencyHandler dbName depName = do
-    dbManager <- asks getDatabaseManager
+    dbManager <- asks aeDbManager
     ioEither400 (addDependencyToStaged dbManager dbName depName)
 
 {- | Remove a dependency from a staged database
@@ -346,13 +346,13 @@ Re-runs cross-DB linking and returns updated setup info
 -}
 removeDependencyHandler :: Text -> Text -> AppM DatabaseSetupInfo
 removeDependencyHandler dbName depName = do
-    dbManager <- asks getDatabaseManager
+    dbManager <- asks aeDbManager
     ioEither400 (removeDependencyFromStaged dbManager dbName depName)
 
 -- | Change the data path for an uploaded (staged) database
 setDataPathHandler :: Text -> Value -> AppM DatabaseSetupInfo
 setDataPathHandler dbName body = do
-    dbManager <- asks getDatabaseManager
+    dbManager <- asks aeDbManager
     -- Extract "path" from JSON body
     let mPath = case body of
             A.Object obj -> case KM.lookup "path" obj of
@@ -368,7 +368,7 @@ Builds matrices and makes it ready for queries
 -}
 finalizeDatabaseHandler :: Text -> AppM ActivateResponse
 finalizeDatabaseHandler dbName= do
-    dbManager <- asks getDatabaseManager
+    dbManager <- asks aeDbManager
     eitherResult <- liftIO $ try $ finalizeDatabase dbManager dbName
     case eitherResult of
         Left (ex :: SomeException) ->
@@ -383,7 +383,7 @@ Same flow as database upload but creates MethodConfig entry
 -}
 uploadMethodHandler :: UploadRequest -> AppM UploadResponse
 uploadMethodHandler req= do
-    dbManager <- asks getDatabaseManager
+    dbManager <- asks aeDbManager
     let zipDataResult = B64.decode $ T.encodeUtf8 $ urFileData req
     case zipDataResult of
         Left err -> return $ UploadResponse False ("Invalid base64 data: " <> T.pack err) Nothing Nothing
@@ -439,7 +439,7 @@ uploadMethodHandler req= do
 -- | Delete an uploaded method collection
 deleteMethodHandler :: Text -> AppM ActivateResponse
 deleteMethodHandler name = do
-    dbManager <- asks getDatabaseManager
+    dbManager <- asks aeDbManager
     simpleAction (removeMethodCollection dbManager name) ("Deleted method: " <> name)
 
 -- | Common pattern: run an IO action that returns Either Text (), map to ActivateResponse
@@ -500,32 +500,32 @@ convertRefDataStatus s =
 
 listRefData :: RefDataKind -> AppM RefDataListResponse
 listRefData kind = do
-    dbManager <- asks getDatabaseManager
+    dbManager <- asks aeDbManager
     let (listFn, _, _, _, _, _) = rdOps kind
     statuses <- liftIO $ listFn dbManager
     return $ RefDataListResponse (map convertRefDataStatus statuses)
 
 loadRefData :: RefDataKind -> Text -> AppM ActivateResponse
 loadRefData kind name = do
-    dbManager <- asks getDatabaseManager
+    dbManager <- asks aeDbManager
     let (_, loadFn, _, _, _, _) = rdOps kind
     simpleAction (loadFn dbManager name) ("Loaded: " <> name)
 
 unloadRefData :: RefDataKind -> Text -> AppM ActivateResponse
 unloadRefData kind name = do
-    dbManager <- asks getDatabaseManager
+    dbManager <- asks aeDbManager
     let (_, _, unloadFn, _, _, _) = rdOps kind
     simpleAction (unloadFn dbManager name) ("Unloaded: " <> name)
 
 deleteRefData :: RefDataKind -> Text -> AppM ActivateResponse
 deleteRefData kind name = do
-    dbManager <- asks getDatabaseManager
+    dbManager <- asks aeDbManager
     let (_, _, _, _, removeFn, _) = rdOps kind
     simpleAction (removeFn dbManager name) ("Deleted: " <> name)
 
 uploadRefData :: RefDataKind -> UploadRequest -> AppM UploadResponse
 uploadRefData kind req = do
-    dbManager <- asks getDatabaseManager
+    dbManager <- asks aeDbManager
     let (_, _, _, addFn, _, subdir) = rdOps kind
     let csvDataResult = B64.decode $ T.encodeUtf8 $ urFileData req
     case csvDataResult of
@@ -564,7 +564,7 @@ uploadRefData kind req = do
 
 getFlowSynonymGroupsHandler :: Text -> AppM SynonymGroupsResponse
 getFlowSynonymGroupsHandler name = do
-    dbManager <- asks getDatabaseManager
+    dbManager <- asks aeDbManager
     result <- liftIO $ getFlowSynonymGroups dbManager name
     case result of
         Left err -> throwError $ err404{errBody = BSL.fromStrict $ T.encodeUtf8 err}
@@ -572,7 +572,7 @@ getFlowSynonymGroupsHandler name = do
 
 downloadRefDataHandler :: RefDataKind -> Text -> AppM (Headers '[Header "Content-Disposition" Text] BinaryContent)
 downloadRefDataHandler kind name = do
-    dbManager <- asks getDatabaseManager
+    dbManager <- asks aeDbManager
     let tvar = case kind of
             FlowSynonyms -> dmAvailableFlowSyns dbManager
             CompartmentMappings -> dmAvailableCompMaps dbManager
diff --git a/src/API/JsonOptions.hs b/src/API/JsonOptions.hs
index 442fec54..15caea6e 100644
--- a/src/API/JsonOptions.hs
+++ b/src/API/JsonOptions.hs
@@ -53,17 +53,8 @@ strippedParseJSON = genericParseJSON stripLowerPrefix
 strippedSchemaOptions :: SchemaOptions
 strippedSchemaOptions = fromAesonOptions stripLowerPrefix
 
-{- | DerivingVia carrier collapsing the ~180 hand-rolled @ToJSON@ \/ @FromJSON@
-\/ @ToSchema@ instance blocks that share the same shape:
-
-@
-  toJSON     = strippedToJSON
-  toEncoding = strippedToEncoding
-  parseJSON  = strippedParseJSON
-  declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions
-@
-
-Usage:
+{- | DerivingVia carrier: strips the lowercase field-name prefix
+(@fooBar -> bar@) and uses the @Generic@ instances. Usage:
 
 @
   data Foo = Foo { fooBar :: Int } deriving Generic
@@ -71,11 +62,6 @@ Usage:
   deriving via (Stripped Foo) instance FromJSON Foo
   deriving via (Stripped Foo) instance ToSchema Foo
 @
-
-Categorically: dictionary transport along the zero-cost iso witnessed by
-'Coercible'. The instance bodies bypass the user-facing typeclass on @a@ and
-call the @Generic@ helpers directly, avoiding the recursion that would arise
-from @instance ToJSON a => ToJSON (Stripped a)@.
 -}
 newtype Stripped a = Stripped {unStripped :: a}
 
diff --git a/src/API/Routes.hs b/src/API/Routes.hs
index 4ebd1002..72cb4a68 100644
--- a/src/API/Routes.hs
+++ b/src/API/Routes.hs
@@ -56,7 +56,7 @@ import qualified SharedSolver
 import Tree (buildLoopAwareTree)
 import Types
 import qualified Version
-import App.Env (AppEnv (..), AppM, HasDatabaseManager (..), runApp)
+import App.Env (AppEnv (..), AppM, runApp)
 import Control.Monad.Reader (asks)
 
 -- | API type definition - RESTful design with focused endpoints
@@ -189,7 +189,7 @@ notLoadedBody prefix name = BSL.fromStrict (T.encodeUtf8 (prefix <> name))
 -- | Get database by name, throw 404 if not loaded
 requireDatabaseByName :: Text -> AppM (Database, SharedSolver)
 requireDatabaseByName dbName = do
-    dbManager <- asks getDatabaseManager
+    dbManager <- asks aeDbManager
     maybeLoaded <- liftIO $ getDatabase dbManager dbName
     case maybeLoaded of
         Just loaded -> return (ldDatabase loaded, ldSharedSolver loaded)
@@ -232,7 +232,7 @@ methods.
 -}
 solutionWithDeps :: Text -> Database -> SharedSolver -> ProcessId -> AppM SharedSolver.CrossDBSolution
 solutionWithDeps dbName db solver pid = do
-    dbManager <- asks getDatabaseManager
+    dbManager <- asks aeDbManager
     requireFullyLinked dbName db
     unitCfg <- liftIO $ getMergedUnitConfig dbManager
     res <-
@@ -256,7 +256,7 @@ inventoriesWithDeps dbName db solver pids =
 -- | Batch variant of 'solutionWithDeps'.
 solutionsWithDeps :: Text -> Database -> SharedSolver -> [ProcessId] -> AppM [SharedSolver.CrossDBSolution]
 solutionsWithDeps dbName db solver pids = do
-    dbManager <- asks getDatabaseManager
+    dbManager <- asks aeDbManager
     requireFullyLinked dbName db
     unitCfg <- liftIO $ getMergedUnitConfig dbManager
     res <-
@@ -494,7 +494,7 @@ throwServiceError (Service.FlowNotFound _) = throwError err500{errBody = "Intern
 -- | Load a method collection by name from the live DatabaseManager state.
 loadCollection :: Text -> AppM ([Method], [DamageCategory], [NormWeightSet], [ScoringSet])
 loadCollection collectionName = do
-    dbManager <- asks getDatabaseManager
+    dbManager <- asks aeDbManager
     loadedCollections <- liftIO $ readTVarIO (dmLoadedMethods dbManager)
     case M.lookup collectionName loadedCollections of
         Just mc -> return (mcMethods mc, mcDamageCategories mc, mcNormWeightSets mc, mcScoringSets mc)
@@ -508,7 +508,7 @@ crossDBSolutionFor :: Text -> Database -> SharedSolver -> ProcessId -> Maybe Sub
 crossDBSolutionFor dbName db solver pid mSub = case mSub of
     Nothing -> solutionWithDeps dbName db solver pid
     Just subReq -> do
-        dbManager <- asks getDatabaseManager
+        dbManager <- asks aeDbManager
         requireFullyLinked dbName db
         unitCfg <- liftIO $ getMergedUnitConfig dbManager
         eSol <-
@@ -772,7 +772,7 @@ activityLCIABatchH ::
     Maybe SubstitutionRequest ->
     AppM LCIABatchResult
 activityLCIABatchH dbName processIdText collectionName mSub = do
-    dbManager <- asks getDatabaseManager
+    dbManager <- asks aeDbManager
     (db, sharedSolver) <- requireDatabaseByName dbName
     (actProcessId, activity) <- resolveOrThrow db processIdText
     (methods, damageCats, nwSets, scoringSets) <- loadCollection collectionName
@@ -836,7 +836,7 @@ batchImpactsH ::
     BatchImpactsRequest ->
     AppM BatchImpactsResponse
 batchImpactsH dbName collectionName topFlowsParam req = do
-    dbManager <- asks getDatabaseManager
+    dbManager <- asks aeDbManager
     (db, sharedSolver) <- requireDatabaseByName dbName
     loadedCollections <- liftIO $ readTVarIO (dmLoadedMethods dbManager)
     collection <- case M.lookup collectionName loadedCollections of
@@ -896,24 +896,983 @@ batchImpactsH dbName collectionName topFlowsParam req = do
             , birInvalid = invalid
             }
 
-{- | API server implementation. Handlers live in 'AppM' (a 'ReaderT'
-'AppEnv' over Servant's 'Handler'); 'hoistServer' is the natural
-transformation that lifts the @ServerT LCAAPI AppM@ into a plain
-@Server LCAAPI@ Servant expects at the WAI boundary.
--}
+-- ---------------------------------------------------------------------------
+-- Pure helpers shared by handlers
+-- ---------------------------------------------------------------------------
+
+-- | Parse "System=Value[:exact]" into (system, value, isExact).
+parseClassFilter :: Text -> Maybe (Text, Text, Bool)
+parseClassFilter raw =
+    let (sys, rest) = T.breakOn "=" raw
+     in if T.null rest
+            then Nothing
+            else
+                let valAndMode = T.drop 1 rest
+                    (val, mode) = T.breakOn ":" valAndMode
+                    isExact = T.drop 1 mode == "exact"
+                 in Just (T.strip sys, T.strip val, isExact)
+
+-- | Merge preset-derived and explicit (system, value, exact) classification filters.
+mergeClassFilters
+    :: [Config.ClassificationPreset]
+    -> Maybe Text
+    -> [Text]
+    -> [Text]
+    -> [Text]
+    -> [(Text, Text, Bool)]
+mergeClassFilters presets presetParam systems values modes =
+    expandPreset presets presetParam
+        ++ zipWith3
+            (\s v m -> (s, v, m == "exact"))
+            systems
+            values
+            (modes ++ repeat "contains")
+
+-- | Build a 'Service.SupplyChainFilter' shared by GET and POST handlers.
+buildSupplyChainFilter
+    :: [Config.ClassificationPreset]
+    -> Maybe Text
+    -> Maybe Int
+    -> Maybe Double
+    -> Maybe Int
+    -> Maybe Int
+    -> Maybe Text
+    -> Maybe Text
+    -> Maybe Text
+    -> [Text]
+    -> [Text]
+    -> [Text]
+    -> Maybe Text
+    -> Maybe Text
+    -> Service.SupplyChainFilter
+buildSupplyChainFilter presets nameFilter limitParam minQuantity offsetParam maxDepthParam locationFilter productFilter presetParam classSystems classValues classModes sortParam orderParam =
+    Service.SupplyChainFilter
+        { Service.scfCore =
+            Service.ActivityFilterCore
+                { Service.afcName = nameFilter
+                , Service.afcLocation = locationFilter
+                , Service.afcProduct = productFilter
+                , Service.afcClassifications = mergeClassFilters presets presetParam classSystems classValues classModes
+                , Service.afcLimit = limitParam
+                , Service.afcOffset = offsetParam
+                , Service.afcSort = sortParam
+                , Service.afcOrder = orderParam
+                }
+        , Service.scfMaxDepth = maxDepthParam
+        , Service.scfMinQuantity = minQuantity
+        }
+
+buildFlowEntry :: Database -> M.Map UUID (MethodCF, MatchStrategy) -> UUID -> FlowCFEntry
+buildFlowEntry db reverseIndex uuid =
+    let mFlow = M.lookup uuid (dbBioFlows db)
+        mMatch = M.lookup uuid reverseIndex
+     in FlowCFEntry
+            { fceFlowId = uuid
+            , fceFlowName = maybe "" bfName mFlow
+            , fceFlowCategory = maybe "" bfCompartmentName mFlow
+            , fceCfValue = fmap (mcfValue . fst) mMatch
+            , fceCfFlowName = fmap (mcfFlowName . fst) mMatch
+            , fceMatchStrategy = fmap (strategyToText . snd) mMatch
+            }
+
+strategyToText :: MatchStrategy -> Text
+strategyToText ByUUID = "uuid"
+strategyToText ByCAS = "cas"
+strategyToText ByName = "name"
+strategyToText BySynonym = "synonym"
+strategyToText ByFuzzy = "fuzzy"
+strategyToText NoMatch = "none"
+
+matchesQuery :: Maybe Text -> Text -> Text -> Bool
+matchesQuery Nothing _ _ = True
+matchesQuery (Just q) cfName dbFlowName =
+    T.isInfixOf q (T.toLower cfName) || T.isInfixOf q (T.toLower dbFlowName)
+
+cfToAPI :: MethodCF -> MethodFactorAPI
+cfToAPI cf =
+    MethodFactorAPI
+        { mfaFlowRef = mcfFlowRef cf
+        , mfaFlowName = mcfFlowName cf
+        , mfaDirection = case mcfDirection cf of
+            MT.Input -> "Input"
+            MT.Output -> "Output"
+        , mfaValue = mcfValue cf
+        }
+
+-- ---------------------------------------------------------------------------
+-- AppM helpers
+-- ---------------------------------------------------------------------------
+
+-- | Lookup a method by UUID across all loaded collections.
+loadMethodByUUID :: Text -> AppM Method
+loadMethodByUUID uuidText = do
+    dbManager <- asks aeDbManager
+    loadedMethods <- liftIO $ DM.getLoadedMethods dbManager
+    let allMethods = map snd loadedMethods
+    case UUID.fromText uuidText of
+        Nothing -> throwError err400{errBody = "Invalid method UUID format"}
+        Just uuid ->
+            case filter (\m -> methodId m == uuid) allMethods of
+                (m : _) -> return m
+                [] -> throwError err404{errBody = "Method not found"}
+
+-- | Resolve (db, solver, ProcessId, Activity, Method) and dispatch.
+withActivityAndMethod
+    :: Text
+    -> Text
+    -> Text
+    -> (Database -> SharedSolver -> ProcessId -> Activity -> Method -> AppM a)
+    -> AppM a
+withActivityAndMethod dbName processIdText methodIdText k = do
+    (db, sharedSolver) <- requireDatabaseByName dbName
+    method <- loadMethodByUUID methodIdText
+    case Service.resolveActivityAndProcessId db processIdText of
+        Left (Service.ActivityNotFound _) -> throwError err404{errBody = "Activity not found"}
+        Left (Service.InvalidProcessId _) -> throwError err400{errBody = "Invalid ProcessId format"}
+        Left err -> throwError err500{errBody = BSL.fromStrict $ T.encodeUtf8 $ T.pack $ show err}
+        Right (actProcessId, activity) -> k db sharedSolver actProcessId activity method
+
+-- ---------------------------------------------------------------------------
+-- Servant handlers (top-level AppM actions)
+-- ---------------------------------------------------------------------------
+
+getOpenApiSpec :: AppM Value
+getOpenApiSpec = return $ toJSON volcaOpenApi
+
+getVersion :: AppM Value
+getVersion =
+    return $
+        object
+            [ "version" .= Version.version
+            , "gitHash" .= Version.gitHash
+            , "gitTag" .= Version.gitTag
+            , "buildTarget" .= Version.buildTarget
+            ]
+
+getHosting :: AppM Value
+getHosting = do
+    hostingConfig <- asks aeHostingConfig
+    return $ case hostingConfig of
+        Just hc ->
+            object
+                [ "is_hosted" .= True
+                , "max_uploads" .= Config.hcMaxUploads hc
+                , "api_access" .= Config.hcApiAccess hc
+                , "upgrade_upload" .= Config.hcUpgradeUpload hc
+                , "upgrade_api" .= Config.hcUpgradeApi hc
+                , "upgrade_vm_size" .= Config.hcUpgradeVmSize hc
+                ]
+        Nothing ->
+            object
+                [ "is_hosted" .= False
+                , "max_uploads" .= (-1 :: Int)
+                , "api_access" .= True
+                , "upgrade_upload" .= ("" :: Text)
+                , "upgrade_api" .= ("" :: Text)
+                , "upgrade_vm_size" .= ("" :: Text)
+                ]
+
+getStats :: AppM Value
+getStats = liftIO $ do
+    enabled <- GHC.Stats.getRTSStatsEnabled
+    if enabled
+        then do
+            stats <- GHC.Stats.getRTSStats
+            return $
+                object
+                    [ "memory_used_bytes" .= GHC.Stats.gcdetails_live_bytes (GHC.Stats.gc stats)
+                    , "memory_allocated_bytes" .= GHC.Stats.allocated_bytes stats
+                    , "gc_count" .= GHC.Stats.gcs stats
+                    ]
+        else
+            return $
+                object
+                    ["error" .= ("RTS stats not enabled. Run with +RTS -T to enable." :: Text)]
+
+getClassificationPresets :: AppM [ClassificationPresetInfo]
+getClassificationPresets = do
+    presets <- asks aeClassificationPresets
+    return $ map toInfo presets
+  where
+    toInfo p =
+        ClassificationPresetInfo
+            { cpiName = Config.cpName p
+            , cpiLabel = Config.cpLabel p
+            , cpiDescription = Config.cpDescription p
+            , cpiFilters = map (\e -> ClassificationEntryInfo (Config.ceSystem e) (Config.ceValue e) (Config.ceMode e)) (Config.cpFilters p)
+            }
+
+getLogsHandler :: Maybe Int -> AppM Value
+getLogsHandler sinceMaybe = do
+    let since = fromMaybe 0 sinceMaybe
+    (nextIndex, logLines) <- liftIO $ getLogLines since
+    return $
+        object
+            [ "lines" .= logLines
+            , "nextIndex" .= nextIndex
+            ]
+
+postAuth :: LoginRequest -> AppM (Headers '[Header "Set-Cookie" String] Value)
+postAuth loginReq = do
+    password <- asks aePassword
+    case password of
+        Nothing ->
+            return $ noHeader $ object ["ok" .= True]
+        Just pwd ->
+            if T.unpack (lrCode loginReq) == pwd
+                then
+                    let cookieValue = "volca_session=" ++ pwd ++ "; Path=/; HttpOnly; SameSite=Strict"
+                     in return $ addHeader cookieValue $ object ["ok" .= True]
+                else
+                    throwError err401{errBody = "{\"error\":\"invalid code\"}"}
+
+getActivityInfo :: Text -> Text -> AppM ActivityInfo
+getActivityInfo dbName processId = do
+    dbManager <- asks aeDbManager
+    (db, _) <- requireDatabaseByName dbName
+    unitCfg <- liftIO $ getMergedUnitConfig dbManager
+    result <- either throwServiceError pure (Service.getActivityInfo unitCfg db processId)
+    case fromJSON result of
+        Success activityInfo -> return activityInfo
+        Error err -> throwError err500{errBody = BSL.fromStrict $ T.encodeUtf8 $ T.pack err}
+
+getActivityFlows :: Text -> Text -> AppM [FlowSummary]
+getActivityFlows dbName processId = do
+    (db, _) <- requireDatabaseByName dbName
+    withValidatedActivity db processId $ \activity ->
+        return $ Service.getActivityFlowSummaries db activity
+
+getActivityInputs :: Text -> Text -> AppM [ExchangeDetail]
+getActivityInputs dbName processId = do
+    (db, _) <- requireDatabaseByName dbName
+    withValidatedActivity db processId $ \activity ->
+        return $ Service.getActivityInputDetails db activity
+
+getActivityOutputs :: Text -> Text -> AppM [ExchangeDetail]
+getActivityOutputs dbName processId = do
+    (db, _) <- requireDatabaseByName dbName
+    withValidatedActivity db processId $ \activity ->
+        return $ Service.getActivityOutputDetails db activity
+
+getActivityReferenceProduct :: Text -> Text -> AppM FlowDetail
+getActivityReferenceProduct dbName processId = do
+    (db, _) <- requireDatabaseByName dbName
+    withValidatedActivity db processId $ \activity ->
+        case Service.getActivityReferenceProductDetail db activity of
+            Nothing -> throwError err404{errBody = "No reference product found"}
+            Just refProduct -> return refProduct
+
+getActivityTree :: Text -> Text -> AppM TreeExport
+getActivityTree dbName processId = do
+    dbManager <- asks aeDbManager
+    maxTreeDepth <- asks aeMaxTreeDepth
+    (db, _) <- requireDatabaseByName dbName
+    withValidatedActivity db processId $ \_activity -> do
+        let activityUuidText = case T.splitOn "_" processId of
+                (uuid : _) -> uuid
+                [] -> processId
+        case UUID.fromText activityUuidText of
+            Nothing -> throwError err400{errBody = "Invalid activity UUID format"}
+            Just activityUuid -> do
+                unitCfg <- liftIO $ getMergedUnitConfig dbManager
+                let loopAwareTree = buildLoopAwareTree unitCfg db activityUuid maxTreeDepth
+                return $ Service.convertToTreeExport db processId maxTreeDepth loopAwareTree
+
+-- | Inventory with optional substitutions; goes through the cross-DB
+-- back-substitution path so dep-DB inventories merge into the response.
+activityInventoryCore :: Text -> Text -> Maybe SubstitutionRequest -> AppM InventoryExport
+activityInventoryCore dbName processIdText mSub = do
+    dbManager <- asks aeDbManager
+    (db, sharedSolver) <- requireDatabaseByName dbName
+    (processId, activity) <- resolveOrThrow db processIdText
+    sol <- crossDBSolutionFor dbName db sharedSolver processId mSub
+    (mFlows, mUnits) <- liftIO $ DM.getMergedFlowMetadata dbManager
+    pure $ Service.convertToInventoryExport db mFlows mUnits processId activity (SharedSolver.csInventory sol)
+
+getActivityInventory :: Text -> Text -> AppM InventoryExport
+getActivityInventory dbName processIdText = activityInventoryCore dbName processIdText Nothing
+
+getActivityGraph :: Text -> Text -> Maybe Double -> AppM GraphExport
+getActivityGraph dbName processId maybeCutoff = do
+    (db, sharedSolver) <- requireDatabaseByName dbName
+    let cutoffPercent = fromMaybe 1.0 maybeCutoff
+    result <- liftIO $ Service.buildActivityGraph db sharedSolver processId cutoffPercent
+    either throwServiceError pure result
+
+-- | Supply-chain core (scaling-vector based). 'Nothing' takes the cached
+-- solve; 'Just' applies substitutions via the cross-DB resolver.
+activitySupplyChainCore
+    :: Text
+    -> Text
+    -> Maybe Text
+    -> Maybe Int
+    -> Maybe Double
+    -> Maybe Int
+    -> Maybe Int
+    -> Maybe Text
+    -> Maybe Text
+    -> Maybe Text
+    -> [Text]
+    -> [Text]
+    -> [Text]
+    -> Maybe Text
+    -> Maybe Text
+    -> Maybe Bool
+    -> Maybe SubstitutionRequest
+    -> AppM SupplyChainResponse
+activitySupplyChainCore dbName processIdText nameFilter limitParam minQuantity offsetParam maxDepthParam locationFilter productFilter presetParam classSystems classValues classModes sortParam orderParam includeEdgesParam mSub = do
+    dbManager <- asks aeDbManager
+    presets <- asks aeClassificationPresets
+    (db, sharedSolver) <- requireDatabaseByName dbName
+    let includeEdges = fromMaybe False includeEdgesParam
+        scf =
+            buildSupplyChainFilter
+                presets
+                nameFilter
+                limitParam
+                minQuantity
+                offsetParam
+                maxDepthParam
+                locationFilter
+                productFilter
+                presetParam
+                classSystems
+                classValues
+                classModes
+                sortParam
+                orderParam
+    case mSub of
+        Nothing -> do
+            unitCfg <- liftIO $ DM.getMergedUnitConfig dbManager
+            result <- liftIO $ Service.getSupplyChain unitCfg (DM.mkDepSolverLookup dbManager) db dbName sharedSolver processIdText scf includeEdges
+            either throwServiceError pure result
+        Just subReq -> do
+            (processId, _) <- resolveOrThrow db processIdText
+            scalingResult <-
+                liftIO $
+                    Service.computeScalingVectorWithSubstitutionsCrossDB
+                        (DM.mkDepSolverLookup dbManager)
+                        db
+                        dbName
+                        sharedSolver
+                        processId
+                        (srSubstitutions subReq)
+            case scalingResult of
+                Left err -> throwServiceError err
+                Right (scalingVec, virtualLinks) -> do
+                    unitCfg <- liftIO $ DM.getMergedUnitConfig dbManager
+                    eResp <-
+                        liftIO $
+                            Service.buildSupplyChainFromScalingVectorCrossDB
+                                unitCfg
+                                (DM.mkDepSolverLookup dbManager)
+                                db
+                                dbName
+                                processId
+                                scalingVec
+                                virtualLinks
+                                scf
+                                includeEdges
+                    either throwServiceError pure eResp
+
+getActivitySupplyChain
+    :: Text
+    -> Text
+    -> Maybe Text
+    -> Maybe Int
+    -> Maybe Double
+    -> Maybe Int
+    -> Maybe Int
+    -> Maybe Text
+    -> Maybe Text
+    -> Maybe Text
+    -> [Text]
+    -> [Text]
+    -> [Text]
+    -> Maybe Text
+    -> Maybe Text
+    -> Maybe Bool
+    -> AppM SupplyChainResponse
+getActivitySupplyChain dbName processIdText nameFilter limitParam minQuantity offsetParam maxDepthParam locationFilter productFilter presetParam classSystems classValues classModes sortParam orderParam includeEdgesParam =
+    activitySupplyChainCore dbName processIdText nameFilter limitParam minQuantity offsetParam maxDepthParam locationFilter productFilter presetParam classSystems classValues classModes sortParam orderParam includeEdgesParam Nothing
+
+-- | Aggregate endpoint with accumulating field-level validation (a single
+-- request can report invalid `scope` and invalid `aggregate` together).
+getActivityAggregate
+    :: Text
+    -> Text
+    -> Maybe Text
+    -> Maybe Bool
+    -> Maybe Int
+    -> Maybe Text
+    -> Maybe Text
+    -> Maybe Text
+    -> Maybe Text
+    -> [Text]
+    -> Maybe Text
+    -> Maybe Text
+    -> Maybe Bool
+    -> Maybe Text
+    -> Maybe Text
+    -> AppM Aggregation
+getActivityAggregate dbName processId scopeParam isInputParam maxDepthParam fnameParam fnameNotParam funitParam presetParam fclassParams ftargetParam fexchangeTypeParam freferenceParam groupByParam aggregateParam = do
+    dbManager <- asks aeDbManager
+    presets <- asks aeClassificationPresets
+    (db, sharedSolver) <- requireDatabaseByName dbName
+    let parseScope = \case
+            Just "direct" -> V.Success Agg.ScopeDirect
+            Just "supply_chain" -> V.Success Agg.ScopeSupplyChain
+            Just "biosphere" -> V.Success Agg.ScopeBiosphere
+            _ -> V.failure "scope must be one of: direct | supply_chain | biosphere"
+        parseExType = \case
+            Nothing -> V.Success Nothing
+            Just "technosphere" -> V.Success (Just Agg.KindTechnosphere)
+            Just "biosphere" -> V.Success (Just Agg.KindBiosphere)
+            Just "waste" -> V.Success (Just Agg.KindWaste)
+            Just _ -> V.failure "filter_exchange_type must be one of: technosphere | biosphere | waste"
+        parseAgg = \case
+            Nothing -> V.Success Agg.AggSum
+            Just "sum_quantity" -> V.Success Agg.AggSum
+            Just "count" -> V.Success Agg.AggCount
+            Just "share" -> V.Success Agg.AggShare
+            Just other -> V.failure ("aggregate must be one of: sum_quantity | count | share (got " <> other <> ")")
+    (scope, exchangeType, aggFn) <-
+        case V.toEither $ (,,) <$> parseScope scopeParam <*> parseExType fexchangeTypeParam <*> parseAgg aggregateParam of
+            Left errs -> throwError err400{errBody = BSL.fromStrict (T.encodeUtf8 (T.intercalate "; " (NE.toList errs)))}
+            Right v -> pure v
+    case (exchangeType, scope) of
+        (Just _, Agg.ScopeBiosphere) ->
+            throwError err400{errBody = "filter_exchange_type is redundant with scope=biosphere"}
+        (Just _, Agg.ScopeSupplyChain) ->
+            throwError err400{errBody = "filter_exchange_type is not supported with scope=supply_chain (all entries are technosphere)"}
+        _ -> return ()
+    let presetFilters = expandPreset presets presetParam
+        explicitFilters = mapMaybe parseClassFilter fclassParams
+        params =
+            Agg.AggregateParams
+                { Agg.apScope = scope
+                , Agg.apIsInput = isInputParam
+                , Agg.apMaxDepth = maxDepthParam
+                , Agg.apFilterName = fnameParam
+                , Agg.apFilterNameNot = maybe [] (map T.strip . T.splitOn ",") fnameNotParam
+                , Agg.apFilterUnit = funitParam
+                , Agg.apFilterClassifications = presetFilters ++ explicitFilters
+                , Agg.apFilterTargetName = ftargetParam
+                , Agg.apFilterExchangeType = exchangeType
+                , Agg.apFilterIsReference = freferenceParam
+                , Agg.apGroupBy = groupByParam
+                , Agg.apAggregate = aggFn
+                }
+    unitCfg <- liftIO $ getMergedUnitConfig dbManager
+    (mFlows, mUnits) <- liftIO $ DM.getMergedFlowMetadata dbManager
+    result <- liftIO $ Agg.aggregate unitCfg mFlows mUnits db dbName sharedSolver (DM.mkDepSolverLookup dbManager) processId params
+    either throwServiceError pure result
+
+-- | LCIA single-method core. GET passes a top-flows param and logs;
+-- POST carries substitutions instead and skips logging.
+activityLCIACore :: Text -> Text -> Text -> Maybe Int -> Maybe SubstitutionRequest -> AppM LCIAResult
+activityLCIACore dbName processIdText methodIdText topFlowsParam mSub = do
+    dbManager <- asks aeDbManager
+    (db, sharedSolver) <- requireDatabaseByName dbName
+    method <- loadMethodByUUID methodIdText
+    (processId, activity) <- resolveOrThrow db processIdText
+    sol <- crossDBSolutionFor dbName db sharedSolver processId mSub
+    result <- liftIO $ computeCategoryResult dbManager dbName db sol activity (fromMaybe 5 topFlowsParam) Nothing method
+    when (isNothing mSub) $ liftIO $ logLCIAResult result method
+    pure result
+
+getActivityLCIA :: Text -> Text -> Text -> Text -> Maybe Int -> AppM LCIAResult
+getActivityLCIA dbName processIdText _collectionName methodIdText topFlowsParam =
+    activityLCIACore dbName processIdText methodIdText topFlowsParam Nothing
+
+postActivityLCIA :: Text -> Text -> Text -> Text -> SubstitutionRequest -> AppM LCIAResult
+postActivityLCIA dbName processIdText _collectionName methodIdText subReq =
+    activityLCIACore dbName processIdText methodIdText Nothing (Just subReq)
+
+-- | Sensitivity sweep: rank-1 perturbations on the root scaling, scored
+-- through the cross-DB graph (regional CFs on dep DBs still apply).
+postActivitySensitivity :: Text -> Text -> Text -> Text -> SensitivityRequest -> AppM SensitivityResponse
+postActivitySensitivity dbName processIdText _collectionName methodIdText senReq = do
+    dbManager <- asks aeDbManager
+    (db, sharedSolver) <- requireDatabaseByName dbName
+    requireFullyLinked dbName db
+    method <- loadMethodByUUID methodIdText
+    (processId, activity) <- resolveOrThrow db processIdText
+    eRes <- liftIO $ Service.computeSensitivities db sharedSolver processId (srPerturbations senReq)
+    (baselineX, perResults) <- either throwServiceError pure eRes
+    unitCfg <- liftIO $ getMergedUnitConfig dbManager
+    let depLookup = DM.mkDepSolverLookup dbManager
+        scaleToSolution x = do
+            eSol <-
+                SharedSolver.goWithDepsFromScalings
+                    unitCfg
+                    depLookup
+                    db
+                    dbName
+                    []
+                    [x]
+                    0
+            pure $ case eSol of
+                Left err -> Left err
+                Right (sol : _) -> Right sol
+                Right [] -> Left "cross-DB propagation returned empty result"
+        buildEntry baselineLcia (p, eitherX) = case eitherX of
+            Left err -> pure (PerturbedEntry p (Left err))
+            Right x' -> do
+                eSol <- scaleToSolution x'
+                case eSol of
+                    Left err -> pure (PerturbedEntry p (Left err))
+                    Right sol -> do
+                        lcia <- computeCategoryResult dbManager dbName db sol activity 5 Nothing method
+                        pure (PerturbedEntry p (Right (lcia, lrScore lcia - lrScore baselineLcia)))
+    eBaselineSol <- liftIO $ scaleToSolution baselineX
+    baselineSol <-
+        either
+            (\err -> throwError err422{errBody = BSL.fromStrict $ T.encodeUtf8 err})
+            pure
+            eBaselineSol
+    baselineLcia <-
+        liftIO $
+            computeCategoryResult dbManager dbName db baselineSol activity 5 Nothing method
+    perturbed <-
+        liftIO $
+            mapConcurrently (buildEntry baselineLcia) perResults
+    pure SensitivityResponse{srBaseline = baselineLcia, srPerturbed = perturbed}
+
+getActivityLCIABatch :: Text -> Text -> Text -> AppM LCIABatchResult
+getActivityLCIABatch dbName processIdText collectionName =
+    activityLCIABatchH dbName processIdText collectionName Nothing
+
+postActivityLCIABatch :: Text -> Text -> Text -> SubstitutionRequest -> AppM LCIABatchResult
+postActivityLCIABatch dbName processIdText collectionName subReq =
+    activityLCIABatchH dbName processIdText collectionName (Just subReq)
+
+postActivityInventory :: Text -> Text -> SubstitutionRequest -> AppM InventoryExport
+postActivityInventory dbName processIdText subReq = activityInventoryCore dbName processIdText (Just subReq)
+
+postActivitySupplyChain
+    :: Text
+    -> Text
+    -> Maybe Text
+    -> Maybe Int
+    -> Maybe Double
+    -> Maybe Int
+    -> Maybe Int
+    -> Maybe Text
+    -> Maybe Text
+    -> Maybe Text
+    -> [Text]
+    -> [Text]
+    -> [Text]
+    -> Maybe Text
+    -> Maybe Text
+    -> Maybe Bool
+    -> SubstitutionRequest
+    -> AppM SupplyChainResponse
+postActivitySupplyChain dbName processIdText nameFilter limitParam minQuantity offsetParam maxDepthParam locationFilter productFilter presetParam classSystems classValues classModes sortParam orderParam includeEdgesParam subReq =
+    activitySupplyChainCore dbName processIdText nameFilter limitParam minQuantity offsetParam maxDepthParam locationFilter productFilter presetParam classSystems classValues classModes sortParam orderParam includeEdgesParam (Just subReq)
+
+getActivityConsumers
+    :: Text
+    -> Text
+    -> Maybe Text
+    -> Maybe Text
+    -> Maybe Text
+    -> Maybe Text
+    -> [Text]
+    -> [Text]
+    -> [Text]
+    -> Maybe Int
+    -> Maybe Int
+    -> Maybe Int
+    -> Maybe Text
+    -> Maybe Text
+    -> Maybe Bool
+    -> AppM ConsumersResponse
+getActivityConsumers dbName processIdText nameFilter locationFilter productFilter presetParam classSystems classValues classModes limitParam offsetParam maxDepthParam sortParam orderParam includeEdgesParam = do
+    presets <- asks aeClassificationPresets
+    (db, _) <- requireDatabaseByName dbName
+    let cnf =
+            Service.ConsumerFilter
+                { Service.cnfCore =
+                    Service.ActivityFilterCore
+                        { Service.afcName = nameFilter
+                        , Service.afcLocation = locationFilter
+                        , Service.afcProduct = productFilter
+                        , Service.afcClassifications = mergeClassFilters presets presetParam classSystems classValues classModes
+                        , Service.afcLimit = limitParam
+                        , Service.afcOffset = offsetParam
+                        , Service.afcSort = sortParam
+                        , Service.afcOrder = orderParam
+                        }
+                , Service.cnfMaxDepth = maxDepthParam
+                , Service.cnfIncludeEdges = fromMaybe False includeEdgesParam
+                }
+    either throwServiceError pure (Service.getConsumers db dbName processIdText cnf)
+
+getActivityPathTo :: Text -> Text -> Maybe Text -> AppM Value
+getActivityPathTo dbName processIdText targetParam = do
+    (db, solver) <- requireDatabaseByName dbName
+    target <-
+        maybe
+            (throwError err400{errBody = "Missing required 'target' query parameter"})
+            pure
+            targetParam
+    result <- liftIO $ Service.getPathTo db solver processIdText target
+    case result of
+        Left (Service.ActivityNotFound msg) ->
+            throwError err404{errBody = BSL.fromStrict $ T.encodeUtf8 msg}
+        Left (Service.InvalidProcessId msg) ->
+            throwError err400{errBody = BSL.fromStrict $ T.encodeUtf8 msg}
+        Left err ->
+            throwError err500{errBody = BSL.fromStrict $ T.encodeUtf8 $ T.pack $ show err}
+        Right val -> return val
+
+getActivityAnalyze :: Text -> Text -> Text -> AppM Value
+getActivityAnalyze dbName processIdText analyzerName = do
+    dbManager <- asks aeDbManager
+    (db, sharedSolver) <- requireDatabaseByName dbName
+    case M.lookup analyzerName (prAnalyzers (dmPlugins dbManager)) of
+        Nothing -> throwError err404{errBody = "Analyzer not found: " <> BSL.fromStrict (T.encodeUtf8 analyzerName)}
+        Just analyzer -> do
+            case Service.resolveActivityAndProcessId db processIdText of
+                Left (Service.ActivityNotFound _) -> throwError err404{errBody = "Activity not found"}
+                Left (Service.InvalidProcessId _) -> throwError err400{errBody = "Invalid ProcessId format"}
+                Left err -> throwError err500{errBody = BSL.fromStrict $ T.encodeUtf8 $ T.pack $ show err}
+                Right (actProcessId, _) -> do
+                    inventory <- inventoryWithDeps dbName db sharedSolver actProcessId
+                    (mFlows, mUnits) <- liftIO $ DM.getMergedFlowMetadata dbManager
+                    loadedMethods <- liftIO $ DM.getLoadedMethods dbManager
+                    let methods = map snd loadedMethods
+                        ctx =
+                            AnalyzeContext
+                                { acDatabase = db
+                                , acInventory = inventory
+                                , acMethods = methods
+                                , acParameters = M.empty
+                                , acTechFlowDB = M.empty
+                                , acBioFlowDB = mFlows
+                                , acUnitDB = mUnits
+                                }
+                    liftIO $ ahAnalyze analyzer ctx
+
+getContributingFlows :: Text -> Text -> Text -> Text -> Maybe Int -> AppM ContributingFlowsResult
+getContributingFlows dbName processIdText _collectionName methodIdText limitParam =
+    withActivityAndMethod dbName processIdText methodIdText $ \db sharedSolver actProcessId _ method -> do
+        dbManager <- asks aeDbManager
+        let lim = fromMaybe 20 limitParam
+        unitCfg <- liftIO $ getMergedUnitConfig dbManager
+        (mFlows, mUnits) <- liftIO $ DM.getMergedFlowMetadata dbManager
+        inventory <- inventoryWithDeps dbName db sharedSolver actProcessId
+        tables <- liftIO $ DM.mapMethodToTablesCached dbManager dbName db method
+        let score = loScore (computeLCIAScoreFromTables unitCfg mUnits mFlows inventory tables)
+            (rawContribs, unknownUuids) = inventoryContributions unitCfg mUnits mFlows inventory tables
+            contribs = sortOn (\(_, _, c) -> negate (abs c)) rawContribs
+            topFlows =
+                [ FlowContributionEntry
+                    { fcoFlowName = bfName f
+                    , fcoContribution = c
+                    , fcoSharePct = if score /= 0 then c / score * 100 else 0
+                    , fcoFlowId = UUID.toText (bfId f)
+                    , fcoCategory = bfCompartmentName f
+                    , fcoCompartment = bfCompartmentSub f
+                    , fcoCfValue = cfVal
+                    }
+                | (f, cfVal, c) <- take lim contribs
+                ]
+        liftIO $
+            unless (null unknownUuids) $
+                reportProgress Warning $
+                    "[contributing-flows "
+                        <> T.unpack (methodName method)
+                        <> "] "
+                        <> show (length unknownUuids)
+                        <> " inventory flow UUID(s) absent from merged FlowDB. Samples: "
+                        <> show (take 3 unknownUuids)
+        return
+            ContributingFlowsResult
+                { cfrMethod = methodName method
+                , cfrUnit = methodUnit method
+                , cfrTotalScore = score
+                , cfrTopFlows = topFlows
+                }
+
+getContributingActivities :: Text -> Text -> Text -> Text -> Maybe Int -> AppM ContributingActivitiesResult
+getContributingActivities dbName processIdText _collectionName methodIdText limitParam =
+    withActivityAndMethod dbName processIdText methodIdText $ \db sharedSolver actProcessId _ method -> do
+        dbManager <- asks aeDbManager
+        let lim = fromMaybe 10 limitParam
+        requireFullyLinked dbName db
+        unitCfg <- liftIO $ getMergedUnitConfig dbManager
+        (mFlows, mUnits) <- liftIO $ DM.getMergedFlowMetadata dbManager
+        tables <- liftIO $ DM.mapMethodToTablesCached dbManager dbName db method
+        eContribs <-
+            liftIO $
+                SharedSolver.crossDBProcessContributions
+                    unitCfg
+                    mUnits
+                    mFlows
+                    (DM.mkDepSolverLookup dbManager)
+                    db
+                    dbName
+                    sharedSolver
+                    actProcessId
+                    tables
+        case eContribs of
+            Left err -> throwError err422{errBody = BSL.fromStrict $ T.encodeUtf8 err}
+            Right contributions -> do
+                let score = sum (M.elems contributions)
+                    sorted = sortOn (\(_, c) -> negate (abs c)) (M.toList contributions)
+                    top = take lim sorted
+                rows <- liftIO $ mapM (mkCrossDBContrib dbManager dbName mFlows mUnits score) top
+                return
+                    ContributingActivitiesResult
+                        { carMethod = methodName method
+                        , carUnit = methodUnit method
+                        , carTotalScore = score
+                        , carActivities = rows
+                        }
+
+getFlowDetail :: Text -> Text -> AppM FlowDetail
+getFlowDetail dbName flowIdText = do
+    (db, _) <- requireDatabaseByName dbName
+    withValidatedFlow db flowIdText $ \flow -> do
+        let fid = flowKindId flow
+            unitName' = flowKindUnitName (dbUnits db) flow
+            usageCount = Service.getFlowUsageCount db fid
+        return $ FlowDetail (apiFlowOfKind flow) unitName' usageCount
+
+getFlowActivities :: Text -> Text -> AppM [ActivitySummary]
+getFlowActivities dbName flowIdText = do
+    (db, _) <- requireDatabaseByName dbName
+    withValidatedFlow db flowIdText $ \flow ->
+        return $ Service.getActivitiesUsingFlow db (flowKindId flow)
+
+getMethods :: AppM [MethodSummary]
+getMethods = do
+    dbManager <- asks aeDbManager
+    loadedMethods <- liftIO $ DM.getLoadedMethods dbManager
+    return
+        [ MethodSummary
+            { msmId = methodId m
+            , msmName = methodName m
+            , msmCategory = methodCategory m
+            , msmUnit = methodUnit m
+            , msmFactorCount = length (methodFactors m)
+            , msmCollection = collName
+            }
+        | (collName, m) <- loadedMethods
+        ]
+
+getMethodDetail :: Text -> AppM MethodDetail
+getMethodDetail methodIdText = do
+    method <- loadMethodByUUID methodIdText
+    return $
+        MethodDetail
+            { mdId = methodId method
+            , mdName = methodName method
+            , mdDescription = methodDescription method
+            , mdUnit = methodUnit method
+            , mdCategory = methodCategory method
+            , mdMethodology = methodMethodology method
+            , mdFactorCount = length (methodFactors method)
+            }
+
+getMethodFactors :: Text -> AppM [MethodFactorAPI]
+getMethodFactors methodIdText = do
+    method <- loadMethodByUUID methodIdText
+    return $ map cfToAPI (methodFactors method)
+
+getMethodMapping :: Text -> Text -> AppM MappingStatus
+getMethodMapping dbName methodIdText = do
+    dbManager <- asks aeDbManager
+    (db, _) <- requireDatabaseByName dbName
+    method <- loadMethodByUUID methodIdText
+    mappings <- liftIO $ DM.mapMethodToFlowsCached dbManager dbName db method
+    let stats = computeMappingStats mappings
+        totalFactors = length mappings
+        coverage =
+            if totalFactors > 0
+                then fromIntegral (totalFactors - msUnmatched stats) / fromIntegral totalFactors * 100
+                else 0.0
+        unmappedFlows =
+            take
+                50
+                [ UnmappedFlowAPI
+                    { ufaFlowRef = mcfFlowRef cf
+                    , ufaFlowName = mcfFlowName cf
+                    , ufaDirection = case mcfDirection cf of
+                        MT.Input -> "Input"
+                        MT.Output -> "Output"
+                    }
+                | (cf, Nothing) <- mappings
+                ]
+        uniqueDbFlows = S.size $ S.fromList [bfId f | (_, Just (f, _)) <- mappings]
+    return
+        MappingStatus
+            { mstMethodId = methodId method
+            , mstMethodName = methodName method
+            , mstTotalFactors = msTotal stats
+            , mstMappedByUUID = msByUUID stats
+            , mstMappedByCAS = msByCAS stats
+            , mstMappedByName = msByName stats
+            , mstMappedBySynonym = msBySynonym stats
+            , mstUnmapped = msUnmatched stats
+            , mstCoverage = coverage
+            , mstDbBiosphereCount = fromIntegral (dbBiosphereCount db)
+            , mstUniqueDbFlowsMatched = uniqueDbFlows
+            , mstUnmappedFlows = unmappedFlows
+            }
+
+getFlowCFMapping :: Text -> Text -> AppM FlowCFMapping
+getFlowCFMapping dbName methodIdText = do
+    dbManager <- asks aeDbManager
+    (db, _) <- requireDatabaseByName dbName
+    method <- loadMethodByUUID methodIdText
+    mappings <- liftIO $ DM.mapMethodToFlowsCached dbManager dbName db method
+    let reverseIndex =
+            M.fromList
+                [(bfId f, (cf, strat)) | (cf, Just (f, strat)) <- mappings]
+        entries = map (buildFlowEntry db reverseIndex) (V.toList (dbBiosphereOrder db))
+        matchedCount = length [() | e <- entries, isJust (fceCfValue e)]
+    return
+        FlowCFMapping
+            { fcmMethodName = methodName method
+            , fcmMethodUnit = methodUnit method
+            , fcmTotalFlows = fromIntegral (dbBiosphereCount db)
+            , fcmMatchedFlows = matchedCount
+            , fcmFlows = entries
+            }
+
+getCharacterization :: Text -> Text -> Maybe Text -> Maybe Int -> AppM CharacterizationResult
+getCharacterization dbName methodIdText flowFilter limitParam = do
+    dbManager <- asks aeDbManager
+    (db, _) <- requireDatabaseByName dbName
+    method <- loadMethodByUUID methodIdText
+    let lim = fromMaybe 50 limitParam
+        queryLower = fmap T.toLower flowFilter
+    mappings <- liftIO $ DM.mapMethodToFlowsCached dbManager dbName db method
+    let matched =
+            [ (cf, f, strat)
+            | (cf, Just (f, strat)) <- mappings
+            , matchesQuery queryLower (mcfFlowName cf) (bfName f)
+            ]
+        sorted = sortOn (\(cf, _, _) -> negate (abs (mcfValue cf))) matched
+        top = take lim sorted
+        mkEntry (cf, f, strat) =
+            CharacterizationEntry
+                { cheMethodFlowName = mcfFlowName cf
+                , cheCfValue = mcfValue cf
+                , cheCfUnit = mcfUnit cf
+                , cheDirection = case mcfDirection cf of
+                    MT.Input -> "Input"
+                    MT.Output -> "Output"
+                , cheDbFlowName = bfName f
+                , cheFlowId = UUID.toText (bfId f)
+                , cheFlowUnit = getUnitNameForBioFlow (dbUnits db) f
+                , cheCategory = bfCompartmentName f
+                , cheCompartment = bfCompartmentSub f
+                , cheMatchStrategy = strategyToText strat
+                }
+    return
+        CharacterizationResult
+            { chrMethod = methodName method
+            , chrUnit = methodUnit method
+            , chrMatches = length matched
+            , chrShown = length top
+            , chrFactors = map mkEntry top
+            }
+
+getMethodCollections :: AppM MethodCollectionListResponse
+getMethodCollections = do
+    dbManager <- asks aeDbManager
+    statuses <- liftIO $ DM.listMethodCollections dbManager
+    return $
+        MethodCollectionListResponse
+            [ MethodCollectionStatusAPI
+                { mcaName = mcsName s
+                , mcaDisplayName = mcsDisplayName s
+                , mcaDescription = mcsDescription s
+                , mcaStatus = case mcsStatus s of
+                    DM.Loaded -> "loaded"
+                    _ -> "unloaded"
+                , mcaIsUploaded = mcsIsUploaded s
+                , mcaPath = mcsPath s
+                , mcaMethodCount = mcsMethodCount s
+                , mcaFormat = Just (mcsFormat s)
+                }
+            | s <- statuses
+            ]
+
+loadMethodCollectionHandler :: Text -> AppM ActivateResponse
+loadMethodCollectionHandler name = do
+    dbManager <- asks aeDbManager
+    simpleAction (DM.loadMethodCollection dbManager name) ("Loaded method: " <> name)
+
+unloadMethodCollectionHandler :: Text -> AppM ActivateResponse
+unloadMethodCollectionHandler name = do
+    dbManager <- asks aeDbManager
+    simpleAction (DM.unloadMethodCollection dbManager name) ("Unloaded method: " <> name)
+
+searchFlows :: Text -> Maybe Text -> Maybe Text -> Maybe Int -> Maybe Int -> Maybe Text -> Maybe Text -> AppM (SearchResults FlowSearchResult)
+searchFlows dbName queryParam langParam limitParam offsetParam sortParam orderParam = do
+    (db, _) <- requireDatabaseByName dbName
+    case queryParam of
+        Nothing -> return (SearchResults [] 0 0 50 False 0.0)
+        Just query -> do
+            let ff =
+                    Service.FlowFilter
+                        { Service.ffQuery = query
+                        , Service.ffLang = langParam
+                        , Service.ffLimit = limitParam
+                        , Service.ffOffset = offsetParam
+                        , Service.ffSort = sortParam
+                        , Service.ffOrder = orderParam
+                        }
+            searchFlowsInternal db ff
+
+searchActivitiesWithCount :: Text -> Maybe Text -> Maybe Text -> Maybe Text -> Maybe Bool -> Maybe Text -> [Text] -> [Text] -> [Text] -> Maybe Int -> Maybe Int -> Maybe Text -> Maybe Text -> AppM (SearchResults ActivitySummary)
+searchActivitiesWithCount dbName nameParam geoParam productParam exactParam presetParam classSystems classValues classModes limitParam offsetParam sortParam orderParam = do
+    presets <- asks aeClassificationPresets
+    (db, _) <- requireDatabaseByName dbName
+    let exactMatch = fromMaybe False exactParam
+        sf =
+            Service.SearchFilter
+                { Service.sfCore =
+                    Service.ActivityFilterCore
+                        { Service.afcName = nameParam
+                        , Service.afcLocation = geoParam
+                        , Service.afcProduct = productParam
+                        , Service.afcClassifications = mergeClassFilters presets presetParam classSystems classValues classModes
+                        , Service.afcLimit = limitParam
+                        , Service.afcOffset = offsetParam
+                        , Service.afcSort = sortParam
+                        , Service.afcOrder = orderParam
+                        }
+                , Service.sfExactMatch = exactMatch
+                }
+    result <- liftIO $ Service.searchActivities db sf
+    case result of
+        Left err -> throwError err500{errBody = BSL.fromStrict $ T.encodeUtf8 $ T.pack $ show err}
+        Right jsonValue -> case fromJSON jsonValue of
+            Success searchResults -> return searchResults
+            Error parseErr -> throwError err500{errBody = BSL.fromStrict $ T.encodeUtf8 $ T.pack parseErr}
+
+getClassifications :: Text -> AppM [ClassificationSystem]
+getClassifications dbName = do
+    (db, _) <- requireDatabaseByName dbName
+    return $ Service.getClassifications db
+
+postImpactsBatch :: Text -> Text -> Maybe Int -> BatchImpactsRequest -> AppM BatchImpactsResponse
+postImpactsBatch = batchImpactsH
+
+-- ---------------------------------------------------------------------------
+-- Servant server
+-- ---------------------------------------------------------------------------
+
 lcaServer :: AppEnv -> Server LCAAPI
-lcaServer env =
-    hoistServer lcaAPI (runApp env) handlers
+lcaServer env = hoistServer lcaAPI (runApp env) handlers
   where
-    -- Legacy closure access to env fields, kept so handler bodies (and
-    -- the helpers they call) don't all need an explicit `asks`. We could
-    -- replace this with capability-class projections at every call site,
-    -- but the let-binding is identical in LOC and easier to read.
-    dbManager = aeDbManager env
-    maxTreeDepth = aeMaxTreeDepth env
-    password = aePassword env
-    hostingConfig = aeHostingConfig env
-    classificationPresets = aeClassificationPresets env
     handlers =
         getActivityInfo
         :<|> getActivityFlows
@@ -965,7 +1924,6 @@ lcaServer env =
         :<|> unloadMethodCollectionHandler
         :<|> DBHandlers.deleteMethodHandler
         :<|> DBHandlers.uploadMethodHandler
-        -- Flow synonyms
         :<|> DBHandlers.listRefData DBHandlers.FlowSynonyms
         :<|> DBHandlers.loadRefData DBHandlers.FlowSynonyms
         :<|> DBHandlers.unloadRefData DBHandlers.FlowSynonyms
@@ -973,13 +1931,11 @@ lcaServer env =
         :<|> DBHandlers.uploadRefData DBHandlers.FlowSynonyms
         :<|> DBHandlers.getFlowSynonymGroupsHandler
         :<|> DBHandlers.downloadRefDataHandler DBHandlers.FlowSynonyms
-        -- Compartment mappings
         :<|> DBHandlers.listRefData DBHandlers.CompartmentMappings
         :<|> DBHandlers.loadRefData DBHandlers.CompartmentMappings
         :<|> DBHandlers.unloadRefData DBHandlers.CompartmentMappings
         :<|> DBHandlers.deleteRefData DBHandlers.CompartmentMappings
         :<|> DBHandlers.uploadRefData DBHandlers.CompartmentMappings
-        -- Units
         :<|> DBHandlers.listRefData DBHandlers.UnitDefs
         :<|> DBHandlers.loadRefData DBHandlers.UnitDefs
         :<|> DBHandlers.unloadRefData DBHandlers.UnitDefs
@@ -992,958 +1948,6 @@ lcaServer env =
         :<|> getStats
         :<|> getClassificationPresets
         :<|> getOpenApiSpec
-    getOpenApiSpec :: AppM Value
-    getOpenApiSpec = return $ toJSON volcaOpenApi
-
-    getVersion :: AppM Value
-    getVersion =
-        return $
-            object
-                [ "version" .= Version.version
-                , "gitHash" .= Version.gitHash
-                , "gitTag" .= Version.gitTag
-                , "buildTarget" .= Version.buildTarget
-                ]
-
-    getHosting :: AppM Value
-    getHosting = return $ case hostingConfig of
-        Just hc ->
-            object
-                [ "is_hosted" .= True
-                , "max_uploads" .= Config.hcMaxUploads hc
-                , "api_access" .= Config.hcApiAccess hc
-                , "upgrade_upload" .= Config.hcUpgradeUpload hc
-                , "upgrade_api" .= Config.hcUpgradeApi hc
-                , "upgrade_vm_size" .= Config.hcUpgradeVmSize hc
-                ]
-        Nothing ->
-            object
-                [ "is_hosted" .= False
-                , "max_uploads" .= (-1 :: Int)
-                , "api_access" .= True
-                , "upgrade_upload" .= ("" :: Text)
-                , "upgrade_api" .= ("" :: Text)
-                , "upgrade_vm_size" .= ("" :: Text)
-                ]
-
-    getStats :: AppM Value
-    getStats = liftIO $ do
-        enabled <- GHC.Stats.getRTSStatsEnabled
-        if enabled
-            then do
-                stats <- GHC.Stats.getRTSStats
-                return $
-                    object
-                        [ "memory_used_bytes" .= GHC.Stats.gcdetails_live_bytes (GHC.Stats.gc stats)
-                        , "memory_allocated_bytes" .= GHC.Stats.allocated_bytes stats
-                        , "gc_count" .= GHC.Stats.gcs stats
-                        ]
-            else
-                return $
-                    object
-                        ["error" .= ("RTS stats not enabled. Run with +RTS -T to enable." :: Text)]
-
-    getClassificationPresets :: AppM [ClassificationPresetInfo]
-    getClassificationPresets = return $ map toInfo classificationPresets
-      where
-        toInfo p =
-            ClassificationPresetInfo
-                { cpiName = Config.cpName p
-                , cpiLabel = Config.cpLabel p
-                , cpiDescription = Config.cpDescription p
-                , cpiFilters = map (\e -> ClassificationEntryInfo (Config.ceSystem e) (Config.ceValue e) (Config.ceMode e)) (Config.cpFilters p)
-                }
-
-    getLogsHandler :: Maybe Int -> AppM Value
-    getLogsHandler sinceMaybe = do
-        let since = fromMaybe 0 sinceMaybe
-        (nextIndex, logLines) <- liftIO $ getLogLines since
-        return $
-            object
-                [ "lines" .= logLines
-                , "nextIndex" .= nextIndex
-                ]
-
-    postAuth :: LoginRequest -> AppM (Headers '[Header "Set-Cookie" String] Value)
-    postAuth loginReq =
-        case password of
-            Nothing ->
-                -- No password configured, auth always succeeds
-                return $ noHeader $ object ["ok" .= True]
-            Just pwd ->
-                if T.unpack (lrCode loginReq) == pwd
-                    then
-                        let cookieValue = "volca_session=" ++ pwd ++ "; Path=/; HttpOnly; SameSite=Strict"
-                         in return $ addHeader cookieValue $ object ["ok" .= True]
-                    else
-                        throwError err401{errBody = "{\"error\":\"invalid code\"}"}
-
-    -- Core activity endpoint - streamlined data
-    getActivityInfo :: Text -> Text -> AppM ActivityInfo
-    getActivityInfo dbName processId = do
-        (db, _) <- requireDatabaseByName dbName
-        unitCfg <- liftIO $ getMergedUnitConfig dbManager
-        result <- either throwServiceError pure (Service.getActivityInfo unitCfg db processId)
-        case fromJSON result of
-            Success activityInfo -> return activityInfo
-            Error err -> throwError err500{errBody = BSL.fromStrict $ T.encodeUtf8 $ T.pack err}
-
-    -- Activity flows sub-resource
-    getActivityFlows :: Text -> Text -> AppM [FlowSummary]
-    getActivityFlows dbName processId = do
-        (db, _) <- requireDatabaseByName dbName
-        withValidatedActivity db processId $ \activity ->
-            return $ Service.getActivityFlowSummaries db activity
-
-    -- Activity inputs sub-resource
-    getActivityInputs :: Text -> Text -> AppM [ExchangeDetail]
-    getActivityInputs dbName processId = do
-        (db, _) <- requireDatabaseByName dbName
-        withValidatedActivity db processId $ \activity ->
-            return $ Service.getActivityInputDetails db activity
-
-    -- Activity outputs sub-resource
-    getActivityOutputs :: Text -> Text -> AppM [ExchangeDetail]
-    getActivityOutputs dbName processId = do
-        (db, _) <- requireDatabaseByName dbName
-        withValidatedActivity db processId $ \activity ->
-            return $ Service.getActivityOutputDetails db activity
-
-    -- Activity reference product sub-resource
-    getActivityReferenceProduct :: Text -> Text -> AppM FlowDetail
-    getActivityReferenceProduct dbName processId = do
-        (db, _) <- requireDatabaseByName dbName
-        withValidatedActivity db processId $ \activity -> do
-            case Service.getActivityReferenceProductDetail db activity of
-                Nothing -> throwError err404{errBody = "No reference product found"}
-                Just refProduct -> return refProduct
-
-    -- Activity tree export for visualization (configurable depth)
-    getActivityTree :: Text -> Text -> AppM TreeExport
-    getActivityTree dbName processId = do
-        (db, _) <- requireDatabaseByName dbName
-        withValidatedActivity db processId $ \_activity -> do
-            -- Use CLI --tree-depth option for configurable depth
-            -- Default depth limit prevents DOS attacks via deep tree requests
-            -- Extract activity UUID from processId (format: activityUUID_productUUID)
-            let activityUuidText = case T.splitOn "_" processId of
-                    (uuid : _) -> uuid
-                    [] -> processId -- Fallback
-            case UUID.fromText activityUuidText of
-                Nothing -> throwError err400{errBody = "Invalid activity UUID format"}
-                Just activityUuid -> do
-                    unitCfg <- liftIO $ getMergedUnitConfig dbManager
-                    let loopAwareTree = buildLoopAwareTree unitCfg db activityUuid maxTreeDepth
-                    return $ Service.convertToTreeExport db processId maxTreeDepth loopAwareTree
-
-    -- Activity inventory calculation (full supply chain LCI).
-    -- Goes through the cross-DB back-substitution path so inventories from
-    -- dep DBs are merged into the returned flow map; metadata (flow names,
-    -- units) comes from the merged FlowDB/UnitDB snapshot.
-    activityInventoryCore :: Text -> Text -> Maybe SubstitutionRequest -> AppM InventoryExport
-    activityInventoryCore dbName processIdText mSub = do
-        (db, sharedSolver) <- requireDatabaseByName dbName
-        (processId, activity) <- resolveOrThrow db processIdText
-        sol <- crossDBSolutionFor dbName db sharedSolver processId mSub
-        (mFlows, mUnits) <- liftIO $ DM.getMergedFlowMetadata dbManager
-        pure $ Service.convertToInventoryExport db mFlows mUnits processId activity (SharedSolver.csInventory sol)
-
-    getActivityInventory :: Text -> Text -> AppM InventoryExport
-    getActivityInventory dbName processIdText = activityInventoryCore dbName processIdText Nothing
-
-    -- Activity graph endpoint for network visualization
-    getActivityGraph :: Text -> Text -> Maybe Double -> AppM GraphExport
-    getActivityGraph dbName processId maybeCutoff = do
-        (db, sharedSolver) <- requireDatabaseByName dbName
-        let cutoffPercent = fromMaybe 1.0 maybeCutoff -- Default to 1% cutoff
-        result <- liftIO $ Service.buildActivityGraph db sharedSolver processId cutoffPercent
-        either throwServiceError pure result
-
-    -- Build the supply-chain filter shared by the GET and POST handlers.
-    buildSupplyChainFilter :: Maybe Text -> Maybe Int -> Maybe Double -> Maybe Int -> Maybe Int -> Maybe Text -> Maybe Text -> Maybe Text -> [Text] -> [Text] -> [Text] -> Maybe Text -> Maybe Text -> Service.SupplyChainFilter
-    buildSupplyChainFilter nameFilter limitParam minQuantity offsetParam maxDepthParam locationFilter productFilter presetParam classSystems classValues classModes sortParam orderParam =
-        let presetFilters = expandPreset classificationPresets presetParam
-            explicitFilters =
-                zipWith3
-                    (\s v m -> (s, v, m == "exact"))
-                    classSystems
-                    classValues
-                    (classModes ++ repeat "contains")
-            classFilters = presetFilters ++ explicitFilters
-         in Service.SupplyChainFilter
-                { Service.scfCore =
-                    Service.ActivityFilterCore
-                        { Service.afcName = nameFilter
-                        , Service.afcLocation = locationFilter
-                        , Service.afcProduct = productFilter
-                        , Service.afcClassifications = classFilters
-                        , Service.afcLimit = limitParam
-                        , Service.afcOffset = offsetParam
-                        , Service.afcSort = sortParam
-                        , Service.afcOrder = orderParam
-                        }
-                , Service.scfMaxDepth = maxDepthParam
-                , Service.scfMinQuantity = minQuantity
-                }
-
-    -- Activity supply chain endpoint (scaling vector based). 'Nothing' takes the
-    -- cached solve; 'Just' resolves substitutions through the cross-DB resolver.
-    activitySupplyChainCore :: Text -> Text -> Maybe Text -> Maybe Int -> Maybe Double -> Maybe Int -> Maybe Int -> Maybe Text -> Maybe Text -> Maybe Text -> [Text] -> [Text] -> [Text] -> Maybe Text -> Maybe Text -> Maybe Bool -> Maybe SubstitutionRequest -> AppM SupplyChainResponse
-    activitySupplyChainCore dbName processIdText nameFilter limitParam minQuantity offsetParam maxDepthParam locationFilter productFilter presetParam classSystems classValues classModes sortParam orderParam includeEdgesParam mSub = do
-        (db, sharedSolver) <- requireDatabaseByName dbName
-        let includeEdges = fromMaybe False includeEdgesParam
-            scf =
-                buildSupplyChainFilter
-                    nameFilter
-                    limitParam
-                    minQuantity
-                    offsetParam
-                    maxDepthParam
-                    locationFilter
-                    productFilter
-                    presetParam
-                    classSystems
-                    classValues
-                    classModes
-                    sortParam
-                    orderParam
-        case mSub of
-            Nothing -> do
-                unitCfg <- liftIO $ DM.getMergedUnitConfig dbManager
-                result <- liftIO $ Service.getSupplyChain unitCfg (DM.mkDepSolverLookup dbManager) db dbName sharedSolver processIdText scf includeEdges
-                either throwServiceError pure result
-            Just subReq -> do
-                (processId, _) <- resolveOrThrow db processIdText
-                -- Use the cross-DB-aware substitution resolver so qualified PIDs in
-                -- subFrom/subTo are accepted; the virtual cross-DB links it returns
-                -- don't affect the root scaling vector (they drive dep-DB demand),
-                -- which is all the supply-chain navigation reads.
-                scalingResult <-
-                    liftIO $
-                        Service.computeScalingVectorWithSubstitutionsCrossDB
-                            (DM.mkDepSolverLookup dbManager)
-                            db
-                            dbName
-                            sharedSolver
-                            processId
-                            (srSubstitutions subReq)
-                case scalingResult of
-                    Left err -> throwServiceError err
-                    Right (scalingVec, virtualLinks) -> do
-                        unitCfg <- liftIO $ DM.getMergedUnitConfig dbManager
-                        eResp <-
-                            liftIO $
-                                Service.buildSupplyChainFromScalingVectorCrossDB
-                                    unitCfg
-                                    (DM.mkDepSolverLookup dbManager)
-                                    db
-                                    dbName
-                                    processId
-                                    scalingVec
-                                    virtualLinks
-                                    scf
-                                    includeEdges
-                        either throwServiceError pure eResp
-
-    getActivitySupplyChain :: Text -> Text -> Maybe Text -> Maybe Int -> Maybe Double -> Maybe Int -> Maybe Int -> Maybe Text -> Maybe Text -> Maybe Text -> [Text] -> [Text] -> [Text] -> Maybe Text -> Maybe Text -> Maybe Bool -> AppM SupplyChainResponse
-    getActivitySupplyChain dbName processIdText nameFilter limitParam minQuantity offsetParam maxDepthParam locationFilter productFilter presetParam classSystems classValues classModes sortParam orderParam includeEdgesParam =
-        activitySupplyChainCore dbName processIdText nameFilter limitParam minQuantity offsetParam maxDepthParam locationFilter productFilter presetParam classSystems classValues classModes sortParam orderParam includeEdgesParam Nothing
-
-    -- Activity aggregate endpoint (generic SQL-group-by-style aggregation)
-    getActivityAggregate ::
-        Text ->
-        Text ->
-        Maybe Text -> -- scope
-        Maybe Bool -> -- is_input
-        Maybe Int -> -- max_depth
-        Maybe Text -> -- filter_name
-        Maybe Text -> -- filter_name_not
-        Maybe Text -> -- filter_unit
-        Maybe Text -> -- preset
-        [Text] -> -- filter_classification (repeatable: "System=Value[:exact]")
-        Maybe Text -> -- filter_target_name
-        Maybe Text -> -- filter_exchange_type ("technosphere" | "biosphere" | "waste")
-        Maybe Bool -> -- filter_is_reference
-        Maybe Text -> -- group_by
-        Maybe Text -> -- aggregate fn
-        AppM Aggregation
-    getActivityAggregate
-        dbName
-        processId
-        scopeParam
-        isInputParam
-        maxDepthParam
-        fnameParam
-        fnameNotParam
-        funitParam
-        presetParam
-        fclassParams
-        ftargetParam
-        fexchangeTypeParam
-        freferenceParam
-        groupByParam
-        aggregateParam = do
-            (db, sharedSolver) <- requireDatabaseByName dbName
-            -- Field-level validation via the Validation Applicative (accumulating).
-            -- A request with both an invalid `scope` and an invalid `aggregate`
-            -- now reports both errors at once, instead of just the first.
-            let parseScope = \case
-                    Just "direct" -> V.Success Agg.ScopeDirect
-                    Just "supply_chain" -> V.Success Agg.ScopeSupplyChain
-                    Just "biosphere" -> V.Success Agg.ScopeBiosphere
-                    _ -> V.failure "scope must be one of: direct | supply_chain | biosphere"
-                parseExType = \case
-                    Nothing -> V.Success Nothing
-                    Just "technosphere" -> V.Success (Just Agg.KindTechnosphere)
-                    Just "biosphere" -> V.Success (Just Agg.KindBiosphere)
-                    Just "waste" -> V.Success (Just Agg.KindWaste)
-                    Just _ -> V.failure "filter_exchange_type must be one of: technosphere | biosphere | waste"
-                parseAgg = \case
-                    Nothing -> V.Success Agg.AggSum
-                    Just "sum_quantity" -> V.Success Agg.AggSum
-                    Just "count" -> V.Success Agg.AggCount
-                    Just "share" -> V.Success Agg.AggShare
-                    Just other -> V.failure ("aggregate must be one of: sum_quantity | count | share (got " <> other <> ")")
-            (scope, exchangeType, aggFn) <-
-                case V.toEither $ (,,) <$> parseScope scopeParam <*> parseExType fexchangeTypeParam <*> parseAgg aggregateParam of
-                    Left errs -> throwError err400{errBody = BSL.fromStrict (T.encodeUtf8 (T.intercalate "; " (NE.toList errs)))}
-                    Right v -> pure v
-            -- Cross-check requires the parsed scope value, so it runs after the
-            -- Applicative phase. Validation is not a Monad, by design.
-            case (exchangeType, scope) of
-                (Just _, Agg.ScopeBiosphere) ->
-                    throwError err400{errBody = "filter_exchange_type is redundant with scope=biosphere"}
-                (Just _, Agg.ScopeSupplyChain) ->
-                    throwError err400{errBody = "filter_exchange_type is not supported with scope=supply_chain (all entries are technosphere)"}
-                _ -> return ()
-            let presetFilters = expandPreset classificationPresets presetParam
-                explicitFilters = mapMaybe parseClassFilter fclassParams
-                params =
-                    Agg.AggregateParams
-                        { Agg.apScope = scope
-                        , Agg.apIsInput = isInputParam
-                        , Agg.apMaxDepth = maxDepthParam
-                        , Agg.apFilterName = fnameParam
-                        , Agg.apFilterNameNot = maybe [] (map T.strip . T.splitOn ",") fnameNotParam
-                        , Agg.apFilterUnit = funitParam
-                        , Agg.apFilterClassifications = presetFilters ++ explicitFilters
-                        , Agg.apFilterTargetName = ftargetParam
-                        , Agg.apFilterExchangeType = exchangeType
-                        , Agg.apFilterIsReference = freferenceParam
-                        , Agg.apGroupBy = groupByParam
-                        , Agg.apAggregate = aggFn
-                        }
-            unitCfg <- liftIO $ getMergedUnitConfig dbManager
-            (mFlows, mUnits) <- liftIO $ DM.getMergedFlowMetadata dbManager
-            result <- liftIO $ Agg.aggregate unitCfg mFlows mUnits db dbName sharedSolver (DM.mkDepSolverLookup dbManager) processId params
-            either throwServiceError pure result
-          where
-            -- Parse "System=Value[:exact]" into (system, value, isExact).
-            parseClassFilter :: Text -> Maybe (Text, Text, Bool)
-            parseClassFilter raw =
-                let (sys, rest) = T.breakOn "=" raw
-                 in if T.null rest
-                        then Nothing
-                        else
-                            let valAndMode = T.drop 1 rest
-                                (val, mode) = T.breakOn ":" valAndMode
-                                isExact = T.drop 1 mode == "exact"
-                             in Just (T.strip sys, T.strip val, isExact)
-
-    -- Activity LCIA endpoint (single method within a collection). The GET
-    -- variant honours a top-flows query param and logs the result; the POST
-    -- route carries neither (no top-flows param, no logging).
-    activityLCIACore :: Text -> Text -> Text -> Maybe Int -> Maybe SubstitutionRequest -> AppM LCIAResult
-    activityLCIACore dbName processIdText methodIdText topFlowsParam mSub = do
-        (db, sharedSolver) <- requireDatabaseByName dbName
-        method <- loadMethodByUUID methodIdText
-        (processId, activity) <- resolveOrThrow db processIdText
-        sol <- crossDBSolutionFor dbName db sharedSolver processId mSub
-        result <- liftIO $ computeCategoryResult dbManager dbName db sol activity (fromMaybe 5 topFlowsParam) Nothing method
-        when (isNothing mSub) $ liftIO $ logLCIAResult result method
-        pure result
-
-    getActivityLCIA :: Text -> Text -> Text -> Text -> Maybe Int -> AppM LCIAResult
-    getActivityLCIA dbName processIdText _collectionName methodIdText topFlowsParam =
-        activityLCIACore dbName processIdText methodIdText topFlowsParam Nothing
-
-    -- POST: LCIA with substitutions
-    postActivityLCIA :: Text -> Text -> Text -> Text -> SubstitutionRequest -> AppM LCIAResult
-    postActivityLCIA dbName processIdText _collectionName methodIdText subReq =
-        activityLCIACore dbName processIdText methodIdText Nothing (Just subReq)
-
-    -- POST: sensitivity sweep (parallel rank-1 perturbations of A_ij)
-    --
-    -- Perturbations are root-only by design (Sherman-Morrison rank-1 on
-    -- root's MUMPS factorization; 'Service.resolveRootOnly' rejects
-    -- "dbName::pid" forms). But scoring the perturbed root scaling must
-    -- still walk the cross-DB graph: dep-DB regional CFs are invisible
-    -- to a root-only 'applyBiosphereMatrix', and the propagation infra
-    -- ('SharedSolver.goWithDepsFromScalings') is already documented for
-    -- this exact use case ("caller supplies root scalings, e.g. after
-    -- a Sherman-Morrison update").
-    --
-    -- Cost: each perturbation now triggers one cross-DB back-substitution
-    -- per dep DB it actually reaches. MUMPS factorizations are cached, so
-    -- back-sub is O(n²) per dep DB, not full O(n³) factorization.
-    postActivitySensitivity :: Text -> Text -> Text -> Text -> SensitivityRequest -> AppM SensitivityResponse
-    postActivitySensitivity dbName processIdText _collectionName methodIdText senReq = do
-        (db, sharedSolver) <- requireDatabaseByName dbName
-        requireFullyLinked dbName db
-        method <- loadMethodByUUID methodIdText
-        (processId, activity) <- resolveOrThrow db processIdText
-        eRes <- liftIO $ Service.computeSensitivities db sharedSolver processId (srPerturbations senReq)
-        (baselineX, perResults) <- either throwServiceError pure eRes
-        unitCfg <- liftIO $ getMergedUnitConfig dbManager
-        let depLookup = DM.mkDepSolverLookup dbManager
-            scaleToSolution x = do
-                eSol <-
-                    SharedSolver.goWithDepsFromScalings
-                        unitCfg
-                        depLookup
-                        db
-                        dbName
-                        []
-                        [x]
-                        0
-                pure $ case eSol of
-                    Left err -> Left err
-                    Right (sol : _) -> Right sol
-                    Right [] -> Left "cross-DB propagation returned empty result"
-        eBaselineSol <- liftIO $ scaleToSolution baselineX
-        baselineSol <-
-            either
-                (\err -> throwError err422{errBody = BSL.fromStrict $ T.encodeUtf8 err})
-                pure
-                eBaselineSol
-        baselineLcia <-
-            liftIO $
-                computeCategoryResult dbManager dbName db baselineSol activity 5 Nothing method
-        perturbed <-
-            liftIO $
-                mapConcurrently
-                    (buildEntry db activity method baselineLcia scaleToSolution)
-                    perResults
-        pure SensitivityResponse{srBaseline = baselineLcia, srPerturbed = perturbed}
-      where
-        buildEntry db activity method baselineLcia scaleToSolution (p, eitherX) = case eitherX of
-            Left err -> pure (PerturbedEntry p (Left err))
-            Right x' -> do
-                eSol <- scaleToSolution x'
-                case eSol of
-                    Left err -> pure (PerturbedEntry p (Left err))
-                    Right sol -> do
-                        lcia <- computeCategoryResult dbManager dbName db sol activity 5 Nothing method
-                        pure (PerturbedEntry p (Right (lcia, lrScore lcia - lrScore baselineLcia)))
-
-    -- Batch LCIA endpoint (all methods in a collection). Thin alias over the
-    -- top-level activityLCIABatchH; preserves the Servant call sites.
-    activityLCIABatchCore :: Text -> Text -> Text -> Maybe SubstitutionRequest -> AppM LCIABatchResult
-    activityLCIABatchCore = activityLCIABatchH
-
-    getActivityLCIABatch :: Text -> Text -> Text -> AppM LCIABatchResult
-    getActivityLCIABatch dbName processIdText collectionName =
-        activityLCIABatchCore dbName processIdText collectionName Nothing
-
-    -- POST: Batch LCIA with substitutions
-    postActivityLCIABatch :: Text -> Text -> Text -> SubstitutionRequest -> AppM LCIABatchResult
-    postActivityLCIABatch dbName processIdText collectionName subReq =
-        activityLCIABatchCore dbName processIdText collectionName (Just subReq)
-
-    -- POST: Inventory with substitutions
-    postActivityInventory :: Text -> Text -> SubstitutionRequest -> AppM InventoryExport
-    postActivityInventory dbName processIdText subReq = activityInventoryCore dbName processIdText (Just subReq)
-
-    -- POST: Supply chain with substitutions
-    postActivitySupplyChain :: Text -> Text -> Maybe Text -> Maybe Int -> Maybe Double -> Maybe Int -> Maybe Int -> Maybe Text -> Maybe Text -> Maybe Text -> [Text] -> [Text] -> [Text] -> Maybe Text -> Maybe Text -> Maybe Bool -> SubstitutionRequest -> AppM SupplyChainResponse
-    postActivitySupplyChain dbName processIdText nameFilter limitParam minQuantity offsetParam maxDepthParam locationFilter productFilter presetParam classSystems classValues classModes sortParam orderParam includeEdgesParam subReq =
-        activitySupplyChainCore dbName processIdText nameFilter limitParam minQuantity offsetParam maxDepthParam locationFilter productFilter presetParam classSystems classValues classModes sortParam orderParam includeEdgesParam (Just subReq)
-
-    -- Activity consumers endpoint (reverse supply chain)
-    getActivityConsumers :: Text -> Text -> Maybe Text -> Maybe Text -> Maybe Text -> Maybe Text -> [Text] -> [Text] -> [Text] -> Maybe Int -> Maybe Int -> Maybe Int -> Maybe Text -> Maybe Text -> Maybe Bool -> AppM ConsumersResponse
-    getActivityConsumers dbName processIdText nameFilter locationFilter productFilter presetParam classSystems classValues classModes limitParam offsetParam maxDepthParam sortParam orderParam includeEdgesParam = do
-        (db, _) <- requireDatabaseByName dbName
-        let presetFilters = expandPreset classificationPresets presetParam
-            explicitFilters =
-                zipWith3
-                    (\s v m -> (s, v, m == "exact"))
-                    classSystems
-                    classValues
-                    (classModes ++ repeat "contains")
-            classFilters = presetFilters ++ explicitFilters
-            cnf =
-                Service.ConsumerFilter
-                    { Service.cnfCore =
-                        Service.ActivityFilterCore
-                            { Service.afcName = nameFilter
-                            , Service.afcLocation = locationFilter
-                            , Service.afcProduct = productFilter
-                            , Service.afcClassifications = classFilters
-                            , Service.afcLimit = limitParam
-                            , Service.afcOffset = offsetParam
-                            , Service.afcSort = sortParam
-                            , Service.afcOrder = orderParam
-                            }
-                    , Service.cnfMaxDepth = maxDepthParam
-                    , Service.cnfIncludeEdges = fromMaybe False includeEdgesParam
-                    }
-        either throwServiceError pure (Service.getConsumers db dbName processIdText cnf)
-
-    -- Activity path-to endpoint (shortest supply chain path to first matching upstream activity)
-    getActivityPathTo :: Text -> Text -> Maybe Text -> AppM Value
-    getActivityPathTo dbName processIdText targetParam = do
-        (db, solver) <- requireDatabaseByName dbName
-        target <-
-            maybe
-                (throwError err400{errBody = "Missing required 'target' query parameter"})
-                pure
-                targetParam
-        result <- liftIO $ Service.getPathTo db solver processIdText target
-        case result of
-            Left (Service.ActivityNotFound msg) ->
-                throwError err404{errBody = BSL.fromStrict $ T.encodeUtf8 msg}
-            Left (Service.InvalidProcessId msg) ->
-                throwError err400{errBody = BSL.fromStrict $ T.encodeUtf8 msg}
-            Left err ->
-                throwError err500{errBody = BSL.fromStrict $ T.encodeUtf8 $ T.pack $ show err}
-            Right val -> return val
-
-    -- (resolveOrThrow, throwServiceError, logBatchCategory and loadCollection
-    -- now live at the top level — see the block above the `lcaServer` def.
-    -- Call sites in this `where` pass `dbManager` explicitly when needed.)
-
-    -- Activity analysis endpoint (dispatches to registered analyzers)
-    getActivityAnalyze :: Text -> Text -> Text -> AppM Value
-    getActivityAnalyze dbName processIdText analyzerName = do
-        (db, sharedSolver) <- requireDatabaseByName dbName
-        case M.lookup analyzerName (prAnalyzers (dmPlugins dbManager)) of
-            Nothing -> throwError err404{errBody = "Analyzer not found: " <> BSL.fromStrict (T.encodeUtf8 analyzerName)}
-            Just analyzer -> do
-                case Service.resolveActivityAndProcessId db processIdText of
-                    Left (Service.ActivityNotFound _) -> throwError err404{errBody = "Activity not found"}
-                    Left (Service.InvalidProcessId _) -> throwError err400{errBody = "Invalid ProcessId format"}
-                    Left err -> throwError err500{errBody = BSL.fromStrict $ T.encodeUtf8 $ T.pack $ show err}
-                    Right (actProcessId, _) -> do
-                        inventory <- inventoryWithDeps dbName db sharedSolver actProcessId
-                        (mFlows, mUnits) <- liftIO $ DM.getMergedFlowMetadata dbManager
-                        loadedMethods <- liftIO $ DM.getLoadedMethods dbManager
-                        let methods = map snd loadedMethods
-                            ctx =
-                                AnalyzeContext
-                                    { acDatabase = db
-                                    , acInventory = inventory
-                                    , acMethods = methods
-                                    , acParameters = M.empty
-                                    , acTechFlowDB = M.empty
-                                    , acBioFlowDB = mFlows
-                                    , acUnitDB = mUnits
-                                    }
-                        liftIO $ ahAnalyze analyzer ctx
-
-    -- Contributing flows: top elementary flows by LCIA contribution for a specific method
-    getContributingFlows :: Text -> Text -> Text -> Text -> Maybe Int -> AppM ContributingFlowsResult
-    getContributingFlows dbName processIdText _collectionName methodIdText limitParam =
-        withActivityAndMethod dbName processIdText methodIdText $ \db sharedSolver actProcessId _ method -> do
-            let lim = fromMaybe 20 limitParam
-            unitCfg <- liftIO $ getMergedUnitConfig dbManager
-            (mFlows, mUnits) <- liftIO $ DM.getMergedFlowMetadata dbManager
-            inventory <- inventoryWithDeps dbName db sharedSolver actProcessId
-            tables <- liftIO $ DM.mapMethodToTablesCached dbManager dbName db method
-            let score = loScore (computeLCIAScoreFromTables unitCfg mUnits mFlows inventory tables)
-                (rawContribs, unknownUuids) = inventoryContributions unitCfg mUnits mFlows inventory tables
-                contribs = sortOn (\(_, _, c) -> negate (abs c)) rawContribs
-                topFlows =
-                    [ FlowContributionEntry
-                        { fcoFlowName = bfName f
-                        , fcoContribution = c
-                        , fcoSharePct = if score /= 0 then c / score * 100 else 0
-                        , fcoFlowId = UUID.toText (bfId f)
-                        , fcoCategory = bfCompartmentName f
-                        , fcoCompartment = bfCompartmentSub f
-                        , fcoCfValue = cfVal
-                        }
-                    | (f, cfVal, c) <- take lim contribs
-                    ]
-            liftIO $
-                unless (null unknownUuids) $
-                    reportProgress Warning $
-                        "[contributing-flows "
-                            <> T.unpack (methodName method)
-                            <> "] "
-                            <> show (length unknownUuids)
-                            <> " inventory flow UUID(s) absent from merged FlowDB. Samples: "
-                            <> show (take 3 unknownUuids)
-            return
-                ContributingFlowsResult
-                    { cfrMethod = methodName method
-                    , cfrUnit = methodUnit method
-                    , cfrTotalScore = score
-                    , cfrTopFlows = topFlows
-                    }
-
-    -- Contributing activities: top upstream activities by LCIA contribution for a specific method
-    getContributingActivities :: Text -> Text -> Text -> Text -> Maybe Int -> AppM ContributingActivitiesResult
-    getContributingActivities dbName processIdText _collectionName methodIdText limitParam =
-        withActivityAndMethod dbName processIdText methodIdText $ \db sharedSolver actProcessId _ method -> do
-            let lim = fromMaybe 10 limitParam
-            requireFullyLinked dbName db
-            unitCfg <- liftIO $ getMergedUnitConfig dbManager
-            (mFlows, mUnits) <- liftIO $ DM.getMergedFlowMetadata dbManager
-            tables <- liftIO $ DM.mapMethodToTablesCached dbManager dbName db method
-            -- Skip separate inventory compute: contributions sum equals the
-            -- score (same B·scaling·CF sum, just grouped per activity).
-            eContribs <-
-                liftIO $
-                    SharedSolver.crossDBProcessContributions
-                        unitCfg
-                        mUnits
-                        mFlows
-                        (DM.mkDepSolverLookup dbManager)
-                        db
-                        dbName
-                        sharedSolver
-                        actProcessId
-                        tables
-            case eContribs of
-                Left err -> throwError err422{errBody = BSL.fromStrict $ T.encodeUtf8 err}
-                Right contributions -> do
-                    let score = sum (M.elems contributions)
-                        sorted = sortOn (\(_, c) -> negate (abs c)) (M.toList contributions)
-                        top = take lim sorted
-                    rows <- liftIO $ mapM (mkCrossDBContrib dbManager dbName mFlows mUnits score) top
-                    return
-                        ContributingActivitiesResult
-                            { carMethod = methodName method
-                            , carUnit = methodUnit method
-                            , carTotalScore = score
-                            , carActivities = rows
-                            }
-
-    -- Score every method in a set against one inventory in a single batched
-    -- pass. For fully non-regionalized sets (PEF) this is a stacked-broadcast
-    -- matvec — one walk over the inventory, m FMAs per non-zero entry —
-    -- instead of m separate inventory walks. Mixed/regio sets fall through
-    -- to the per-DB cross-DB regional sum inside the set-scoring function.
-    --
-    -- The 'CrossDBSolution' carries the merged inventory and per-DB scaling
-    -- vectors collected during the inventory solve. Regional methods score
-    -- as a sum across all participating DBs (root + each dep DB reached at
-    -- request time); non-regional methods read the merged inventory only.
-    -- Flow detail endpoint
-    getFlowDetail :: Text -> Text -> AppM FlowDetail
-    getFlowDetail dbName flowIdText = do
-        (db, _) <- requireDatabaseByName dbName
-        withValidatedFlow db flowIdText $ \flow -> do
-            let fid = flowKindId flow
-                unitName' = flowKindUnitName (dbUnits db) flow
-                usageCount = Service.getFlowUsageCount db fid
-            return $ FlowDetail (apiFlowOfKind flow) unitName' usageCount
-
-    -- Activities using a specific flow
-    getFlowActivities :: Text -> Text -> AppM [ActivitySummary]
-    getFlowActivities dbName flowIdText = do
-        (db, _) <- requireDatabaseByName dbName
-        withValidatedFlow db flowIdText $ \flow ->
-            return $ Service.getActivitiesUsingFlow db (flowKindId flow)
-
-    -- List all available methods (from loaded collections)
-    getMethods :: AppM [MethodSummary]
-    getMethods = do
-        loadedMethods <- liftIO $ DM.getLoadedMethods dbManager
-        return
-            [ MethodSummary
-                { msmId = methodId m
-                , msmName = methodName m
-                , msmCategory = methodCategory m
-                , msmUnit = methodUnit m
-                , msmFactorCount = length (methodFactors m)
-                , msmCollection = collName
-                }
-            | (collName, m) <- loadedMethods
-            ]
-
-    -- Get method details
-    getMethodDetail :: Text -> AppM MethodDetail
-    getMethodDetail methodIdText = do
-        method <- loadMethodByUUID methodIdText
-        return $
-            MethodDetail
-                { mdId = methodId method
-                , mdName = methodName method
-                , mdDescription = methodDescription method
-                , mdUnit = methodUnit method
-                , mdCategory = methodCategory method
-                , mdMethodology = methodMethodology method
-                , mdFactorCount = length (methodFactors method)
-                }
-
-    -- Get method characterization factors
-    getMethodFactors :: Text -> AppM [MethodFactorAPI]
-    getMethodFactors methodIdText = do
-        method <- loadMethodByUUID methodIdText
-        return $ map cfToAPI (methodFactors method)
-
-    -- Get method flow mapping status
-    getMethodMapping :: Text -> Text -> AppM MappingStatus
-    getMethodMapping dbName methodIdText = do
-        (db, _) <- requireDatabaseByName dbName
-        method <- loadMethodByUUID methodIdText
-        mappings <- liftIO $ DM.mapMethodToFlowsCached dbManager dbName db method
-        let stats = computeMappingStats mappings
-            totalFactors = length mappings
-            coverage =
-                if totalFactors > 0
-                    then fromIntegral (totalFactors - msUnmatched stats) / fromIntegral totalFactors * 100
-                    else 0.0
-            -- Get unmapped flows (limit to first 50 for API response)
-            unmappedFlows =
-                take
-                    50
-                    [ UnmappedFlowAPI
-                        { ufaFlowRef = mcfFlowRef cf
-                        , ufaFlowName = mcfFlowName cf
-                        , ufaDirection = case mcfDirection cf of
-                            MT.Input -> "Input"
-                            MT.Output -> "Output"
-                        }
-                    | (cf, Nothing) <- mappings
-                    ]
-            uniqueDbFlows = S.size $ S.fromList [bfId f | (_, Just (f, _)) <- mappings]
-        return
-            MappingStatus
-                { mstMethodId = methodId method
-                , mstMethodName = methodName method
-                , mstTotalFactors = msTotal stats
-                , mstMappedByUUID = msByUUID stats
-                , mstMappedByCAS = msByCAS stats
-                , mstMappedByName = msByName stats
-                , mstMappedBySynonym = msBySynonym stats
-                , mstUnmapped = msUnmatched stats
-                , mstCoverage = coverage
-                , mstDbBiosphereCount = fromIntegral (dbBiosphereCount db)
-                , mstUniqueDbFlowsMatched = uniqueDbFlows
-                , mstUnmappedFlows = unmappedFlows
-                }
-
-    -- DB-flow-centric mapping: all biosphere flows with their CF assignments
-    getFlowCFMapping :: Text -> Text -> AppM FlowCFMapping
-    getFlowCFMapping dbName methodIdText = do
-        (db, _) <- requireDatabaseByName dbName
-        method <- loadMethodByUUID methodIdText
-        mappings <- liftIO $ DM.mapMethodToFlowsCached dbManager dbName db method
-        let
-            -- Build reverse index: DB flow UUID → (MethodCF, MatchStrategy)
-            reverseIndex =
-                M.fromList
-                    [(bfId f, (cf, strat)) | (cf, Just (f, strat)) <- mappings]
-            -- Build entries for all biosphere flows
-            entries = map (buildFlowEntry db reverseIndex) (V.toList (dbBiosphereOrder db))
-            matchedCount = length [() | e <- entries, isJust (fceCfValue e)]
-        return
-            FlowCFMapping
-                { fcmMethodName = methodName method
-                , fcmMethodUnit = methodUnit method
-                , fcmTotalFlows = fromIntegral (dbBiosphereCount db)
-                , fcmMatchedFlows = matchedCount
-                , fcmFlows = entries
-                }
-
-    buildFlowEntry :: Database -> M.Map UUID (MethodCF, MatchStrategy) -> UUID -> FlowCFEntry
-    buildFlowEntry db reverseIndex uuid =
-        let mFlow = M.lookup uuid (dbBioFlows db)
-            mMatch = M.lookup uuid reverseIndex
-         in FlowCFEntry
-                { fceFlowId = uuid
-                , fceFlowName = maybe "" bfName mFlow
-                , fceFlowCategory = maybe "" bfCompartmentName mFlow
-                , fceCfValue = fmap (mcfValue . fst) mMatch
-                , fceCfFlowName = fmap (mcfFlowName . fst) mMatch
-                , fceMatchStrategy = fmap (strategyToText . snd) mMatch
-                }
-
-    strategyToText :: MatchStrategy -> Text
-    strategyToText ByUUID = "uuid"
-    strategyToText ByCAS = "cas"
-    strategyToText ByName = "name"
-    strategyToText BySynonym = "synonym"
-    strategyToText ByFuzzy = "fuzzy"
-    strategyToText NoMatch = "none"
-
-    -- Characterization: matched CFs for a method, filterable by flow name
-    getCharacterization :: Text -> Text -> Maybe Text -> Maybe Int -> AppM CharacterizationResult
-    getCharacterization dbName methodIdText flowFilter limitParam = do
-        (db, _) <- requireDatabaseByName dbName
-        method <- loadMethodByUUID methodIdText
-        let lim = fromMaybe 50 limitParam
-            queryLower = fmap T.toLower flowFilter
-        mappings <- liftIO $ DM.mapMethodToFlowsCached dbManager dbName db method
-        let matched =
-                [ (cf, f, strat)
-                | (cf, Just (f, strat)) <- mappings
-                , matchesQuery queryLower (mcfFlowName cf) (bfName f)
-                ]
-            sorted = sortOn (\(cf, _, _) -> negate (abs (mcfValue cf))) matched
-            top = take lim sorted
-            mkEntry (cf, f, strat) =
-                CharacterizationEntry
-                    { cheMethodFlowName = mcfFlowName cf
-                    , cheCfValue = mcfValue cf
-                    , cheCfUnit = mcfUnit cf
-                    , cheDirection = case mcfDirection cf of
-                        MT.Input -> "Input"
-                        MT.Output -> "Output"
-                    , cheDbFlowName = bfName f
-                    , cheFlowId = UUID.toText (bfId f)
-                    , cheFlowUnit = getUnitNameForBioFlow (dbUnits db) f
-                    , cheCategory = bfCompartmentName f
-                    , cheCompartment = bfCompartmentSub f
-                    , cheMatchStrategy = strategyToText strat
-                    }
-        return
-            CharacterizationResult
-                { chrMethod = methodName method
-                , chrUnit = methodUnit method
-                , chrMatches = length matched
-                , chrShown = length top
-                , chrFactors = map mkEntry top
-                }
-
-    matchesQuery :: Maybe Text -> Text -> Text -> Bool
-    matchesQuery Nothing _ _ = True
-    matchesQuery (Just q) cfName dbName = T.isInfixOf q (T.toLower cfName) || T.isInfixOf q (T.toLower dbName)
-
-    -- Helper to load a method by UUID from the loaded collections
-    loadMethodByUUID :: Text -> AppM Method
-    loadMethodByUUID uuidText = do
-        loadedMethods <- liftIO $ DM.getLoadedMethods dbManager
-        let allMethods = map snd loadedMethods
-        case UUID.fromText uuidText of
-            Nothing -> throwError err400{errBody = "Invalid method UUID format"}
-            Just uuid ->
-                case filter (\m -> methodId m == uuid) allMethods of
-                    (m : _) -> return m
-                    [] -> throwError err404{errBody = "Method not found"}
-
-    -- Resolve (database, shared solver, ProcessId, Activity, Method) from URL path params
-    -- and dispatch to the continuation. Maps the three Service errors to standard HTTP codes.
-    withActivityAndMethod ::
-        Text ->
-        Text ->
-        Text ->
-        (Database -> SharedSolver -> ProcessId -> Activity -> Method -> AppM a) ->
-        AppM a
-    withActivityAndMethod dbName processIdText methodIdText k = do
-        (db, sharedSolver) <- requireDatabaseByName dbName
-        method <- loadMethodByUUID methodIdText
-        case Service.resolveActivityAndProcessId db processIdText of
-            Left (Service.ActivityNotFound _) -> throwError err404{errBody = "Activity not found"}
-            Left (Service.InvalidProcessId _) -> throwError err400{errBody = "Invalid ProcessId format"}
-            Left err -> throwError err500{errBody = BSL.fromStrict $ T.encodeUtf8 $ T.pack $ show err}
-            Right (actProcessId, activity) -> k db sharedSolver actProcessId activity method
-
-    -- Method collection handlers
-    getMethodCollections :: AppM MethodCollectionListResponse
-    getMethodCollections = do
-        statuses <- liftIO $ DM.listMethodCollections dbManager
-        return $
-            MethodCollectionListResponse
-                [ MethodCollectionStatusAPI
-                    { mcaName = mcsName s
-                    , mcaDisplayName = mcsDisplayName s
-                    , mcaDescription = mcsDescription s
-                    , mcaStatus = case mcsStatus s of
-                        DM.Loaded -> "loaded"
-                        _ -> "unloaded"
-                    , mcaIsUploaded = mcsIsUploaded s
-                    , mcaPath = mcsPath s
-                    , mcaMethodCount = mcsMethodCount s
-                    , mcaFormat = Just (mcsFormat s)
-                    }
-                | s <- statuses
-                ]
-
-    loadMethodCollectionHandler :: Text -> AppM ActivateResponse
-    loadMethodCollectionHandler name =
-        simpleAction (DM.loadMethodCollection dbManager name) ("Loaded method: " <> name)
-
-    unloadMethodCollectionHandler :: Text -> AppM ActivateResponse
-    unloadMethodCollectionHandler name =
-        simpleAction (DM.unloadMethodCollection dbManager name) ("Unloaded method: " <> name)
-
-    -- Helper to convert MethodCF to API type
-    cfToAPI :: MethodCF -> MethodFactorAPI
-    cfToAPI cf =
-        MethodFactorAPI
-            { mfaFlowRef = mcfFlowRef cf
-            , mfaFlowName = mcfFlowName cf
-            , mfaDirection = case mcfDirection cf of
-                MT.Input -> "Input"
-                MT.Output -> "Output"
-            , mfaValue = mcfValue cf
-            }
-
-    -- Search flows by name or synonym with optional language filtering and pagination
-    searchFlows :: Text -> Maybe Text -> Maybe Text -> Maybe Int -> Maybe Int -> Maybe Text -> Maybe Text -> AppM (SearchResults FlowSearchResult)
-    searchFlows dbName queryParam langParam limitParam offsetParam sortParam orderParam = do
-        (db, _) <- requireDatabaseByName dbName
-        case queryParam of
-            Nothing -> return (SearchResults [] 0 0 50 False 0.0)
-            Just query -> do
-                let ff =
-                        Service.FlowFilter
-                            { Service.ffQuery = query
-                            , Service.ffLang = langParam
-                            , Service.ffLimit = limitParam
-                            , Service.ffOffset = offsetParam
-                            , Service.ffSort = sortParam
-                            , Service.ffOrder = orderParam
-                            }
-                searchFlowsInternal db ff
-
-    -- Search activities by specific fields with pagination and count
-    searchActivitiesWithCount :: Text -> Maybe Text -> Maybe Text -> Maybe Text -> Maybe Bool -> Maybe Text -> [Text] -> [Text] -> [Text] -> Maybe Int -> Maybe Int -> Maybe Text -> Maybe Text -> AppM (SearchResults ActivitySummary)
-    searchActivitiesWithCount dbName nameParam geoParam productParam exactParam presetParam classSystems classValues classModes limitParam offsetParam sortParam orderParam = do
-        (db, _) <- requireDatabaseByName dbName
-        -- Expand preset filters then merge with explicit classification params
-        let exactMatch = fromMaybe False exactParam
-            presetFilters = expandPreset classificationPresets presetParam
-            explicitFilters =
-                zipWith3
-                    (\s v m -> (s, v, m == "exact"))
-                    classSystems
-                    classValues
-                    (classModes ++ repeat "contains")
-            classFilters = presetFilters ++ explicitFilters
-        let sf =
-                Service.SearchFilter
-                    { Service.sfCore =
-                        Service.ActivityFilterCore
-                            { Service.afcName = nameParam
-                            , Service.afcLocation = geoParam
-                            , Service.afcProduct = productParam
-                            , Service.afcClassifications = classFilters
-                            , Service.afcLimit = limitParam
-                            , Service.afcOffset = offsetParam
-                            , Service.afcSort = sortParam
-                            , Service.afcOrder = orderParam
-                            }
-                    , Service.sfExactMatch = exactMatch
-                    }
-        result <- liftIO $ Service.searchActivities db sf
-        case result of
-            Left err -> throwError err500{errBody = BSL.fromStrict $ T.encodeUtf8 $ T.pack $ show err}
-            Right jsonValue -> case fromJSON jsonValue of
-                Success searchResults -> return searchResults
-                Error parseErr -> throwError err500{errBody = BSL.fromStrict $ T.encodeUtf8 $ T.pack parseErr}
-
-    getClassifications :: Text -> AppM [ClassificationSystem]
-    getClassifications dbName = do
-        (db, _) <- requireDatabaseByName dbName
-        return $ Service.getClassifications db
-
-    -- Batch impacts: thin alias over the top-level batchImpactsH.
-    postImpactsBatch :: Text -> Text -> Maybe Int -> BatchImpactsRequest -> AppM BatchImpactsResponse
-    postImpactsBatch = batchImpactsH
 
 {- | Evaluate every scoring set against the raw impact score map.
 Returns (setName → scoreName → value, setName → varName → ScoringIndicator).
diff --git a/src/App/Env.hs b/src/App/Env.hs
index 9cd1d3b6..e02d6ba6 100644
--- a/src/App/Env.hs
+++ b/src/App/Env.hs
@@ -1,42 +1,13 @@
 {-# LANGUAGE DerivingStrategies #-}
-{-# LANGUAGE FlexibleContexts #-}
-{-# LANGUAGE FlexibleInstances #-}
 {-# LANGUAGE GeneralizedNewtypeDeriving #-}
-{-# LANGUAGE MultiParamTypeClasses #-}
-{-# LANGUAGE RankNTypes #-}
 
-{- | The 'AppM' capability monad and the read-only environment that every
-HTTP handler closes over.
-
-The categorical structure: 'AppM' is a 'Reader' monad transformer over
-Servant's 'Handler', so it lives in the Kleisli category of @Reader
-AppEnv@ lifted into @Handler@'s Kleisli category. 'hoistServer' (used
-in 'API.Routes.lcaServer') is the natural transformation
-@forall a. AppM a -> Handler a@ that turns a @ServerT api AppM@ into a
-@ServerT api Handler@ — Servant doesn't care about the monad, as long
-as we can collapse it down to 'Handler' at the boundary.
-
-The 'Has*' typeclasses are narrow capability witnesses: a function with
-@(MonadReader r m, HasDatabaseManager r, MonadIO m) => m a@ declares
-exactly the slice of the environment it needs, without committing to
-the concrete monad. This is the "Has-pattern" of mtl-style: capability
-classes are *projections* out of the environment object.
--}
+-- | 'AppM' is @'ReaderT' 'AppEnv' 'Handler'@; 'runApp' is the @AppM ~> Handler@
+-- mapping passed to Servant's 'hoistServer'.
 module App.Env (
-    -- * Environment
     AppEnv (..),
     mkAppEnv,
-
-    -- * Monad
     AppM (..),
     runApp,
-
-    -- * Capability classes (Has-pattern)
-    HasDatabaseManager (..),
-    HasMaxTreeDepth (..),
-    HasPassword (..),
-    HasHostingConfig (..),
-    HasClassificationPresets (..),
 ) where
 
 import Control.Monad.Except (MonadError)
@@ -55,7 +26,6 @@ data AppEnv = AppEnv
     , aeClassificationPresets :: ![Config.ClassificationPreset]
     }
 
--- | Smart constructor — keeps callers from positionally swapping fields.
 mkAppEnv
     :: DatabaseManager
     -> Int
@@ -65,65 +35,8 @@ mkAppEnv
     -> AppEnv
 mkAppEnv = AppEnv
 
-{- | Servant 'Handler' threaded with a read-only 'AppEnv'. Deriving the
-@MonadReader@ / @MonadIO@ / @MonadError ServerError@ instances via
-@GeneralizedNewtypeDeriving@ keeps the wrapper free at runtime: 'AppM'
-is representationally a function @AppEnv -> IO (Either ServerError a)@
-under the hood, identical to 'Handler' modulo the @AppEnv@ argument.
--}
 newtype AppM a = AppM {unAppM :: ReaderT AppEnv Handler a}
     deriving newtype (Functor, Applicative, Monad, MonadIO, MonadReader AppEnv, MonadError ServerError)
 
-{- | Discharge an 'AppM' computation against a concrete environment,
-producing a plain Servant 'Handler'. Use in 'API.Routes.lcaServer' as
-@hoistServer lcaAPI (runApp env) handlers@.
-
-This is the natural transformation @AppM ~> Handler@ that Servant's
-@hoistServer@ requires; it lifts the entire @ServerT api AppM@ into
-@ServerT api Handler@ point-free.
--}
 runApp :: AppEnv -> AppM a -> Handler a
 runApp env (AppM m) = runReaderT m env
-
--- ---------------------------------------------------------------------------
--- Has-pattern: narrow capability witnesses
--- ---------------------------------------------------------------------------
-
-{- | Witness that the environment exposes a 'DatabaseManager'. Handlers
-that need DB access should constrain on @HasDatabaseManager r@ rather
-than the concrete 'AppEnv', so the same code can run in tests with a
-narrower env.
--}
-class HasDatabaseManager r where
-    getDatabaseManager :: r -> DatabaseManager
-
-instance HasDatabaseManager AppEnv where
-    getDatabaseManager = aeDbManager
-
--- | Max tree depth limit (anti-DoS guard for /tree and /graph endpoints).
-class HasMaxTreeDepth r where
-    getMaxTreeDepth :: r -> Int
-
-instance HasMaxTreeDepth AppEnv where
-    getMaxTreeDepth = aeMaxTreeDepth
-
--- | Optional admin password gating @POST /auth@.
-class HasPassword r where
-    getPassword :: r -> Maybe String
-
-instance HasPassword AppEnv where
-    getPassword = aePassword
-
--- | Hosting configuration consumed by @GET /hosting@.
-class HasHostingConfig r where
-    getHostingConfig :: r -> Maybe Config.HostingConfig
-
-instance HasHostingConfig AppEnv where
-    getHostingConfig = aeHostingConfig
-
--- | Classification presets used in @/aggregate@ and @/supply-chain@ filters.
-class HasClassificationPresets r where
-    getClassificationPresets :: r -> [Config.ClassificationPreset]
-
-instance HasClassificationPresets AppEnv where
-    getClassificationPresets = aeClassificationPresets
diff --git a/src/CLI/Parser.hs b/src/CLI/Parser.hs
index b41d6495..8b6093d3 100644
--- a/src/CLI/Parser.hs
+++ b/src/CLI/Parser.hs
@@ -11,13 +11,8 @@ import qualified Options.Applicative as OA
 import Version (buildTarget, gitHash, gitTag, version)
 
 -- ---------------------------------------------------------------------------
--- Option-builder helpers
---
--- optparse-applicative is the textbook /Free Applicative/ over a primitive
--- @Parser@: composition with @\<*\>@ keeps the static structure exposed
--- (which is why @--help@ generation works), and the helpers below factor
--- the four shapes that dominated this file before — Text option, Int
--- option, switch, positional Text arg — into one line each.
+-- Option-builder helpers: collapses the @long/short/metavar/help@ boilerplate
+-- around the four shapes (Text/Int option, Text/String positional arg).
 -- ---------------------------------------------------------------------------
 
 -- | @strOption@ with @long/metavar/help@, optionally a short alias.
diff --git a/src/Data/Validation.hs b/src/Data/Validation.hs
index 43efd733..bec7eea7 100644
--- a/src/Data/Validation.hs
+++ b/src/Data/Validation.hs
@@ -1,12 +1,6 @@
-{- | An Applicative-only validation type that accumulates errors via the
-'Semigroup' on @e@ — the canonical example, in Milewski's
-/Category Theory for Programmers/, of an Applicative that is /not/ a
-Monad. A lawful 'Monad' instance for 'Validation' would have to
-short-circuit on the first 'Failure' (to make @>>=@ associative w.r.t.
-the underlying 'Either' behaviour); that would erase the accumulation
-that motivates the type, so we stop at 'Applicative'.
-
-Use:
+{- | Applicative-only validation that accumulates errors via the 'Semigroup'
+on @e@. No 'Monad' instance: the @ap = (\<*\>)@ law would force '<*>' to
+short-circuit, destroying accumulation.
 
 @
     validateAll
@@ -14,8 +8,7 @@ Use:
     validateAll a b = (,) \<$\> validateA a \<*\> validateB b
 @
 
-When both @validateA@ and @validateB@ fail, the resulting 'Failure'
-carries /both/ messages, not just the first.
+When both arms fail, the resulting 'Failure' carries both messages.
 -}
 module Data.Validation (
     Validation (..),
diff --git a/src/Database/Loader.hs b/src/Database/Loader.hs
index 819cf06e..a3f74d9e 100644
--- a/src/Database/Loader.hs
+++ b/src/Database/Loader.hs
@@ -45,7 +45,6 @@ module Database.Loader (
     fixActivityLinksWithCrossDB,
     findAllCrossDBLinks,
     CrossDBLinkingStats (..),
-    mempty,
     crossDBLinksCount,
     unresolvedCount,
     crossDBBySource,
@@ -232,14 +231,9 @@ data UnlinkedExchange = UnlinkedExchange
     }
     deriving (Eq, Ord, Show)
 
-{- | Summary of unlinked exchanges grouped by consumer activity.
-
-Categorically the product of four monoids:
-'M.unionWith (++)' on the activity map, and addition on each 'Int'
-counter. We hand-write the instance because bare 'Int' has no canonical
-'Monoid' (Sum vs Product is ambiguous); wrapping the fields as 'Sum Int'
-would have leaked through every constructor and accessor.
--}
+-- | Summary of unlinked exchanges grouped by consumer activity.
+-- 'Monoid' is hand-written: bare 'Int' has no canonical instance, and using
+-- 'Sum Int' would force every reader to unwrap.
 data UnlinkedSummary = UnlinkedSummary
     { usActivities :: !(M.Map T.Text [UnlinkedExchange]) -- consumer name → list of unlinked exchanges
     , usTotalLinks :: !Int
diff --git a/src/Service.hs b/src/Service.hs
index 3f51c653..de004c89 100644
--- a/src/Service.hs
+++ b/src/Service.hs
@@ -341,10 +341,7 @@ getActivityInventoryWithSharedSolver validators sharedSolver db processIdText =
                                     let inventoryExport = convertToInventoryExport db (dbBioFlows db) (dbUnits db) processId activity inventory
                                     return $ Right inventoryExport
 
--- | Simple stats tracking for tree processing
--- | Tree-traversal counters (total nodes / loop nodes / leaf nodes). The
--- 'Semigroup' / 'Monoid' instance is the product of three Sum-Int monoids,
--- hand-written to keep the bare 'Int' constructor positions ergonomic.
+-- | Tree-traversal counters (total nodes / loop nodes / leaf nodes).
 data TreeStats = TreeStats Int Int Int -- total, loops, leaves
 
 instance Semigroup TreeStats where
diff --git a/src/Types.hs b/src/Types.hs
index 8b3bcde6..a99ea514 100644
--- a/src/Types.hs
+++ b/src/Types.hs
@@ -1116,12 +1116,10 @@ data CrossDBLinkingStats = CrossDBLinkingStats
     }
     deriving (Generic, NFData, Store)
 
-{- | Product of monoids, componentwise: lists concat, the count\/blocker
-map unions (counts summed, first blocker wins as a tiebreaker), the set
-unions, the @Int@ counter sums. Hand-written rather than @via Generically@
-because bare @Int@ has no canonical 'Monoid' (Sum vs Product is ambiguous)
-and the @(Int, LinkBlocker)@ map value mixes a 'Monoid' with a non-'Monoid'.
--}
+-- | Field-wise '<>'. On unresolved-product collision counts are summed
+-- and the first 'LinkBlocker' wins (tiebreaker). Hand-written: bare 'Int'
+-- has no canonical 'Monoid', and the @(Int, LinkBlocker)@ map value is
+-- not itself a 'Monoid'.
 instance Semigroup CrossDBLinkingStats where
     s1 <> s2 =
         CrossDBLinkingStats

From 666613d024efb46392dd01103cbb30ac12bd446a Mon Sep 17 00:00:00 2001
From: Christophe Combelles <ccomb@free.fr>
Date: Tue, 26 May 2026 23:32:09 +0200
Subject: [PATCH 22/43] refactor(mapping): MappingStats Monoid, foldMap stats,
 Alternative cascade
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Two patterns at the heart of the LCIA mapping kernel that the previous
code spelled out the long way.

1. MappingStats becomes a Monoid (field-wise sum, all-zero identity).
   computeMappingStats was seven independent walks over the mapping list
   (one `length mappings` + five `length . filter ((== Just s) . ...)` +
   one `length . filter (isNothing . snd)`). It is now one `foldMap`
   pass with an exhaustive case on MatchStrategy.

   - 7×|mappings| traversals → 1×|mappings|.
   - The strategy switch is exhaustive: adding a new `MatchStrategy`
     constructor is now a compile error in `tally` until it gets a row.
     The old `(== Just strategy) . fmap snd . snd` form silently
     under-counted any new variant.
   - MappingStats now composes via `<>` and `mconcat`, useful for any
     future cross-method or cross-DB aggregation.

2. lookupCascadeCF was a hand-unrolled UUID → exact → fallback ladder
   nested four `case Maybe` levels deep. With Maybe's Alternative the
   cascade IS the algorithm:

       M.lookup fid (mtUuidCF tables)
         <|> (M.lookup fid flowDB >>= byName)

   where `byName` ends with

       M.lookup (name, baseMed, normSub) (mtExactCF tables)
         <|> M.lookup (name, baseMed)    (mtFallbackCF tables)

   No behaviour change. Five fewer indentation levels in the body, and
   the priority order reads top-to-bottom on the page.

Tests: 1052/1052 hspec green, openapi.json byte-identical (md5 matches).
---
 src/Method/Mapping.hs | 84 +++++++++++++++++++++++++------------------
 1 file changed, 49 insertions(+), 35 deletions(-)

diff --git a/src/Method/Mapping.hs b/src/Method/Mapping.hs
index 026e6f2f..e14c6662 100644
--- a/src/Method/Mapping.hs
+++ b/src/Method/Mapping.hs
@@ -67,6 +67,7 @@ module Method.Mapping (
     computeMappingStats,
 ) where
 
+import Control.Applicative ((<|>))
 import Control.DeepSeq (NFData)
 import Control.Monad.ST (runST)
 import Data.Aeson (ToJSON)
@@ -114,7 +115,9 @@ data MatchStrategy
       NoMatch
     deriving (Eq, Show)
 
--- | Statistics about mapping results
+-- | Per-strategy mapping counters. Forms a 'Monoid' (field-wise sum, all-zero
+-- identity) so per-batch stats compose with '<>' and 'computeMappingStats'
+-- is a single 'foldMap' pass over the mapping list.
 data MappingStats = MappingStats
     { msTotal :: !Int
     -- ^ Total CFs in method
@@ -133,6 +136,20 @@ data MappingStats = MappingStats
     }
     deriving (Eq, Show)
 
+instance Semigroup MappingStats where
+    a <> b =
+        MappingStats
+            (msTotal a + msTotal b)
+            (msByUUID a + msByUUID b)
+            (msByCAS a + msByCAS b)
+            (msByName a + msByName b)
+            (msBySynonym a + msBySynonym b)
+            (msByFuzzy a + msByFuzzy b)
+            (msUnmatched a + msUnmatched b)
+
+instance Monoid MappingStats where
+    mempty = MappingStats 0 0 0 0 0 0 0
+
 -- | Build a MapContext from a Database (convenience for callers)
 buildMapContext :: Database -> MapContext
 buildMapContext db =
@@ -252,20 +269,20 @@ pickByCompartment (f : fs) (Just comp) = Just $
         | med `T.isInfixOf` cat = True
         | otherwise = False
 
--- | Compute statistics about mapping results
+-- | Per-strategy counts of mapping results in one pass.
+-- Each 'MatchStrategy' must be named below — adding a new variant is a
+-- compile error here until it gets a row.
 computeMappingStats :: [(MethodCF, Maybe (BiosphereFlow, MatchStrategy))] -> MappingStats
-computeMappingStats mappings =
-    MappingStats
-        { msTotal = length mappings
-        , msByUUID = count ByUUID
-        , msByCAS = count ByCAS
-        , msByName = count ByName
-        , msBySynonym = count BySynonym
-        , msByFuzzy = count ByFuzzy
-        , msUnmatched = length $ filter (isNothing . snd) mappings
-        }
+computeMappingStats = foldMap (tally . fmap snd . snd)
   where
-    count strategy = length $ filter ((== Just strategy) . fmap snd . snd) mappings
+    one = mempty{msTotal = 1}
+    tally Nothing = one{msUnmatched = 1}
+    tally (Just ByUUID) = one{msByUUID = 1}
+    tally (Just ByCAS) = one{msByCAS = 1}
+    tally (Just ByName) = one{msByName = 1}
+    tally (Just BySynonym) = one{msBySynonym = 1}
+    tally (Just ByFuzzy) = one{msByFuzzy = 1}
+    tally (Just NoMatch) = one{msUnmatched = 1}
 
 {- | Precomputed CF lookup tables for one (database, method) pair.
 Building these from raw mappings is O(n log n) over thousands of CFs, so they
@@ -1088,28 +1105,25 @@ against ILCD-style bare media without requiring an explicit (medium, sub)
 pair for every combination.
 -}
 lookupCascadeCF :: MethodTables -> BioFlowDB -> UUID -> Maybe (Double, Text)
-lookupCascadeCF tables flowDB fid = case M.lookup fid (mtUuidCF tables) of
-    Just cfv -> Just cfv
-    Nothing -> case M.lookup fid flowDB of
-        Nothing -> Nothing
-        Just flow ->
-            let name = normalizeName (bfName flow)
-                rawCategory = T.toLower (VT.bfCompartmentName flow)
-                (rawMed, rawSubFromCat) = case T.breakOn "/" rawCategory of
-                    (m, rest)
-                        | T.null rest -> (m, T.empty)
-                        | otherwise -> (m, T.drop 1 rest)
-                rawSub =
-                    let s = T.toLower (fromMaybe T.empty (VT.bfCompartmentSub flow))
-                     in if T.null s then rawSubFromCat else s
-                Compartment normMedRaw normSub _ =
-                    normalizeCompartment (mtCompartmentMap tables) (Compartment rawMed rawSub T.empty)
-                baseMed = normalizeMedium normMedRaw
-                subcomp = normSub
-                exact = M.lookup (name, baseMed, subcomp) (mtExactCF tables)
-             in case exact of
-                    Just _ -> exact
-                    Nothing -> M.lookup (name, baseMed) (mtFallbackCF tables)
+lookupCascadeCF tables flowDB fid =
+    M.lookup fid (mtUuidCF tables)
+        <|> (M.lookup fid flowDB >>= byName)
+  where
+    byName flow =
+        let name = normalizeName (bfName flow)
+            rawCategory = T.toLower (VT.bfCompartmentName flow)
+            (rawMed, rawSubFromCat) = case T.breakOn "/" rawCategory of
+                (m, rest)
+                    | T.null rest -> (m, T.empty)
+                    | otherwise -> (m, T.drop 1 rest)
+            rawSub =
+                let s = T.toLower (fromMaybe T.empty (VT.bfCompartmentSub flow))
+                 in if T.null s then rawSubFromCat else s
+            Compartment normMedRaw normSub _ =
+                normalizeCompartment (mtCompartmentMap tables) (Compartment rawMed rawSub T.empty)
+            baseMed = normalizeMedium normMedRaw
+         in M.lookup (name, baseMed, normSub) (mtExactCF tables)
+                <|> M.lookup (name, baseMed) (mtFallbackCF tables)
 
 -- | Normalize medium names between method CFs and database flows.
 normalizeMedium :: Text -> Text

From 047f5fe844ec47ceb12f278714c73889ee27a0cf Mon Sep 17 00:00:00 2001
From: Christophe Combelles <ccomb@free.fr>
Date: Wed, 27 May 2026 11:30:01 +0200
Subject: [PATCH 23/43] refactor(database): extract Database.MatrixBuild from
 buildDatabaseWithMatrices
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The function was a ~220-line do mixing UUID interning, two matrix
builders, the product index, and the final Database assembly. Every
pure phase moves into a new Database.MatrixBuild; the IO entrypoint
becomes a thin log → call pure helpers → assemble record sequence
(-186/+27 in Database.hs).

Patterns line up with the ones 666613d named for the mapping kernel:

- findProducer is Alternative on Maybe — the cascade IS the algorithm:

      exchangeProcessLinkId ex
        <|> (exchangeActivityLinkId ex
              >>= \a -> M.lookup (a, exchangeFlowId ex) lkp)

  Replaces a four-level nested case Maybe ladder.

- buildTechTriples uses traverse over (Either Text), which
  short-circuits on the first unit-conversion error. Tuple-Monoid + fold
  accumulates (triples, warnings) without partitioning Lefts and Rights
  by hand.

- perActivity flattens (activity, exchange) iteration once. Tech and bio
  each carried a near-identical "fetch activity / key / normFactor / fold
  exchanges" wrapper; they now share it. Drops the dead _activity arg
  from the bio builder along the way.

- Misc: safeDenom names the divide-by-zero guard used by both matrices;
  the double convertUnit call collapses into one match on
  (needsConversion, convertUnit ...); S.toAscList replaces
  sort . S.toList . S.fromList.

Behaviour identical. Tests: 1052/1052 hspec green; the matrix log lines
(activity count, technosphere / biosphere non-zero entries) match the
pre-refactor numbers on every fixture.
---
 src/Database.hs             | 212 ++++----------------------------
 src/Database/MatrixBuild.hs | 234 ++++++++++++++++++++++++++++++++++++
 volca.cabal                 |   1 +
 3 files changed, 261 insertions(+), 186 deletions(-)
 create mode 100644 src/Database/MatrixBuild.hs

diff --git a/src/Database.hs b/src/Database.hs
index ff0d55d1..cb8abb71 100644
--- a/src/Database.hs
+++ b/src/Database.hs
@@ -4,23 +4,20 @@
 
 module Database where
 
-import Data.Either (lefts, rights)
-import Data.Int (Int32)
 import qualified Data.IntSet as IS
-import Data.List (sort)
 import qualified Data.Map as M
 import qualified Data.Set as S
 import Data.Text (Text)
 import qualified Data.Text as T
-import qualified Data.UUID as UUID
 import qualified Data.Vector as V
 import qualified Data.Vector.Unboxed as VU
+import Database.MatrixBuild
 import Progress
 import qualified Search.BM25.Types as BM25T
 import qualified Search.Fuzzy as Fuzzy
 import qualified Search.Normalize as Normalize
 import Types
-import UnitConversion (UnitConfig, convertUnit, normalizeUnit)
+import UnitConversion (UnitConfig)
 
 {- | Build complete database with pre-computed sparse matrices
 
@@ -45,204 +42,47 @@ Matrix Construction:
 buildDatabaseWithMatrices :: UnitConfig -> M.Map (UUID, UUID) Activity -> TechFlowDB -> BioFlowDB -> WasteFlowDB -> UnitDB -> IO (Either Text Database)
 buildDatabaseWithMatrices unitConfig activityMap techFlowDB bioFlowDB wasteFlowDB unitDB = do
     reportMatrixOperation "Building database with pre-computed sparse matrices"
+    let !tables = buildInterningTables activityMap
+        !supplierRefUnits = buildSupplierRefUnits unitDB (itActivities tables)
+        !indexes = buildIndexesWithProcessIds (itActivities tables) (itProcessIdTable tables)
+        activityCount = itActivityCount tables
 
-    -- Step 1: Build UUID interning tables from Map keys
-    let activityKeys = M.keys activityMap
-        sortedKeys = sort activityKeys -- Ensure deterministic ordering
-
-        -- Build forward lookup: ProcessId (Int32) -> (UUID, UUID)
-        dbProcessIdTable = V.fromList sortedKeys
-
-        -- Build reverse lookup: (UUID, UUID) -> ProcessId (Int32)
-        dbProcessIdLookup = M.fromList $ zip sortedKeys [0 ..]
-
-        -- Build activity UUID index: UUID -> ProcessId (for O(1) lookups)
-        dbActivityUUIDIndex = M.fromList [(actUUID, pid) | (pid, (actUUID, _)) <- zip [0 ..] sortedKeys]
-
-        -- Build activity products index: UUID -> [ProcessId] (for multi-product activities)
-        dbActivityProductsIndex = M.fromListWith (++) [(actUUID, [pid]) | (pid, (actUUID, _)) <- zip [0 ..] sortedKeys]
-
-        -- Build activity-product lookup for correct multi-output handling
-        -- Maps (activityUUID, productFlowUUID) -> ProcessId
-        -- This ensures exchanges link to the correct product in multi-output activities
-        activityProductLookup = M.fromList [((actUUID, prodUUID), pid) | (pid, (actUUID, prodUUID)) <- zip [0 ..] sortedKeys]
-
-        -- Convert Map to Vector indexed by ProcessId
-        dbActivities = V.fromList [activityMap M.! key | key <- sortedKeys]
-
-        -- Build indexes (now using Vector)
-        indexes = buildIndexesWithProcessIds dbActivities dbProcessIdTable
-
-        -- Build supplier reference unit lookup: ProcessId -> unit name of reference product
-        -- Used to convert exchange amounts to the supplier's unit for correct A-matrix coefficients
-        supplierRefUnits =
-            V.map
-                ( \act ->
-                    let refExs = [ex | ex <- exchanges act, exchangeIsReference ex, not (exchangeIsInput ex)]
-                     in case refExs of
-                            (ex : _) -> getUnitNameForExchange unitDB ex
-                            [] -> ""
-                )
-                dbActivities
-
-    -- Build activity index for matrix construction
-    reportMatrixOperation "Building activity indexes"
-    let activityCount = fromIntegral (V.length dbActivities) :: Int32
-
-    -- Note: ProcessId is already the matrix index (identity mapping removed for performance)
     reportMatrixOperation ("Activity index built: " ++ show activityCount ++ " activities")
-
-    -- Build technosphere sparse triplets
     reportMatrixOperation "Building technosphere matrix triplets"
-    let buildTechTriple normalizationFactor j consumerActivity _consumerPid ex
-            -- Biosphere exchanges live in the B matrix, not A.
-            -- WasteExchanges share the A matrix with technosphere flows: the
-            -- underlying calculation is identical to a product link. Orphan
-            -- waste outputs (no activityLinkId) naturally drop out below when
-            -- producerIdx is Nothing — same as orphan tech inputs.
-            | isBiosphereExchange ex = Right ([], [])
-            | exchangeIsReference ex = Right ([], []) -- reference product is on the diagonal
-            | otherwise =
-                let producerResult = case exchangeProcessLinkId ex of
-                        Just pid -> (Just pid, [])
-                        Nothing -> case exchangeActivityLinkId ex of
-                            Just actUUID ->
-                                case M.lookup (actUUID, exchangeFlowId ex) activityProductLookup of
-                                    Just pid -> (Just pid, [])
-                                    Nothing ->
-                                        -- Only warn if exchange has non-zero amount (zero-amount are placeholders)
-                                        let warning =
-                                                [ "Missing activity-product pair referenced by exchange:\n"
-                                                    ++ "  Activity UUID: "
-                                                    ++ T.unpack (UUID.toText actUUID)
-                                                    ++ "\n"
-                                                    ++ "  Product UUID: "
-                                                    ++ T.unpack (UUID.toText (exchangeFlowId ex))
-                                                    ++ "\n"
-                                                    ++ "  Consumer: "
-                                                    ++ T.unpack (activityName consumerActivity)
-                                                    ++ "\n"
-                                                    ++ "  Expected file: "
-                                                    ++ T.unpack (UUID.toText actUUID)
-                                                    ++ "_"
-                                                    ++ T.unpack (UUID.toText (exchangeFlowId ex))
-                                                    ++ ".spold\n"
-                                                    ++ "  This exchange will be skipped."
-                                                | abs (exchangeAmount ex) > 1e-15
-                                                ]
-                                         in (Nothing, warning)
-                            Nothing -> (Nothing, [])
-                    (producerPid, warnings) = producerResult
-                    producerIdx =
-                        producerPid >>= \pid ->
-                            if pid >= 0 && fromIntegral pid < activityCount
-                                then Just $ fromIntegral pid
-                                else Nothing
-                 in case producerIdx of
-                        Just idx ->
-                            let rawValue = exchangeAmount ex
-                                exchangeUnit = getUnitNameForExchange unitDB ex
-                                supplierUnit = supplierRefUnits V.! fromIntegral idx
-                                needsConversion =
-                                    normalizeUnit exchangeUnit /= normalizeUnit supplierUnit
-                                        && not (T.null exchangeUnit)
-                                        && not (T.null supplierUnit)
-                             in case (needsConversion, convertUnit unitConfig exchangeUnit supplierUnit rawValue) of
-                                    (True, Nothing) ->
-                                        Left $
-                                            "Unknown unit conversion: \""
-                                                <> exchangeUnit
-                                                <> "\" \8594 \""
-                                                <> supplierUnit
-                                                <> "\" in "
-                                                <> activityName consumerActivity
-                                                <> " \8212 add these units to [[units]] CSV"
-                                    _ ->
-                                        let convertedValue = case (needsConversion, convertUnit unitConfig exchangeUnit supplierUnit rawValue) of
-                                                (True, Just v) -> v
-                                                _ -> rawValue
-                                            denom =
-                                                if normalizationFactor > 1e-15
-                                                    then normalizationFactor
-                                                    else 1.0
-                                            sign = if exchangeIsInput ex then 1 else -1
-                                            value = sign * convertedValue / denom
-                                         in Right ([SparseTriple idx j value | convertedValue /= 0, idx /= j], warnings)
-                        Nothing -> Right ([], warnings)
-
-        buildActivityTriplets (j, consumerPid) =
-            let consumerActivity = dbActivities V.! fromIntegral consumerPid
-                consumerKey = dbProcessIdTable V.! fromIntegral consumerPid
-                normalizationFactor = activityNormFactor consumerActivity consumerKey
-                buildNormalizedTechTriple = buildTechTriple normalizationFactor j consumerActivity consumerPid
-                results = map buildNormalizedTechTriple (exchanges consumerActivity)
-             in -- Short-circuit on first Left (unit conversion error)
-                case lefts results of
-                    (err : _) -> Left err
-                    [] -> let rs = rights results in Right (concatMap fst rs, concatMap snd rs)
-
-    -- Collect results, failing on first unit conversion error
-    let activityRange = [(fromIntegral j, j) | j <- [0 .. fromIntegral activityCount - 1 :: ProcessId]]
-        activityResults = map buildActivityTriplets activityRange
-    case lefts activityResults of
-        (err : _) -> return $ Left err
-        [] -> do
-            let allResults = rights activityResults
-                !techTriples = VU.fromList $ concatMap fst allResults
-                techWarnings = concatMap snd allResults
-
-            -- Emit warnings in IO context
+    case buildTechTriples unitConfig unitDB tables supplierRefUnits of
+        Left err -> pure (Left err)
+        Right (techTriples, techWarnings) -> do
             mapM_ (reportProgress Warning) techWarnings
-
             reportMatrixOperation ("Technosphere matrix: " ++ show (VU.length techTriples) ++ " non-zero entries")
 
-            -- Build biosphere sparse triplets
             reportMatrixOperation "Building biosphere matrix triplets"
-            let bioFlowUUIDs =
-                    V.fromList $
-                        sort $
-                            S.toList $
-                                S.fromList
-                                    [exchangeFlowId ex | pid <- [0 .. fromIntegral activityCount - 1 :: Int], let act = dbActivities V.! pid, ex <- exchanges act, isBiosphereExchange ex]
-                bioFlowCount = fromIntegral $ V.length bioFlowUUIDs :: Int32
-                bioFlowIndex = M.fromList $ zip (V.toList bioFlowUUIDs) [0 ..]
-
-                !bioTriples =
-                    let buildBioTriple normalizationFactor j _activity ex
-                            | not (isBiosphereExchange ex) = []
-                            | otherwise =
-                                case M.lookup (exchangeFlowId ex) bioFlowIndex of
-                                    Just i ->
-                                        let rawValue = exchangeAmount ex
-                                            denom = if normalizationFactor > 1e-15 then normalizationFactor else 1.0
-                                            value = rawValue / denom
-                                         in [SparseTriple i j value | rawValue /= 0]
-                                    Nothing -> []
-
-                        buildActivityBioTriplets (j, pid) =
-                            let activity = dbActivities V.! fromIntegral pid
-                                activityKey = dbProcessIdTable V.! fromIntegral pid
-                                normalizationFactor = activityNormFactor activity activityKey
-                             in concatMap (buildBioTriple normalizationFactor j activity) (exchanges activity)
-                     in VU.fromList $ concatMap buildActivityBioTriplets activityRange
+            let !bioFlowUUIDs = collectBioFlowOrder (itActivities tables)
+                !bioTriples = buildBioTriples bioFlowUUIDs tables
+                bioFlowCount = fromIntegral (V.length bioFlowUUIDs)
 
             reportMatrixOperation ("Biosphere matrix: " ++ show (VU.length bioTriples) ++ " non-zero entries")
             reportMatrixOperation "Database with matrices built successfully"
-            reportMatrixOperation ("Final matrix stats: " ++ show (VU.length techTriples) ++ " tech entries, " ++ show (VU.length bioTriples) ++ " bio entries")
+            reportMatrixOperation
+                ( "Final matrix stats: "
+                    ++ show (VU.length techTriples)
+                    ++ " tech entries, "
+                    ++ show (VU.length bioTriples)
+                    ++ " bio entries"
+                )
 
             reportMatrixOperation "Building product index"
-            let !productIndex = buildProductIndex dbActivities dbProcessIdTable techFlowDB
+            let !productIndex = buildProductIndex (itActivities tables) (itProcessIdTable tables) techFlowDB
             reportMatrixOperation ("Product index: " ++ show (M.size (piByUUID productIndex)) ++ " products indexed")
 
-            return $
+            pure $
                 Right
                     Database
-                        { dbProcessIdTable = dbProcessIdTable
-                        , dbProcessIdLookup = dbProcessIdLookup
-                        , dbActivityUUIDIndex = dbActivityUUIDIndex
-                        , dbActivityProductsIndex = dbActivityProductsIndex
+                        { dbProcessIdTable = itProcessIdTable tables
+                        , dbProcessIdLookup = itProcessIdLookup tables
+                        , dbActivityUUIDIndex = itActivityUUIDIndex tables
+                        , dbActivityProductsIndex = itActivityProductsIndex tables
                         , dbProductIndex = productIndex
-                        , dbActivities = dbActivities
+                        , dbActivities = itActivities tables
                         , dbTechFlows = techFlowDB
                         , dbBioFlows = bioFlowDB
                         , dbWasteFlows = wasteFlowDB
diff --git a/src/Database/MatrixBuild.hs b/src/Database/MatrixBuild.hs
new file mode 100644
index 00000000..d1f9e0ef
--- /dev/null
+++ b/src/Database/MatrixBuild.hs
@@ -0,0 +1,234 @@
+{-# LANGUAGE OverloadedStrings #-}
+
+{- | Pure builders for the sparse matrices and UUID interning tables that back
+'Database'. The IO entrypoint in "Database" composes these helpers and adds
+progress reporting; all numerical work lives here.
+
+Sign and normalization conventions are documented on
+'buildDatabaseWithMatrices'.
+-}
+module Database.MatrixBuild (
+    InterningTables (..),
+    buildInterningTables,
+    buildSupplierRefUnits,
+    collectBioFlowOrder,
+    buildTechTriples,
+    buildBioTriples,
+) where
+
+import Control.Applicative ((<|>))
+import Data.Foldable (fold)
+import Data.Int (Int32)
+import Data.List (sort)
+import qualified Data.Map as M
+import qualified Data.Set as S
+import Data.Text (Text)
+import qualified Data.Text as T
+import qualified Data.UUID as UUID
+import qualified Data.Vector as V
+import qualified Data.Vector.Unboxed as VU
+import Types
+import UnitConversion (UnitConfig, convertUnit, normalizeUnit)
+
+{- | Per-process lookup tables built once from the sorted activity-key list.
+
+All fields share a single 'zip' [0..] sortedKeys' traversal, so row order
+and ProcessId ↔ (UUID, UUID) consistency hold by construction.
+-}
+data InterningTables = InterningTables
+    { itProcessIdTable :: !(V.Vector (UUID, UUID))
+    , itProcessIdLookup :: !(M.Map (UUID, UUID) ProcessId)
+    , itActivityUUIDIndex :: !(M.Map UUID ProcessId)
+    , itActivityProductsIndex :: !(M.Map UUID [ProcessId])
+    , itActivities :: !(V.Vector Activity)
+    , itActivityCount :: !Int32
+    }
+
+buildInterningTables :: M.Map (UUID, UUID) Activity -> InterningTables
+buildInterningTables activityMap =
+    InterningTables
+        { itProcessIdTable = V.fromList sortedKeys
+        , itProcessIdLookup = M.fromList [(k, pid) | (pid, k) <- indexedKeys]
+        , itActivityUUIDIndex = M.fromList [(actUUID, pid) | (pid, (actUUID, _)) <- indexedKeys]
+        , itActivityProductsIndex = M.fromListWith (++) [(actUUID, [pid]) | (pid, (actUUID, _)) <- indexedKeys]
+        , itActivities = V.fromList [activityMap M.! k | k <- sortedKeys]
+        , itActivityCount = fromIntegral (length sortedKeys)
+        }
+  where
+    sortedKeys = sort (M.keys activityMap)
+    indexedKeys = zip [0 ..] sortedKeys
+
+{- | Reference-product output unit for each activity (empty when the activity
+has no produced reference exchange — same fallback as the previous inline
+expression).
+-}
+buildSupplierRefUnits :: UnitDB -> V.Vector Activity -> V.Vector Text
+buildSupplierRefUnits unitDB = V.map refUnit
+  where
+    refUnit act =
+        case [ex | ex <- exchanges act, exchangeIsReference ex, not (exchangeIsInput ex)] of
+            (ex : _) -> getUnitNameForExchange unitDB ex
+            [] -> ""
+
+-- | Ascending vector of every biosphere flow UUID present in the activity set.
+collectBioFlowOrder :: V.Vector Activity -> V.Vector UUID
+collectBioFlowOrder activities =
+    V.fromList . S.toAscList . S.fromList $
+        [ exchangeFlowId ex
+        | act <- V.toList activities
+        , ex <- exchanges act
+        , isBiosphereExchange ex
+        ]
+
+{- | Divide-by-zero guard for normalization factors. Activities with no
+reference output normalize by 1.0 instead of a near-zero denominator.
+-}
+safeDenom :: Double -> Double
+safeDenom f = if f > 1e-15 then f else 1.0
+
+{- | Producer cascade for technosphere exchanges: prefer the resolved process
+link, else look up the (activityUUID, flowUUID) pair. This is the
+@Alternative@ on @Maybe@ — the @Maybe@ analogue of @firstNonEmpty@ in
+"Database.CrossLinking".
+-}
+findProducer :: M.Map (UUID, UUID) ProcessId -> Exchange -> Maybe ProcessId
+findProducer lkp ex =
+    exchangeProcessLinkId ex
+        <|> (exchangeActivityLinkId ex >>= \actUUID -> M.lookup (actUUID, exchangeFlowId ex) lkp)
+
+{- | Warning text for an exchange whose declared producer cannot be located.
+Zero-amount placeholder exchanges produce no warning.
+-}
+missingActivityWarning :: Activity -> Exchange -> UUID -> [String]
+missingActivityWarning consumer ex actUUID
+    | abs (exchangeAmount ex) <= 1e-15 = []
+    | otherwise =
+        [ "Missing activity-product pair referenced by exchange:\n"
+            ++ "  Activity UUID: "
+            ++ T.unpack (UUID.toText actUUID)
+            ++ "\n"
+            ++ "  Product UUID: "
+            ++ T.unpack (UUID.toText (exchangeFlowId ex))
+            ++ "\n"
+            ++ "  Consumer: "
+            ++ T.unpack (activityName consumer)
+            ++ "\n"
+            ++ "  Expected file: "
+            ++ T.unpack (UUID.toText actUUID)
+            ++ "_"
+            ++ T.unpack (UUID.toText (exchangeFlowId ex))
+            ++ ".spold\n"
+            ++ "  This exchange will be skipped."
+        ]
+
+unitConversionError :: Activity -> Text -> Text -> Text
+unitConversionError consumer fromU toU =
+    "Unknown unit conversion: \""
+        <> fromU
+        <> "\" \8594 \""
+        <> toU
+        <> "\" in "
+        <> activityName consumer
+        <> " \8212 add these units to [[units]] CSV"
+
+{- | Flatten the activity set into a stream of @(normFactor, j, activity, ex)@
+tuples. Encapsulates the shared "for each activity j: get the activity,
+get its key, get its norm factor, fold its exchanges" boilerplate.
+-}
+perActivity :: InterningTables -> [(Double, ProcessId, Activity, Exchange)]
+perActivity tables =
+    [ (normFactor, j, act, ex)
+    | j <- [0 .. itActivityCount tables - 1]
+    , let act = itActivities tables V.! fromIntegral j
+    , let key = itProcessIdTable tables V.! fromIntegral j
+    , let normFactor = activityNormFactor act key
+    , ex <- exchanges act
+    ]
+
+{- | Technosphere sparse triplets + skipped-link warnings.
+
+Short-circuits on the first unit-conversion error. Accumulates triplets
+and warnings via the tuple Monoid, mirroring the @foldMap@ stats pattern
+used in "Method.Mapping".
+-}
+buildTechTriples ::
+    UnitConfig ->
+    UnitDB ->
+    InterningTables ->
+    V.Vector Text ->
+    Either Text (VU.Vector SparseTriple, [String])
+buildTechTriples unitConfig unitDB tables supplierRefUnits =
+    fmap pack (traverse step (perActivity tables))
+  where
+    lkp = itProcessIdLookup tables
+    actCount = itActivityCount tables
+    step (normFactor, j, act, ex) =
+        techTriple unitConfig unitDB lkp supplierRefUnits actCount normFactor j act ex
+    pack rs = let (ts, ws) = fold rs in (VU.fromList ts, ws)
+
+techTriple ::
+    UnitConfig ->
+    UnitDB ->
+    M.Map (UUID, UUID) ProcessId ->
+    V.Vector Text ->
+    Int32 ->
+    Double ->
+    ProcessId ->
+    Activity ->
+    Exchange ->
+    Either Text ([SparseTriple], [String])
+techTriple unitConfig unitDB lkp supplierRefUnits actCount normFactor j consumer ex
+    -- Biosphere flows live in B; reference products sit on the diagonal of (I-A).
+    -- WasteExchanges share A: same product-link calculation as a technosphere flow.
+    -- Orphan waste outputs (no activityLinkId) drop out below when findProducer is Nothing.
+    | isBiosphereExchange ex = Right mempty
+    | exchangeIsReference ex = Right mempty
+    | otherwise =
+        case validProducerIdx of
+            Nothing -> Right ([], producerWarnings)
+            Just idx -> emitTriple idx
+  where
+    producerPid = findProducer lkp ex
+    validProducerIdx = do
+        pid <- producerPid
+        if pid >= 0 && pid < actCount then Just pid else Nothing
+    -- Only warn when the activity-link lookup itself failed (Just actUUID + no pid).
+    -- A missing processLink with no activityLink is a true orphan, not a data gap.
+    producerWarnings = case (producerPid, exchangeActivityLinkId ex) of
+        (Nothing, Just actUUID) -> missingActivityWarning consumer ex actUUID
+        _ -> []
+
+    emitTriple idx =
+        let raw = exchangeAmount ex
+            exchUnit = getUnitNameForExchange unitDB ex
+            suppUnit = supplierRefUnits V.! fromIntegral idx
+            needsConversion =
+                normalizeUnit exchUnit /= normalizeUnit suppUnit
+                    && not (T.null exchUnit)
+                    && not (T.null suppUnit)
+         in case (needsConversion, convertUnit unitConfig exchUnit suppUnit raw) of
+                (True, Nothing) -> Left (unitConversionError consumer exchUnit suppUnit)
+                (True, Just v) -> Right (triplesFor idx v, [])
+                (False, _) -> Right (triplesFor idx raw, [])
+
+    triplesFor idx v =
+        let sign = if exchangeIsInput ex then 1 else -1
+            value = sign * v / safeDenom normFactor
+         in [SparseTriple idx j value | v /= 0, idx /= j]
+
+{- | Biosphere sparse triplets. No unit conversion or producer cascade — each
+biosphere exchange maps directly to its row via 'collectBioFlowOrder'.
+-}
+buildBioTriples :: V.Vector UUID -> InterningTables -> VU.Vector SparseTriple
+buildBioTriples bioOrder tables =
+    VU.fromList $ concatMap step (perActivity tables)
+  where
+    bioIndex = M.fromList $ zip (V.toList bioOrder) [0 ..]
+    step (normFactor, j, _act, ex)
+        | not (isBiosphereExchange ex) = []
+        | otherwise = case M.lookup (exchangeFlowId ex) bioIndex of
+            Nothing -> []
+            Just i ->
+                let raw = exchangeAmount ex
+                    value = raw / safeDenom normFactor
+                 in [SparseTriple i j value | raw /= 0]
diff --git a/volca.cabal b/volca.cabal
index a5269356..b57034a5 100644
--- a/volca.cabal
+++ b/volca.cabal
@@ -39,6 +39,7 @@ library
                      , Database.Manager
                      , Database.Loader
                      , Database.CrossLinking
+                     , Database.MatrixBuild
                      , Database.Upload
                      , Database.UploadedDatabase
                      , SynonymDB

From 4b751e3a53e86820221982c552d99854c7939cfc Mon Sep 17 00:00:00 2001
From: Christophe Combelles <ccomb@free.fr>
Date: Wed, 27 May 2026 16:47:36 +0200
Subject: [PATCH 24/43] refactor(service): split applySubstitutionsAt planning
 from effect
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The 4-case classifier in applySubstitutionsAt dispatched on a tuple of
booleans (fromDb == thisDbName, toDb == thisDbName) and interleaved
matrix-perturbation arithmetic with IO calls into perturbA. The cases
were unnamed and the rank-1 update was reconstructed inline in each
branch.

Introduce three substitution-domain ADTs:

* Endpoint = Here ProcessId UUIDs | Elsewhere DepRef — replaces the
  tuple-of-bools dispatch with named, exhaustive constructors.
* DepRef — the (dbName, db, pid, uuids) bundle the cross-DB helpers
  consume; replaces today's anonymous 4-tuple.
* RankOneUpdate — the planned (consumerPid, perturbation, extras) value
  that planUpdate produces and applyRankOne consumes.

planUpdate is now a pure four-clause Either, one clause per case, with
findTechCoefficient / findStaticCrossDBLink lifted into requireTech /
requireStatic helpers that surface the existing 422-mapped MatrixError.
applyRankOne shrinks to a one-line mapM over perturbA. resolveRef
becomes resolveEndpoint, returning the named Endpoint.

No behaviour change: substitution + cross-DB specs still pass (44
examples). Helpers remain in the where-block; foldM/ExceptT cleanup
and lifting to top-level follow in the next commits.
---
 src/Service.hs | 242 +++++++++++++++++++++++++++----------------------
 1 file changed, 136 insertions(+), 106 deletions(-)

diff --git a/src/Service.hs b/src/Service.hs
index de004c89..36719c13 100644
--- a/src/Service.hs
+++ b/src/Service.hs
@@ -2283,6 +2283,38 @@ computeScalingVectorWithSubstitutionsCrossDB depLookup db rootDbName solver pid
             (d : _) -> Just d
             [] -> Nothing
 
+{- | A substitution endpoint resolved against the loaded databases.
+'Here' means the activity lives in @thisDb@ (no cross-DB plumbing
+needed); 'Elsewhere' carries the dep-DB descriptor required to look up
+static cross-DB links and synthesise virtual ones.
+
+The ADT replaces a @(Bool, Bool)@ dispatch on @(fromDb == thisDbName,
+toDb == thisDbName)@: each constructor names what the boolean meant.
+-}
+data Endpoint
+    = Here !ProcessId !(UUID, UUID)
+    | Elsewhere !DepRef
+
+-- | An endpoint that lives in a dependency database.
+data DepRef = DepRef
+    { drDbName :: !Text
+    , drDb :: !Database
+    , drPid :: !ProcessId
+    , drUUIDs :: !(UUID, UUID)
+    }
+
+{- | A planned rank-1 perturbation of one consumer column plus any virtual
+cross-DB links the substitution introduces or cancels. Computed purely
+from resolved endpoints by 'planUpdate'; consumed effectfully by
+'applyRankOne'. Separating the two keeps the four substitution cases
+out of the IO layer and makes them straightforward to test in isolation.
+-}
+data RankOneUpdate = RankOneUpdate
+    { ruConsumerPid :: !ProcessId
+    , ruPerturb :: ![(Int, Double)]
+    , ruExtras :: ![CrossDBLink]
+    }
+
 {- | Apply all substitutions whose consumer lives in @thisDbName@ to the
 given scaling vectors. Substitutions whose consumer lives elsewhere are
 skipped at this level — they'll match at the DB where their consumer
@@ -2353,130 +2385,128 @@ applySubstitutionsAt depLookup thisDb thisDbObj rootDb solver scalings allSubs =
         let (_, cPidText) = parseSubRef rootDb (subConsumer sub)
         case resolveActivityAndProcessId thisDb cPidText of
             Left e -> pure (Left e)
-            Right (consumerPid, _) -> do
+            Right (cPid, _) -> do
                 let (fromDb, fromPidText) = parseSubRef rootDb (subFrom sub)
                     (toDb, toPidText) = parseSubRef rootDb (subTo sub)
-                eFrom <- resolveRef fromDb fromPidText
-                eTo <- resolveRef toDb toPidText
+                eFrom <- resolveEndpoint fromDb fromPidText
+                eTo <- resolveEndpoint toDb toPidText
                 case (eFrom, eTo) of
                     (Left e, _) -> pure (Left e)
                     (_, Left e) -> pure (Left e)
-                    (Right fromRef, Right toRef) ->
-                        classify mFact xs consumerPid sub fromRef toRef
-
-    classify mFact xs consumerPid sub fromRef toRef =
-        let (fromDb, fromPid, fromUUIDs, _) = fromRef
-            (toDb, toPid, toUUIDs, toDbObj) = toRef
-         in case (fromDb == thisDbName, toDb == thisDbName) of
-                (True, True) ->
-                    case findTechCoefficient thisDb consumerPid fromPid of
-                        Nothing -> noTechLinkErr sub consumerPid
-                        Just aNorm ->
-                            applyRankOne
-                                mFact
-                                xs
-                                consumerPid
-                                [ (fromIntegral fromPid, aNorm)
-                                , (fromIntegral toPid, -aNorm)
-                                ]
-                                []
-                (True, False) ->
-                    -- Case B: drop this-DB oldSup, route demand to other-DB newSup.
-                    case findTechCoefficient thisDb consumerPid fromPid of
-                        Nothing -> noTechLinkErr sub consumerPid
-                        Just aNorm ->
-                            let aRaw = aNorm * activityNormalizationFactor thisDb consumerPid
-                                newLk = mkVirtualLink thisDb consumerPid toDbObj toDb toUUIDs toPid aRaw
-                             in applyRankOne
-                                    mFact
-                                    xs
-                                    consumerPid
-                                    [(fromIntegral fromPid, aNorm)]
-                                    [newLk]
-                (False, True) ->
-                    -- Case C: cancel existing cross-DB link, pull new this-DB supplier.
-                    case findStaticCrossDBLink thisDb consumerPid fromDb fromUUIDs of
-                        Nothing -> noStaticLinkErr sub consumerPid fromDb fromPid
-                        Just staticLk ->
-                            let aRaw = cdlCoefficient staticLk
-                                aNorm = aRaw / activityNormalizationFactor thisDb consumerPid
-                                cancel = staticLk{cdlCoefficient = -aRaw}
-                             in applyRankOne
-                                    mFact
-                                    xs
-                                    consumerPid
-                                    [(fromIntegral toPid, -aNorm)]
-                                    [cancel]
-                (False, False) ->
-                    -- Case D: re-route demand between two other DBs; this-DB x unchanged.
-                    case findStaticCrossDBLink thisDb consumerPid fromDb fromUUIDs of
-                        Nothing -> noStaticLinkErr sub consumerPid fromDb fromPid
-                        Just staticLk ->
-                            let aRaw = cdlCoefficient staticLk
-                                cancel = staticLk{cdlCoefficient = -aRaw}
-                                newLk = mkVirtualLink thisDb consumerPid toDbObj toDb toUUIDs toPid aRaw
-                             in applyRankOne mFact xs consumerPid [] [cancel, newLk]
-
-    applyRankOne mFact xs consumerPid perturb extra = do
+                    (Right fromEp, Right toEp) ->
+                        case planUpdate sub cPid fromEp toEp of
+                            Left e -> pure (Left e)
+                            Right upd -> applyRankOne mFact xs upd
+
+    planUpdate ::
+        Substitution ->
+        ProcessId ->
+        Endpoint ->
+        Endpoint ->
+        Either ServiceError RankOneUpdate
+    -- Case A: both suppliers in this DB. Symmetric rank-1 on the consumer column.
+    planUpdate sub cPid (Here fromPid _) (Here toPid _) = do
+        a <- requireTech sub cPid fromPid
+        Right $
+            RankOneUpdate
+                cPid
+                [(fromIntegral fromPid, a), (fromIntegral toPid, -a)]
+                []
+    -- Case B: drop this-DB oldSup, route demand to other-DB newSup.
+    -- aRaw = aNorm * normFactor (the cross-DB link stores *raw* coefficients).
+    planUpdate sub cPid (Here fromPid _) (Elsewhere toRef) = do
+        a <- requireTech sub cPid fromPid
+        let aRaw = a * activityNormalizationFactor thisDb cPid
+            newLk = virtualLinkTo cPid toRef aRaw
+        Right $ RankOneUpdate cPid [(fromIntegral fromPid, a)] [newLk]
+    -- Case C: cancel existing cross-DB link, pull new this-DB supplier.
+    planUpdate sub cPid (Elsewhere fromRef) (Here toPid _) = do
+        s <- requireStatic sub cPid fromRef
+        let aRaw = cdlCoefficient s
+            aNorm = aRaw / activityNormalizationFactor thisDb cPid
+            cancel = s{cdlCoefficient = -aRaw}
+        Right $ RankOneUpdate cPid [(fromIntegral toPid, -aNorm)] [cancel]
+    -- Case D: re-route demand between two other DBs; this-DB x unchanged.
+    -- Unlike Case B, the new-link coefficient is the *raw* static value,
+    -- not aNorm*normFactor — we're forwarding what the cancelled link carried.
+    planUpdate sub cPid (Elsewhere fromRef) (Elsewhere toRef) = do
+        s <- requireStatic sub cPid fromRef
+        let aRaw = cdlCoefficient s
+            cancel = s{cdlCoefficient = -aRaw}
+            newLk = virtualLinkTo cPid toRef aRaw
+        Right $ RankOneUpdate cPid [] [cancel, newLk]
+
+    virtualLinkTo cPid toRef =
+        mkVirtualLink thisDb cPid (drDb toRef) (drDbName toRef) (drUUIDs toRef) (drPid toRef)
+
+    requireTech sub cPid fromPid =
+        maybe (Left $ noTechLink sub cPid) Right $
+            findTechCoefficient thisDb cPid fromPid
+
+    requireStatic sub cPid fromRef =
+        maybe (Left $ noStaticLink sub cPid (drDbName fromRef) (drPid fromRef)) Right $
+            findStaticCrossDBLink thisDb cPid (drDbName fromRef) (drUUIDs fromRef)
+
+    applyRankOne mFact xs upd = do
         -- Apply the same rank-1 update to each of the K vectors. z depends
         -- only on u (not x); a future optimization can compute z once.
         results <-
             mapM
-                (\x -> perturbA thisDb mFact x (fromIntegral consumerPid) perturb)
+                (\x -> perturbA thisDb mFact x (fromIntegral (ruConsumerPid upd)) (ruPerturb upd))
                 xs
         pure $ case sequence results of
             Left msg -> Left (MatrixError msg)
-            Right xs' -> Right (xs', extra)
-
-    noTechLinkErr sub consumerPid =
-        pure $
-            Left $
-                MatrixError $
-                    "No technosphere link from "
-                        <> processIdToText thisDb consumerPid
-                        <> " to supplier "
-                        <> subFrom sub
-
-    noStaticLinkErr sub consumerPid fromDb fromPid =
-        pure $
-            Left $
-                MatrixError $
-                    "no cross-DB link from "
-                        <> processIdToText thisDb consumerPid
-                        <> " to "
-                        <> fromDb
-                        <> "::"
-                        <> T.pack (show fromPid)
-                        <> " (requested by substitution "
-                        <> subFrom sub
-                        <> " -> "
-                        <> subTo sub
-                        <> ")"
-
-    resolveRef :: Text -> Text -> IO (Either ServiceError (Text, ProcessId, (UUID, UUID), Database))
-    resolveRef refDb pidText
-        | refDb == thisDbName = case resolveActivityAndProcessId thisDb pidText of
-            Left e -> pure (Left e)
-            Right (p, _) ->
-                let uuids = dbProcessIdTable thisDb V.! fromIntegral p
-                 in pure (Right (refDb, p, uuids, thisDb))
+            Right xs' -> Right (xs', ruExtras upd)
+
+    noTechLink sub cPid =
+        MatrixError $
+            "No technosphere link from "
+                <> processIdToText thisDb cPid
+                <> " to supplier "
+                <> subFrom sub
+
+    noStaticLink sub cPid fromDb fromPid =
+        MatrixError $
+            "no cross-DB link from "
+                <> processIdToText thisDb cPid
+                <> " to "
+                <> fromDb
+                <> "::"
+                <> T.pack (show fromPid)
+                <> " (requested by substitution "
+                <> subFrom sub
+                <> " -> "
+                <> subTo sub
+                <> ")"
+
+    resolveEndpoint :: Text -> Text -> IO (Either ServiceError Endpoint)
+    resolveEndpoint refDb pidText
+        | refDb == thisDbName =
+            pure $ case resolveActivityAndProcessId thisDb pidText of
+                Left e -> Left e
+                Right (p, _) ->
+                    Right $ Here p (dbProcessIdTable thisDb V.! fromIntegral p)
         | otherwise = do
             mPair <- depLookup refDb
-            case mPair of
+            pure $ case mPair of
                 Nothing ->
-                    pure $
-                        Left $
-                            MatrixError $
-                                "substitution references unloaded database: " <> refDb
+                    Left $
+                        MatrixError $
+                            "substitution references unloaded database: " <> refDb
                 Just (depDb, _) -> case resolveActivityAndProcessId depDb pidText of
                     Left _ ->
-                        pure $
-                            Left $
-                                MatrixError $
-                                    "substitution PID not found in " <> refDb <> ": " <> pidText
+                        Left $
+                            MatrixError $
+                                "substitution PID not found in " <> refDb <> ": " <> pidText
                     Right (p, _) ->
-                        let uuids = dbProcessIdTable depDb V.! fromIntegral p
-                         in pure (Right (refDb, p, uuids, depDb))
+                        Right $
+                            Elsewhere
+                                DepRef
+                                    { drDbName = refDb
+                                    , drDb = depDb
+                                    , drPid = p
+                                    , drUUIDs = dbProcessIdTable depDb V.! fromIntegral p
+                                    }
 
 {- | Build a synthesized 'CrossDBLink' for a what-if substitution targeting a
 dep-DB supplier. Mirrors the fields a real (load-time) link would have so

From 5aa7630616da15467433ddf6a75622fb6174f752 Mon Sep 17 00:00:00 2001
From: Christophe Combelles <ccomb@free.fr>
Date: Wed, 27 May 2026 16:50:02 +0200
Subject: [PATCH 25/43] refactor(service): drive applySubstitutionsAt with
 foldM over ExceptT

Replace the hand-written applyAll/applySub recursion (manual Left
propagation, threaded accumulators) with foldM in ExceptT ServiceError
IO. One step function now sequences the three Either/IO operations
(resolve consumer, resolve from/to endpoints, plan the rank-1 update,
apply it) using monadic bind, so the four-case (Left e, _) ladder
collapses to natural short-circuit on the first failure.

Resolution order (from before to) and the call-once placement of
getFactorization are preserved verbatim; both were called out in the
plan as behaviour-sensitive.
---
 src/Service.hs | 47 ++++++++++++++++++++---------------------------
 1 file changed, 20 insertions(+), 27 deletions(-)

diff --git a/src/Service.hs b/src/Service.hs
index 36719c13..ebd8ab6c 100644
--- a/src/Service.hs
+++ b/src/Service.hs
@@ -8,6 +8,8 @@ import API.Types (ActivityForAPI (..), ActivityInfo (..), ActivityLinks (..), Ac
 import CLI.Types (DebugMatricesOptions (..))
 import Control.Concurrent.Async (mapConcurrently)
 import Control.Exception (SomeException, try)
+import Control.Monad (foldM)
+import Control.Monad.Trans.Except (ExceptT (..), runExceptT)
 import Data.Aeson (Value, object, toJSON, (.=))
 import Data.Either (fromRight, lefts, rights)
 import Data.Int (Int32)
@@ -2352,13 +2354,12 @@ applySubstitutionsAt ::
     -- | full sub list (filtered to consumer==this)
     [Substitution] ->
     IO (Either ServiceError ([U.Vector Double], [CrossDBLink]))
-applySubstitutionsAt depLookup thisDb thisDbObj rootDb solver scalings allSubs = do
-    let localSubs = filter consumerLivesHere allSubs
-    case localSubs of
+applySubstitutionsAt depLookup thisDb thisDbObj rootDb solver scalings allSubs =
+    case filter consumerLivesHere allSubs of
         [] -> pure $ Right (scalings, [])
-        _ -> do
+        localSubs -> do
             mFact <- getFactorization solver
-            applyAll mFact scalings [] localSubs
+            runExceptT $ foldM (step mFact) (scalings, []) localSubs
   where
     thisDbName = unThisDb thisDbObj
 
@@ -2374,29 +2375,21 @@ applySubstitutionsAt depLookup thisDb thisDbObj rootDb solver scalings allSubs =
         let (cDb, _) = parseSubRef rootDb (subConsumer sub)
          in cDb == thisDbName
 
-    applyAll _ xs links [] = pure $ Right (xs, links)
-    applyAll mFact xs links (sub : rest) = do
-        res <- applySub mFact xs sub
-        case res of
-            Left e -> pure (Left e)
-            Right (xs', extraLks) -> applyAll mFact xs' (links ++ extraLks) rest
-
-    applySub mFact xs sub = do
+    -- Resolve, plan, and apply one substitution; thread the K scalings
+    -- and the accumulated virtual links. 'from' is resolved before 'to'
+    -- so a failing 'from' wins when both refs are unresolvable.
+    step mFact (xs, links) sub = do
         let (_, cPidText) = parseSubRef rootDb (subConsumer sub)
-        case resolveActivityAndProcessId thisDb cPidText of
-            Left e -> pure (Left e)
-            Right (cPid, _) -> do
-                let (fromDb, fromPidText) = parseSubRef rootDb (subFrom sub)
-                    (toDb, toPidText) = parseSubRef rootDb (subTo sub)
-                eFrom <- resolveEndpoint fromDb fromPidText
-                eTo <- resolveEndpoint toDb toPidText
-                case (eFrom, eTo) of
-                    (Left e, _) -> pure (Left e)
-                    (_, Left e) -> pure (Left e)
-                    (Right fromEp, Right toEp) ->
-                        case planUpdate sub cPid fromEp toEp of
-                            Left e -> pure (Left e)
-                            Right upd -> applyRankOne mFact xs upd
+            (fromDb, fromPidText) = parseSubRef rootDb (subFrom sub)
+            (toDb, toPidText) = parseSubRef rootDb (subTo sub)
+        (cPid, _) <- hoistEither $ resolveActivityAndProcessId thisDb cPidText
+        fromEp <- ExceptT $ resolveEndpoint fromDb fromPidText
+        toEp <- ExceptT $ resolveEndpoint toDb toPidText
+        upd <- hoistEither $ planUpdate sub cPid fromEp toEp
+        (xs', extra) <- ExceptT $ applyRankOne mFact xs upd
+        pure (xs', links ++ extra)
+
+    hoistEither = ExceptT . pure
 
     planUpdate ::
         Substitution ->

From 92b885a0495669ed38f29b80fc45949d86e1af6f Mon Sep 17 00:00:00 2001
From: Christophe Combelles <ccomb@free.fr>
Date: Wed, 27 May 2026 23:06:42 +0200
Subject: [PATCH 26/43] refactor(service): collapse convertExchangeWithUnit via
 LambdaCase + Alternative
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The 91-line where-clause inside convertActivityForAPI re-matched the
Exchange variant six times (three parallel flow lookups, one target
resolver, one flow-name merger) and threaded a correlated
(Maybe, Maybe, Maybe) triple that could drift apart in principle.

Refactor:

- TargetRef record replaces the correlated triple — either all three
  fields are present or none. Project to the wire-format Maybes at the
  boundary only.
- resolveTarget / resolveFlow each do one \case over Exchange,
  eliminating the per-side empty-Nothing repetitions.
- Three named resolvers (byActivityUUID / byProductFlow / byCrossDBLink)
  return Maybe TargetRef; the SimaPro fallback chain is now <|>.
- Two duplicated 5-line projections (Activity -> triple, CrossDBLink ->
  triple) extracted as activityToTarget / crossDBLinkToTarget.
- crossDBLinkMap lifted out of the where to module-level
  buildCrossDBLinkMap.

Semantics preserved: technosphere broken-link (linkId set but
unresolvable) returns Nothing rather than falling through to the
product-flow path, matching the original guard ladder.

Verified: cabal build clean, cabal test 1107 / 0 failures.
---
 src/Service.hs | 230 ++++++++++++++++++++++++++-----------------------
 1 file changed, 124 insertions(+), 106 deletions(-)

diff --git a/src/Service.hs b/src/Service.hs
index 1be5fe68..2bd50ee0 100644
--- a/src/Service.hs
+++ b/src/Service.hs
@@ -1,4 +1,5 @@
 {-# LANGUAGE BangPatterns #-}
+{-# LANGUAGE LambdaCase #-}
 {-# LANGUAGE OverloadedStrings #-}
 {-# LANGUAGE TypeApplications #-}
 
@@ -6,6 +7,7 @@ module Service where
 
 import API.Types (ActivityForAPI (..), ActivityInfo (..), ActivityLinks (..), ActivityMetadata (..), ActivityStats (..), ActivitySummary (..), ApiFlow (..), ClassificationSystem (..), ConsumerResult (..), ConsumersResponse (..), CutoffWasteFlow (..), EdgeType (..), ExchangeDetail (..), ExchangeWithUnit (..), ExportNode (..), FlowDetail (..), FlowInfo (..), FlowRole (..), FlowSearchResult (..), FlowSummary (..), GraphEdge (..), GraphExport (..), GraphNode (..), InventoryExport (..), InventoryFlowDetail (..), InventoryMetadata (..), InventoryStatistics (..), NodeType (..), Perturbation (..), RootDb (..), SearchResults (..), Substitution (..), SupplyChainEdge (..), SupplyChainEntry (..), SupplyChainResponse (..), ThisDb (..), TreeEdge (..), TreeExport (..), TreeMetadata (..), parseSubRef, unresolvedFlowName)
 import CLI.Types (DebugMatricesOptions (..))
+import Control.Applicative ((<|>))
 import Control.Concurrent.Async (mapConcurrently)
 import Control.Exception (SomeException, try)
 import Control.Monad (foldM)
@@ -1082,6 +1084,7 @@ convertActivityForAPI unitCfg db processId activity =
             Just (activityUUID, _) -> getAllProductsForActivity db activityUUID
             Nothing -> []
         (refProdName, refProdAmount, refProdUnit) = getReferenceProductInfo (dbTechFlows db) (dbUnits db) activity
+        linkMap = buildCrossDBLinkMap db processId
      in ActivityForAPI
             { pfaProcessId = processIdToText db processId
             , pfaName = activityName activity
@@ -1094,115 +1097,130 @@ convertActivityForAPI unitCfg db processId activity =
             , pfaReferenceProductAmount = if T.null refProdName then Nothing else Just refProdAmount
             , pfaReferenceProductUnit = if T.null refProdName then Nothing else Just refProdUnit
             , pfaAllProducts = allProducts
-            , pfaExchanges = map convertExchangeWithUnit (exchanges activity)
+            , pfaExchanges = map (toExchangeWithUnit unitCfg db linkMap) (exchanges activity)
             , pfaNativeType = activityNativeType activity
             }
-  where
-    -- Cross-DB link lookup keyed by 'cdlConsumerFlowId' (the consumer-side flow
-    -- UUID). UUIDs are unique across flow kinds, so a tech "X" link and a waste
-    -- "X" link on the same activity cannot collide here.
-    crossDBLinkMap :: M.Map UUID CrossDBLink
-    crossDBLinkMap = case processIdToUUIDs db processId of
-        Just (actUUID, _) ->
-            M.fromList
-                [ (cdlConsumerFlowId link, link)
-                | link <- dbCrossDBLinks db
-                , cdlConsumerActUUID link == actUUID
-                , cdlConsumerFlowId link /= UUID.nil
-                ]
-        Nothing -> M.empty
 
-    convertExchangeWithUnit exchange =
-        let
-            -- Look up the flow in the appropriate side (tech vs bio vs waste) based on exchange variant.
-            techFlowInfo = case exchange of
-                TechnosphereExchange{techFlowId = fid} -> M.lookup fid (dbTechFlows db)
-                BiosphereExchange{} -> Nothing
-                WasteExchange{} -> Nothing
-            bioFlowInfo = case exchange of
-                BiosphereExchange{bioFlowId = fid} -> M.lookup fid (dbBioFlows db)
-                TechnosphereExchange{} -> Nothing
-                WasteExchange{} -> Nothing
-            wasteFlowInfo = case exchange of
-                WasteExchange{waFlowId = fid} -> M.lookup fid (dbWasteFlows db)
-                TechnosphereExchange{} -> Nothing
-                BiosphereExchange{} -> Nothing
-            (targetActivityName, targetActivityLocation, targetProcessId) = case exchange of
-                TechnosphereExchange{techFlowId = fId, techRole = role, techActivityLinkId = linkId}
-                    | (role == Input || role == ReferenceInput) && linkId /= UUID.nil ->
-                        case findActivityByActivityUUID db linkId of
-                            Just targetActivity ->
-                                let maybeProcessId = findProcessIdByActivityUUID db linkId
-                                    processIdText = fmap (processIdToText db) maybeProcessId
-                                 in (Just (activityName targetActivity), Just (activityLocation targetActivity), processIdText)
-                            Nothing -> (Nothing, Nothing, Nothing)
-                    | role == Input || role == ReferenceInput ->
-                        -- SimaPro path: linkId is nil, resolve by product flow UUID
-                        case findProcessIdByProductFlowWithFallback unitCfg db fId of
-                            Just pid
-                                | Just act <- getActivity db pid ->
-                                    (Just (activityName act), Just (activityLocation act), Just (processIdToText db pid))
-                            _ ->
-                                case M.lookup fId crossDBLinkMap of
-                                    Just link ->
-                                        let crossPid =
-                                                cdlSourceDatabase link
-                                                    <> "::"
-                                                    <> UUID.toText (cdlSupplierActUUID link)
-                                                    <> "_"
-                                                    <> UUID.toText (cdlSupplierProdUUID link)
-                                         in (Just (cdlFlowName link), Just (cdlLocation link), Just crossPid)
-                                    Nothing -> (Nothing, Nothing, Nothing)
-                    | otherwise -> (Nothing, Nothing, Nothing)
-                BiosphereExchange{} -> (Nothing, Nothing, Nothing)
-                -- A waste exchange consumed by a treatment activity links the
-                -- generator to that treatment, same shape as a technosphere
-                -- Input. Orphan waste outputs that the exact-match cross-DB
-                -- linker resolved show up via 'crossDBLinkMap' keyed on the
-                -- waste flow name. The remaining orphans stay unresolved.
-                WasteExchange{waActivityLinkId = linkId, waIsInput = True}
-                    | linkId /= UUID.nil ->
-                        case findActivityByActivityUUID db linkId of
-                            Just targetActivity ->
-                                let maybeProcessId = findProcessIdByActivityUUID db linkId
-                                    processIdText = fmap (processIdToText db) maybeProcessId
-                                 in (Just (activityName targetActivity), Just (activityLocation targetActivity), processIdText)
-                            Nothing -> (Nothing, Nothing, Nothing)
-                WasteExchange{waActivityLinkId = linkId, waIsInput = False, waFlowId = fid}
-                    | linkId == UUID.nil ->
-                        case M.lookup fid crossDBLinkMap of
-                            Just link ->
-                                let crossPid =
-                                        cdlSourceDatabase link
-                                            <> "::"
-                                            <> UUID.toText (cdlSupplierActUUID link)
-                                            <> "_"
-                                            <> UUID.toText (cdlSupplierProdUUID link)
-                                 in (Just (cdlFlowName link), Just (cdlLocation link), Just crossPid)
-                            Nothing -> (Nothing, Nothing, Nothing)
-                WasteExchange{} -> (Nothing, Nothing, Nothing)
-            -- When the exchange's flow UUID resolves on none of the three sides
-            -- we surface the raw UUID rather than a generic "unknown" — that
-            -- way the consumer can tell which flow failed to resolve and the
-            -- gap is debuggable instead of silently misnamed.
-            flowNameTxt = case (techFlowInfo, bioFlowInfo, wasteFlowInfo) of
-                (Just tf, _, _) -> tfName tf
-                (_, Just bf, _) -> bfName bf
-                (_, _, Just wf) -> wfName wf
-                _ -> "<unresolved flow " <> UUID.toText (exchangeFlowId exchange) <> ">"
-            compartment = bfCompartment =<< bioFlowInfo
-         in
-            ExchangeWithUnit
-                { ewuExchange = exchange
-                , ewuUnitName = getUnitNameForExchange (dbUnits db) exchange
-                , ewuFlowName = flowNameTxt
-                , ewuCompartment = compartment
-                , ewuTargetActivity = targetActivityName
-                , ewuTargetLocation = targetActivityLocation
-                , ewuTargetProcessId = targetProcessId
-                , ewuExComment = exchangeComment exchange
-                , ewuPedigree = exchangePedigree exchange
-                }
+{- | Resolved target activity for a technosphere or waste exchange. Either all
+three fields are present (Just TargetRef) or none (Nothing) — the formerly
+correlated triple of Maybes can no longer drift apart.
+-}
+data TargetRef = TargetRef
+    { trName :: !Text
+    , trLocation :: !Text
+    , trProcessId :: !Text
+    }
+
+activityToTarget :: Database -> ProcessId -> Activity -> TargetRef
+activityToTarget db pid act =
+    TargetRef (activityName act) (activityLocation act) (processIdToText db pid)
+
+crossDBLinkToTarget :: CrossDBLink -> TargetRef
+crossDBLinkToTarget link =
+    TargetRef
+        (cdlFlowName link)
+        (cdlLocation link)
+        ( cdlSourceDatabase link
+            <> "::"
+            <> UUID.toText (cdlSupplierActUUID link)
+            <> "_"
+            <> UUID.toText (cdlSupplierProdUUID link)
+        )
+
+-- | EcoSpold path: resolve a target by activity UUID.
+resolveByActivityUUID :: Database -> UUID -> Maybe TargetRef
+resolveByActivityUUID db linkId
+    | linkId == UUID.nil = Nothing
+    | otherwise = do
+        pid <- findProcessIdByActivityUUID db linkId
+        act <- getActivity db pid
+        pure (activityToTarget db pid act)
+
+-- | SimaPro path: resolve a target by product flow UUID.
+resolveByProductFlow :: UnitConfig -> Database -> UUID -> Maybe TargetRef
+resolveByProductFlow cfg db fId = do
+    pid <- findProcessIdByProductFlowWithFallback cfg db fId
+    act <- getActivity db pid
+    pure (activityToTarget db pid act)
+
+-- | Cross-database link resolution (orphan waste outputs, missing tech links).
+resolveByCrossDBLink :: M.Map UUID CrossDBLink -> UUID -> Maybe TargetRef
+resolveByCrossDBLink links fId = crossDBLinkToTarget <$> M.lookup fId links
+
+{- | Resolve the target activity (if any) for one exchange. Technosphere broken
+links (linkId set but unresolvable) do NOT fall through to the product-flow
+path — that matches the original behaviour. Use '<|>' to chain fallbacks only
+where the original code did.
+-}
+resolveTarget
+    :: UnitConfig
+    -> Database
+    -> M.Map UUID CrossDBLink
+    -> Exchange
+    -> Maybe TargetRef
+resolveTarget cfg db links = \case
+    TechnosphereExchange{techRole = role, techActivityLinkId = lid, techFlowId = fid}
+        | role /= Input && role /= ReferenceInput -> Nothing
+        | lid /= UUID.nil -> resolveByActivityUUID db lid
+        | otherwise -> resolveByProductFlow cfg db fid <|> resolveByCrossDBLink links fid
+    BiosphereExchange{} -> Nothing
+    WasteExchange{waIsInput = True, waActivityLinkId = lid}
+        | lid /= UUID.nil -> resolveByActivityUUID db lid
+        | otherwise -> Nothing
+    WasteExchange{waIsInput = False, waActivityLinkId = lid, waFlowId = fid}
+        | lid == UUID.nil -> resolveByCrossDBLink links fid
+        | otherwise -> Nothing
+
+{- | Flow name + (biosphere-only) compartment. Each variant has exactly one
+flow side by construction, so no Maybe-merge is needed downstream.
+-}
+resolveFlow :: Database -> Exchange -> Maybe (Text, Maybe Compartment)
+resolveFlow db = \case
+    TechnosphereExchange{techFlowId = fid} ->
+        (\tf -> (tfName tf, Nothing)) <$> M.lookup fid (dbTechFlows db)
+    BiosphereExchange{bioFlowId = fid} ->
+        (\bf -> (bfName bf, bfCompartment bf)) <$> M.lookup fid (dbBioFlows db)
+    WasteExchange{waFlowId = fid} ->
+        (\wf -> (wfName wf, Nothing)) <$> M.lookup fid (dbWasteFlows db)
+
+{- | Build the cross-DB link map for one activity, keyed by consumer flow UUID.
+UUIDs are unique across flow kinds, so a tech and a waste link on the same
+activity cannot collide here.
+-}
+buildCrossDBLinkMap :: Database -> ProcessId -> M.Map UUID CrossDBLink
+buildCrossDBLinkMap db pid = case processIdToUUIDs db pid of
+    Just (actUUID, _) ->
+        M.fromList
+            [ (cdlConsumerFlowId link, link)
+            | link <- dbCrossDBLinks db
+            , cdlConsumerActUUID link == actUUID
+            , cdlConsumerFlowId link /= UUID.nil
+            ]
+    Nothing -> M.empty
+
+toExchangeWithUnit
+    :: UnitConfig
+    -> Database
+    -> M.Map UUID CrossDBLink
+    -> Exchange
+    -> ExchangeWithUnit
+toExchangeWithUnit cfg db links exchange =
+    -- Surface the raw UUID when the flow does not resolve — a clear failure
+    -- the consumer can debug, not a silent "unknown".
+    let unresolvedName = "<unresolved flow " <> UUID.toText (exchangeFlowId exchange) <> ">"
+        (flowName, compartment) = fromMaybe (unresolvedName, Nothing) (resolveFlow db exchange)
+        target = resolveTarget cfg db links exchange
+     in ExchangeWithUnit
+            { ewuExchange = exchange
+            , ewuUnitName = getUnitNameForExchange (dbUnits db) exchange
+            , ewuFlowName = flowName
+            , ewuCompartment = compartment
+            , ewuTargetActivity = trName <$> target
+            , ewuTargetLocation = trLocation <$> target
+            , ewuTargetProcessId = trProcessId <$> target
+            , ewuExComment = exchangeComment exchange
+            , ewuPedigree = exchangePedigree exchange
+            }
 
 {- | Get reference product name from activity exchanges. Reference products
 are always technosphere.

From 2812e08a1204ff24c436ff430813f1bf2589b832 Mon Sep 17 00:00:00 2001
From: Christophe Combelles <ccomb@free.fr>
Date: Wed, 27 May 2026 23:23:51 +0200
Subject: [PATCH 27/43] refactor(service): unify getActivityExchangeDetails via
 FlowKind dispatch

The where-clause repeated the three-arm Exchange/flow-lookup ladder and
re-built the cross-DB link map by hand, mirroring the same shape as
convertActivityForAPI's old where-clause.

Lift the variant dispatch into a single 'FlowKind'-returning lookup:

- Types.hs: lookupExchangeFlow :: Database -> Exchange -> Maybe FlowKind
  plus flowKindCompartment :: FlowKind -> Maybe Compartment (alongside
  the existing flowKind* projections). One arm per variant, exhaustive.

- Service.hs: getActivityExchangeDetails is now four lines (filter, map);
  toExchangeDetail composes lookupExchangeFlow + apiFlowOfKind +
  flowKindUnitName instead of three parallel case arms. Cross-DB link
  resolution reuses the shared buildCrossDBLinkMap. crossDBLinkToSummary
  + resolveTargetSummary mirror the TargetRef helpers from the previous
  commit, with '<|>' for the getTargetActivity / cross-DB fallback chain.

- unresolvedUnit lifted to module-level constant (no longer recomputed
  per call).

- resolveFlow (introduced last commit) also collapses via
  lookupExchangeFlow + flowKindCompartment, so the two refactors
  converge on the same primitive.

Semantics preserved: biosphere exchanges still return Nothing target;
the cross-DB fallback fires only when getTargetActivity fails.

Verified: cabal build clean, cabal test 1107 / 0 failures.
---
 src/Service.hs | 185 ++++++++++++++++++++-----------------------------
 src/Types.hs   |  19 +++++
 2 files changed, 96 insertions(+), 108 deletions(-)

diff --git a/src/Service.hs b/src/Service.hs
index 2bd50ee0..6e106f22 100644
--- a/src/Service.hs
+++ b/src/Service.hs
@@ -5,7 +5,7 @@
 
 module Service where
 
-import API.Types (ActivityForAPI (..), ActivityInfo (..), ActivityLinks (..), ActivityMetadata (..), ActivityStats (..), ActivitySummary (..), ApiFlow (..), ClassificationSystem (..), ConsumerResult (..), ConsumersResponse (..), CutoffWasteFlow (..), EdgeType (..), ExchangeDetail (..), ExchangeWithUnit (..), ExportNode (..), FlowDetail (..), FlowInfo (..), FlowRole (..), FlowSearchResult (..), FlowSummary (..), GraphEdge (..), GraphExport (..), GraphNode (..), InventoryExport (..), InventoryFlowDetail (..), InventoryMetadata (..), InventoryStatistics (..), NodeType (..), Perturbation (..), RootDb (..), SearchResults (..), Substitution (..), SupplyChainEdge (..), SupplyChainEntry (..), SupplyChainResponse (..), ThisDb (..), TreeEdge (..), TreeExport (..), TreeMetadata (..), parseSubRef, unresolvedFlowName)
+import API.Types (ActivityForAPI (..), ActivityInfo (..), ActivityLinks (..), ActivityMetadata (..), ActivityStats (..), ActivitySummary (..), ApiFlow (..), ClassificationSystem (..), ConsumerResult (..), ConsumersResponse (..), CutoffWasteFlow (..), EdgeType (..), ExchangeDetail (..), ExchangeWithUnit (..), ExportNode (..), FlowDetail (..), FlowInfo (..), FlowRole (..), FlowSearchResult (..), FlowSummary (..), GraphEdge (..), GraphExport (..), GraphNode (..), InventoryExport (..), InventoryFlowDetail (..), InventoryMetadata (..), InventoryStatistics (..), NodeType (..), Perturbation (..), RootDb (..), SearchResults (..), Substitution (..), SupplyChainEdge (..), SupplyChainEntry (..), SupplyChainResponse (..), ThisDb (..), TreeEdge (..), TreeExport (..), TreeMetadata (..), apiFlowOfKind, parseSubRef, unresolvedFlowName)
 import CLI.Types (DebugMatricesOptions (..))
 import Control.Applicative ((<|>))
 import Control.Concurrent.Async (mapConcurrently)
@@ -1175,13 +1175,9 @@ resolveTarget cfg db links = \case
 flow side by construction, so no Maybe-merge is needed downstream.
 -}
 resolveFlow :: Database -> Exchange -> Maybe (Text, Maybe Compartment)
-resolveFlow db = \case
-    TechnosphereExchange{techFlowId = fid} ->
-        (\tf -> (tfName tf, Nothing)) <$> M.lookup fid (dbTechFlows db)
-    BiosphereExchange{bioFlowId = fid} ->
-        (\bf -> (bfName bf, bfCompartment bf)) <$> M.lookup fid (dbBioFlows db)
-    WasteExchange{waFlowId = fid} ->
-        (\wf -> (wfName wf, Nothing)) <$> M.lookup fid (dbWasteFlows db)
+resolveFlow db exchange = do
+    fk <- lookupExchangeFlow db exchange
+    pure (flowKindName fk, flowKindCompartment fk)
 
 {- | Build the cross-DB link map for one activity, keyed by consumer flow UUID.
 UUIDs are unique across flow kinds, so a tech and a waste link on the same
@@ -1323,109 +1319,82 @@ getActivitiesUsingFlow db flowUUID =
                 , Just processId <- [findProcessIdForActivity db proc]
                 ]
 
-{- | Helper function to get detailed exchanges with filtering. Resolves
-cross-DB technosphere inputs (SimaPro pattern: activityLinkId is nil,
-the supplier lives in a dep DB via 'dbCrossDBLinks') by synthesizing an
-'ActivitySummary' with a qualified pid @"dbName::actUUID_prodUUID"@ —
-same convention the @/activity/{pid}@ endpoint uses.
+{- | Sentinel returned only when an exchange's unit UUID failed to resolve.
+The exchange unit-name field already surfaces the same gap via
+'getUnitNameForExchange', so consumers see the missing unit in both the
+structured 'Unit' and the unit-name string.
+-}
+unresolvedUnit :: Unit
+unresolvedUnit = Unit{unitId = UUID.nil, unitName = "<unresolved unit>", unitSymbol = "", unitComment = ""}
+
+{- | 'ActivitySummary' form of a cross-DB link target. Mirrors
+'crossDBLinkToTarget' but produces the richer wire shape consumed by the
+exchange-details endpoint.
+-}
+crossDBLinkToSummary :: CrossDBLink -> ActivitySummary
+crossDBLinkToSummary link =
+    ActivitySummary
+        { prsProcessId =
+            cdlSourceDatabase link
+                <> "::"
+                <> UUID.toText (cdlSupplierActUUID link)
+                <> "_"
+                <> UUID.toText (cdlSupplierProdUUID link)
+        , prsName = cdlFlowName link
+        , prsLocation = cdlLocation link
+        , prsProduct = cdlFlowName link
+        , prsProductAmount = 1.0
+        , prsProductUnit = cdlExchangeUnit link
+        , prsAllocationPercent = Nothing
+        , prsAllocationFormula = Nothing
+        , prsNativeType = Nothing
+        }
+
+{- | Resolve an exchange's target as 'ActivitySummary', falling back to the
+cross-DB link map for unresolved technosphere/waste links. Biosphere flows
+have no target by definition.
+-}
+resolveTargetSummary
+    :: Database
+    -> M.Map UUID CrossDBLink
+    -> Exchange
+    -> Maybe ActivitySummary
+resolveTargetSummary db links exchange = case exchange of
+    BiosphereExchange{} -> Nothing
+    TechnosphereExchange{} -> resolved
+    WasteExchange{} -> resolved
+  where
+    resolved =
+        getTargetActivity db exchange
+            <|> (crossDBLinkToSummary <$> M.lookup (exchangeFlowId exchange) links)
+
+{- | Detailed exchanges with filtering. Resolves cross-DB technosphere inputs
+(SimaPro pattern: @activityLinkId@ is nil, the supplier lives in a dep DB
+via 'dbCrossDBLinks') by synthesizing an 'ActivitySummary' with a qualified
+pid @"dbName::actUUID_prodUUID"@ — same convention the @/activity/{pid}@
+endpoint uses.
+
+A missing flow row used to drop the exchange entirely. We now surface an
+unresolved-flow entry instead, so the returned list always has one element
+per matching exchange and the gap is reportable.
 -}
 getActivityExchangeDetails :: Database -> Activity -> (Exchange -> Bool) -> [ExchangeDetail]
 getActivityExchangeDetails db activity filterFn =
-    map mkDetail (filter filterFn (exchanges activity))
-  where
-    -- A missing flow or unit row used to drop the exchange entirely. We now
-    -- surface an unresolved-flow entry instead, so the returned list always
-    -- has one element per matching exchange and the gap is reportable.
-    mkDetail exchange =
-        let unitForExchange = M.findWithDefault unresolvedUnit (exchangeUnitId exchange) (dbUnits db)
-            exUnitName = getUnitNameForExchange (dbUnits db) exchange
-            fid = exchangeFlowId exchange
-            unresolved flow = ExchangeDetail exchange flow "" unitForExchange exUnitName Nothing
-         in case exchange of
-                TechnosphereExchange{} -> case M.lookup fid (dbTechFlows db) of
-                    Just flow ->
-                        ExchangeDetail
-                            exchange
-                            (ApiTechFlow flow)
-                            (getUnitNameForTechFlow (dbUnits db) flow)
-                            unitForExchange
-                            exUnitName
-                            (resolveCrossDBTarget exchange)
-                    Nothing -> unresolved (ApiUnresolvedFlow fid)
-                BiosphereExchange{} -> case M.lookup fid (dbBioFlows db) of
-                    Just flow ->
-                        ExchangeDetail
-                            exchange
-                            (ApiBioFlow flow)
-                            (getUnitNameForBioFlow (dbUnits db) flow)
-                            unitForExchange
-                            exUnitName
-                            Nothing -- Biosphere flows have no target activity
-                    Nothing -> unresolved (ApiUnresolvedFlow fid)
-                WasteExchange{} -> case M.lookup fid (dbWasteFlows db) of
-                    Just flow ->
-                        ExchangeDetail
-                            exchange
-                            (ApiWasteFlow flow)
-                            (getUnitNameForWasteFlow (dbUnits db) flow)
-                            unitForExchange
-                            exUnitName
-                            (resolveCrossDBTarget exchange)
-                    Nothing -> unresolved (ApiUnresolvedFlow fid)
-
-    -- Sentinel returned only when the exchange's unit UUID itself failed
-    -- to resolve. The exchange unit name field already surfaces the same
-    -- gap via 'getUnitNameForExchange', so consumers see the missing unit
-    -- in both the structured Unit and the exUnitName string.
-    unresolvedUnit = Unit{unitId = UUID.nil, unitName = "<unresolved unit>", unitSymbol = "", unitComment = ""}
-
-    -- Cross-DB link lookup keyed by the consumer-side flow UUID
-    -- ('cdlConsumerFlowId'). Built once per activity; O(log n) per-exchange
-    -- resolution. Keying by UUID — rather than normalized flow name — avoids
-    -- the collision where a tech "X" link and a waste "X" link on the same
-    -- activity would overwrite each other in a name-keyed map.
-    crossLinkByFlow :: M.Map UUID CrossDBLink
-    crossLinkByFlow = case findProcessIdForActivity db activity >>= processIdToUUIDs db of
-        Just (actUUID, _) ->
-            M.fromList
-                [ (cdlConsumerFlowId link, link)
-                | link <- dbCrossDBLinks db
-                , cdlConsumerActUUID link == actUUID
-                , cdlConsumerFlowId link /= UUID.nil
-                ]
-        Nothing -> M.empty
-
-    -- Resolves a target activity for a technosphere or waste exchange. Orphan
-    -- waste outputs resolved by the exact-match cross-DB linker share the same
-    -- 'crossLinkByFlow' path as technosphere inputs; both are keyed on the
-    -- consumer-side flow UUID.
-    resolveCrossDBTarget exchange =
-        case getTargetActivity db exchange of
-            Just s -> Just s
-            Nothing -> crossDBTarget (exchangeFlowId exchange)
-
-    crossDBTarget consumerFlowId =
-        case M.lookup consumerFlowId crossLinkByFlow of
-            Nothing -> Nothing
-            Just link ->
-                let qualifiedPid =
-                        cdlSourceDatabase link
-                            <> "::"
-                            <> UUID.toText (cdlSupplierActUUID link)
-                            <> "_"
-                            <> UUID.toText (cdlSupplierProdUUID link)
-                 in Just
-                        ActivitySummary
-                            { prsProcessId = qualifiedPid
-                            , prsName = cdlFlowName link
-                            , prsLocation = cdlLocation link
-                            , prsProduct = cdlFlowName link
-                            , prsProductAmount = 1.0
-                            , prsProductUnit = cdlExchangeUnit link
-                            , prsAllocationPercent = Nothing
-                            , prsAllocationFormula = Nothing
-                            , prsNativeType = Nothing
-                            }
+    let linkMap = case findProcessIdForActivity db activity of
+            Just pid -> buildCrossDBLinkMap db pid
+            Nothing -> M.empty
+     in map (toExchangeDetail db linkMap) (filter filterFn (exchanges activity))
+
+toExchangeDetail :: Database -> M.Map UUID CrossDBLink -> Exchange -> ExchangeDetail
+toExchangeDetail db links exchange =
+    let unitForExchange = M.findWithDefault unresolvedUnit (exchangeUnitId exchange) (dbUnits db)
+        exUnitName = getUnitNameForExchange (dbUnits db) exchange
+        target = resolveTargetSummary db links exchange
+     in case lookupExchangeFlow db exchange of
+            Just fk ->
+                ExchangeDetail exchange (apiFlowOfKind fk) (flowKindUnitName (dbUnits db) fk) unitForExchange exUnitName target
+            Nothing ->
+                ExchangeDetail exchange (ApiUnresolvedFlow (exchangeFlowId exchange)) "" unitForExchange exUnitName Nothing
 
 -- | Get detailed input exchanges
 getActivityInputDetails :: Database -> Activity -> [ExchangeDetail]
diff --git a/src/Types.hs b/src/Types.hs
index 29acbc0f..6f2602d2 100644
--- a/src/Types.hs
+++ b/src/Types.hs
@@ -546,6 +546,12 @@ flowKindUnitName udb (TechKind f) = getUnitNameForTechFlow udb f
 flowKindUnitName udb (BioKind f) = getUnitNameForBioFlow udb f
 flowKindUnitName udb (WasteKind f) = getUnitNameForWasteFlow udb f
 
+-- | Biosphere compartment, if any. Tech/waste flows carry no compartment.
+flowKindCompartment :: FlowKind -> Maybe Compartment
+flowKindCompartment (TechKind _) = Nothing
+flowKindCompartment (BioKind f) = bfCompartment f
+flowKindCompartment (WasteKind _) = Nothing
+
 -- | Unit database (deduplicated)
 type UnitDB = M.Map UUID Unit
 
@@ -877,6 +883,19 @@ findProcessIdByProductFlow :: Database -> UUID -> Maybe ProcessId
 findProcessIdByProductFlow db flowUUID =
     M.lookup flowUUID (piByUUID $ dbProductIndex db)
 
+{- | Look up an exchange's flow on the appropriate side. Each exchange variant
+has exactly one flow side by construction (tech, bio, or waste), so the
+result is a single 'FlowKind' or 'Nothing' when the UUID is absent from the
+database.
+-}
+lookupExchangeFlow :: Database -> Exchange -> Maybe FlowKind
+lookupExchangeFlow db TechnosphereExchange{techFlowId = fid} =
+    TechKind <$> M.lookup fid (dbTechFlows db)
+lookupExchangeFlow db BiosphereExchange{bioFlowId = fid} =
+    BioKind <$> M.lookup fid (dbBioFlows db)
+lookupExchangeFlow db WasteExchange{waFlowId = fid} =
+    WasteKind <$> M.lookup fid (dbWasteFlows db)
+
 {- | Search products by name (for future product search feature)
 Returns all ProcessIds that produce products matching the given name
 -}

From 0cc8387e81bd4605f221329db8620620b8e8d778 Mon Sep 17 00:00:00 2001
From: Christophe Combelles <ccomb@free.fr>
Date: Wed, 27 May 2026 23:29:52 +0200
Subject: [PATCH 28/43] refactor(service): factor graph builders into small
 named helpers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Three graph-building functions in src/Service.hs each carried a deep
where-clause or do-block. The shapes were similar (per-variant node
construction, accumulator folds, biosphere-at-root special-casing) but
duplicated. Extract small helpers so each function reads top-to-bottom.

extractBiosphereNodesAndEdges:
  - mkBiosphereExportNode: pure ExportNode constructor
  - mkBiosphereTreeEdge: pure TreeEdge constructor with direction
  - maxBiosphereFlows: lift the magic 50 to a named top-level constant
  - Inner fold step is now four lines

extractNodesAndEdges:
  - mkActivityExportNode: shared between TreeLeaf and TreeNode (the two
    constructors that wrap an Activity)
  - mkLoopExportNode: TreeLoop-specific
  - mkTechnosphereTreeEdge: pure constructor for child edges
  - withRootBiosphere: collapses the 'if depth == 0 then bio else id'
    block that appeared in both TreeLeaf and TreeNode arms
  - Each arm is now ~5 lines

buildActivityGraph:
  - selectSignificantActivities: pure helper, no longer inlined; also
    drops a partial '!!' in favour of a comprehension-guard
  - isInputLinkTo: exhaustive predicate, replaces nested case-of inside L.find
  - mkGraphEdgeFromTriple: uses Maybe-do to short-circuit when either
    endpoint is below cutoff (avoids computing flow/unit fields that
    will be thrown away)
  - mkGraphNode: drops the 'error \"Invalid ProcessId in graph\"' partial
    function in favour of a sentinel node (Vector !? + fallback) —
    matches the "no silent errors, no runtime crashes" rules
  - Body shrinks from ~115 lines of inlined steps to ~13 lines of
    pipeline assembly
  - Switches the edge list-comp + 'Just e <- edges' filter to mapMaybe

Verified: cabal build clean, cabal test 1107 / 0 failures.
---
 src/Service.hs | 484 ++++++++++++++++++++++++-------------------------
 1 file changed, 239 insertions(+), 245 deletions(-)

diff --git a/src/Service.hs b/src/Service.hs
index 6e106f22..a9253db1 100644
--- a/src/Service.hs
+++ b/src/Service.hs
@@ -20,7 +20,7 @@ import qualified Data.IntSet as IS
 import qualified Data.List as L
 import qualified Data.List.NonEmpty as NE
 import qualified Data.Map as M
-import Data.Maybe (catMaybes, fromMaybe, isJust)
+import Data.Maybe (catMaybes, fromMaybe, isJust, mapMaybe)
 import Data.Sequence (Seq (..), (|>))
 import qualified Data.Set as S
 import Data.Text (Text)
@@ -463,146 +463,154 @@ extractCompartment category =
                             then "soil"
                             else "other"
 
--- | Extract biosphere exchanges from an activity and create nodes and edges
+{- | Cap on biosphere flows shown per activity. System processes can declare
+hundreds; we keep the top-N by |amount| to keep graphs renderable.
+-}
+maxBiosphereFlows :: Int
+maxBiosphereFlows = 50
+
+-- | ExportNode for a single biosphere flow attached to a parent activity.
+mkBiosphereExportNode :: UnitDB -> BiosphereFlow -> Text -> Int -> Bool -> ExportNode
+mkBiosphereExportNode units flow parentPid depth isEmission =
+    let compartmentTxt = bfCompartmentName flow
+     in ExportNode
+            { enId = UUID.toText (bfId flow)
+            , enName = bfName flow
+            , enDescription = [compartmentTxt]
+            , enLocation = ""
+            , enUnit = getUnitNameForBioFlow units flow
+            , enNodeType = if isEmission then BiosphereEmissionNode else BiosphereResourceNode
+            , enDepth = depth
+            , enLoopTarget = Nothing
+            , enParentId = Just parentPid
+            , enChildrenCount = 0
+            , enCompartment = Just compartmentTxt
+            }
+
+-- | Edge linking an activity to a biosphere flow. Direction depends on whether
+-- the exchange is an emission (activity -> flow) or a resource (flow -> activity).
+mkBiosphereTreeEdge :: UnitDB -> BiosphereFlow -> Text -> Bool -> Exchange -> TreeEdge
+mkBiosphereTreeEdge units flow activityPid isEmission ex =
+    let flowIdText = UUID.toText (bfId flow)
+        (edgeFrom, edgeTo, edgeType) =
+            if isEmission
+                then (activityPid, flowIdText, BiosphereEmissionEdge)
+                else (flowIdText, activityPid, BiosphereResourceEdge)
+     in TreeEdge
+            { teFrom = edgeFrom
+            , teTo = edgeTo
+            , teFlow = FlowInfo (bfId flow) (bfName flow) (bfCompartmentName flow)
+            , teQuantity = exchangeAmount ex
+            , teUnit = getUnitNameForBioFlow units flow
+            , teEdgeType = edgeType
+            }
+
+-- | Extract biosphere exchanges from an activity and create nodes and edges.
 extractBiosphereNodesAndEdges :: Database -> Activity -> Text -> Int -> M.Map Text ExportNode -> [TreeEdge] -> (M.Map Text ExportNode, [TreeEdge])
 extractBiosphereNodesAndEdges db activity activityProcessId depth nodeAcc edgeAcc =
-    let allBiosphereExchanges = [ex | ex <- exchanges activity, isBiosphereExchange ex]
-        -- Limit to top 50 most significant flows to prevent performance issues with system processes
-        maxBiosphereFlows = 50
-        biosphereExchanges =
-            take maxBiosphereFlows $
-                L.sortBy
-                    (\a b -> compare (abs (exchangeAmount b)) (abs (exchangeAmount a)))
-                    allBiosphereExchanges
-        processBiosphere ex (nodeAcc', edgeAcc') =
-            case M.lookup (exchangeFlowId ex) (dbBioFlows db) of
-                Nothing -> (nodeAcc', edgeAcc')
-                Just flow ->
-                    let flowIdText = UUID.toText (bfId flow)
-                        isEmission = not (exchangeIsInput ex) -- False = emission, True = resource
-                        nodeType = if isEmission then BiosphereEmissionNode else BiosphereResourceNode
-                        compartmentTxt = bfCompartmentName flow
-                        biosphereNode =
-                            ExportNode
-                                { enId = flowIdText
-                                , enName = bfName flow
-                                , enDescription = [compartmentTxt]
-                                , enLocation = ""
-                                , enUnit = getUnitNameForBioFlow (dbUnits db) flow
-                                , enNodeType = nodeType
-                                , enDepth = depth
-                                , enLoopTarget = Nothing
-                                , enParentId = Just activityProcessId
-                                , enChildrenCount = 0
-                                , enCompartment = Just compartmentTxt
-                                }
-                        nodeAcc'' = M.insert flowIdText biosphereNode nodeAcc'
-                        (edgeFrom, edgeTo, edgeType) =
-                            if isEmission
-                                then (activityProcessId, flowIdText, BiosphereEmissionEdge)
-                                else (flowIdText, activityProcessId, BiosphereResourceEdge)
-                        edge =
-                            TreeEdge
-                                { teFrom = edgeFrom
-                                , teTo = edgeTo
-                                , teFlow = FlowInfo (bfId flow) (bfName flow) compartmentTxt
-                                , teQuantity = exchangeAmount ex
-                                , teUnit = getUnitNameForBioFlow (dbUnits db) flow
-                                , teEdgeType = edgeType
-                                }
-                        edgeAcc'' = edge : edgeAcc'
-                     in (nodeAcc'', edgeAcc'')
-     in foldr processBiosphere (nodeAcc, edgeAcc) biosphereExchanges
-
--- | Extract nodes and edges from LoopAwareTree
+    foldr step (nodeAcc, edgeAcc) topBiosphereExchanges
+  where
+    units = dbUnits db
+    topBiosphereExchanges =
+        take maxBiosphereFlows $
+            L.sortBy (\a b -> compare (abs (exchangeAmount b)) (abs (exchangeAmount a))) $
+                filter isBiosphereExchange (exchanges activity)
+    step ex acc@(nodes, edges) = case M.lookup (exchangeFlowId ex) (dbBioFlows db) of
+        Nothing -> acc
+        Just flow ->
+            let isEmission = not (exchangeIsInput ex)
+                node = mkBiosphereExportNode units flow activityProcessId depth isEmission
+                edge = mkBiosphereTreeEdge units flow activityProcessId isEmission ex
+             in (M.insert (UUID.toText (bfId flow)) node nodes, edge : edges)
+
+-- | ExportNode for an activity-bearing tree node (TreeLeaf or TreeNode).
+mkActivityExportNode :: Database -> Activity -> Text -> Int -> Maybe Text -> ExportNode
+mkActivityExportNode db activity nodeId depth parentId =
+    ExportNode
+        { enId = nodeId
+        , enName = activityName activity
+        , enDescription = activityDescription activity
+        , enLocation = activityLocation activity
+        , enUnit = activityUnit activity
+        , enNodeType = ActivityNode
+        , enDepth = depth
+        , enLoopTarget = Nothing
+        , enParentId = parentId
+        , enChildrenCount = countPotentialChildren db activity
+        , enCompartment = Nothing
+        }
+
+-- | ExportNode for a TreeLoop. Looks up the referenced activity for its real
+-- unit and location; falls back to "N/A" sentinels when the referent is missing.
+mkLoopExportNode :: Database -> UUID -> Text -> Text -> Int -> Maybe Text -> ExportNode
+mkLoopExportNode db uuid nodeId name loopDepth parentId =
+    let (actualLocation, actualUnit) = case findActivityByActivityUUID db uuid of
+            Just act -> (activityLocation act, activityUnit act)
+            Nothing -> ("N/A", "N/A")
+     in ExportNode
+            { enId = nodeId
+            , enName = name
+            , enDescription = ["Loop reference"]
+            , enLocation = actualLocation
+            , enUnit = actualUnit
+            , enNodeType = LoopNode
+            , enDepth = loopDepth
+            , enLoopTarget = Just (UUID.toText uuid)
+            , enParentId = parentId
+            , enChildrenCount = 0
+            , enCompartment = Nothing
+            }
+
+{- | Attach biosphere nodes/edges to the accumulator when we're at the root of
+the tree (depth == 0). Below the root we leave the accumulator untouched to
+keep the graph readable.
+-}
+withRootBiosphere
+    :: Database
+    -> Activity
+    -> Text
+    -> Int
+    -> (M.Map Text ExportNode, [TreeEdge])
+    -> (M.Map Text ExportNode, [TreeEdge])
+withRootBiosphere db activity pid depth acc@(nodes, edges)
+    | depth == 0 = extractBiosphereNodesAndEdges db activity pid depth nodes edges
+    | otherwise = acc
+
+-- | Technosphere edge from the current node to a child subtree.
+mkTechnosphereTreeEdge :: UnitDB -> Text -> Text -> Double -> TechnosphereFlow -> TreeEdge
+mkTechnosphereTreeEdge units fromPid toPid quantity flow =
+    TreeEdge
+        { teFrom = fromPid
+        , teTo = toPid
+        , teFlow = FlowInfo (tfId flow) (tfName flow) ""
+        , teQuantity = quantity
+        , teUnit = getUnitNameForTechFlow units flow
+        , teEdgeType = TechnosphereEdge
+        }
+
+-- | Extract nodes and edges from a 'LoopAwareTree'.
 extractNodesAndEdges :: Database -> LoopAwareTree -> Int -> Maybe Text -> M.Map Text ExportNode -> [TreeEdge] -> (M.Map Text ExportNode, [TreeEdge], TreeStats)
 extractNodesAndEdges db tree depth parentId nodeAcc edgeAcc = case tree of
     TreeLeaf activity ->
-        let childrenCount = countPotentialChildren db activity
-            processIdText = getTreeNodeId db tree
-            node =
-                ExportNode
-                    { enId = processIdText -- Now ProcessId format
-                    , enName = activityName activity
-                    , enDescription = activityDescription activity
-                    , enLocation = activityLocation activity
-                    , enUnit = activityUnit activity
-                    , enNodeType = ActivityNode
-                    , enDepth = depth
-                    , enLoopTarget = Nothing
-                    , enParentId = parentId
-                    , enChildrenCount = childrenCount
-                    , enCompartment = Nothing
-                    }
-            nodes' = M.insert processIdText node nodeAcc -- Use ProcessId as key
-            -- Add biosphere nodes and edges only for depth 0 (root level)
-            (nodes'', edges') =
-                if depth == 0
-                    then extractBiosphereNodesAndEdges db activity processIdText depth nodes' edgeAcc
-                    else (nodes', edgeAcc)
+        let nodeId = getTreeNodeId db tree
+            nodes' = M.insert nodeId (mkActivityExportNode db activity nodeId depth parentId) nodeAcc
+            (nodes'', edges') = withRootBiosphere db activity nodeId depth (nodes', edgeAcc)
          in (nodes'', edges', TreeStats 1 0 1)
     TreeLoop uuid name loopDepth ->
-        let nodeId = getTreeNodeId db tree -- Use ProcessId format for consistency
-            uuidText = UUID.toText uuid -- Keep bare UUID for loopTarget
-            -- Look up the actual activity to get real unit and location
-            maybeActivity = findActivityByActivityUUID db uuid
-            (actualLocation, actualUnit) = case maybeActivity of
-                Just activity -> (activityLocation activity, activityUnit activity)
-                Nothing -> ("N/A", "N/A") -- Fallback only if activity not found
-            node =
-                ExportNode
-                    { enId = nodeId -- Now uses ProcessId format
-                    , enName = name
-                    , enDescription = ["Loop reference"]
-                    , enLocation = actualLocation
-                    , enUnit = actualUnit
-                    , enNodeType = LoopNode
-                    , enDepth = loopDepth
-                    , enLoopTarget = Just uuidText
-                    , enParentId = parentId
-                    , enChildrenCount = 0 -- Loops don't expand
-                    , enCompartment = Nothing
-                    }
-            nodes' = M.insert nodeId node nodeAcc -- Store with ProcessId format key
+        let nodeId = getTreeNodeId db tree
+            nodes' = M.insert nodeId (mkLoopExportNode db uuid nodeId name loopDepth parentId) nodeAcc
          in (nodes', edgeAcc, TreeStats 1 1 0)
     TreeNode activity children ->
-        let childrenCount = countPotentialChildren db activity
-            currentProcessId = getTreeNodeId db tree
-            parentNode =
-                ExportNode
-                    { enId = currentProcessId -- Now ProcessId format
-                    , enName = activityName activity
-                    , enDescription = activityDescription activity
-                    , enLocation = activityLocation activity
-                    , enUnit = activityUnit activity
-                    , enNodeType = ActivityNode
-                    , enDepth = depth
-                    , enLoopTarget = Nothing
-                    , enParentId = parentId
-                    , enChildrenCount = childrenCount
-                    , enCompartment = Nothing
-                    }
-            nodes' = M.insert currentProcessId parentNode nodeAcc -- Use ProcessId as key
-            processChild (quantity, flow, subtree) (nodeAcc', edgeAcc', statsAcc) =
-                let (childNodes, childEdges, childStats') = extractNodesAndEdges db subtree (depth + 1) (Just currentProcessId) nodeAcc' edgeAcc'
-                    edge =
-                        TreeEdge
-                            { teFrom = currentProcessId
-                            , teTo = getTreeNodeId db subtree
-                            , teFlow = FlowInfo (tfId flow) (tfName flow) ""
-                            , teQuantity = quantity
-                            , teUnit = getUnitNameForTechFlow (dbUnits db) flow
-                            , teEdgeType = TechnosphereEdge
-                            }
-                    newStats = statsAcc <> childStats'
-                 in (childNodes, edge : childEdges, newStats)
-            (finalNodes, finalEdges, combinedStats) = foldr processChild (nodes', edgeAcc, TreeStats 1 0 0) children
-            -- Add biosphere nodes and edges only for depth 0 (root level)
-            (finalNodesWithBio, finalEdgesWithBio) =
-                if depth == 0
-                    then extractBiosphereNodesAndEdges db activity currentProcessId depth finalNodes finalEdges
-                    else (finalNodes, finalEdges)
-         in (finalNodesWithBio, finalEdgesWithBio, combinedStats)
+        let nodeId = getTreeNodeId db tree
+            nodes' = M.insert nodeId (mkActivityExportNode db activity nodeId depth parentId) nodeAcc
+            (childNodes, childEdges, childStats) = foldr (processChild nodeId) (nodes', edgeAcc, TreeStats 1 0 0) children
+            (finalNodes, finalEdges) = withRootBiosphere db activity nodeId depth (childNodes, childEdges)
+         in (finalNodes, finalEdges, childStats)
+  where
+    processChild parentPid (quantity, flow, subtree) (nodes, edges, stats) =
+        let (n', e', s') = extractNodesAndEdges db subtree (depth + 1) (Just parentPid) nodes edges
+            edge = mkTechnosphereTreeEdge (dbUnits db) parentPid (getTreeNodeId db subtree) quantity flow
+         in (n', edge : e', stats <> s')
 
 -- | Convert LoopAwareTree to TreeExport format for JSON serialization
 convertToTreeExport :: Database -> Text -> Int -> LoopAwareTree -> TreeExport
@@ -665,126 +673,112 @@ filterTreeExport pat export =
         meta = (teTree export){tmTotalNodes = M.size filteredNodes}
      in export{teTree = meta, teNodes = filteredNodes, teEdges = filteredEdges}
 
-{- | Build activity network graph from factorized matrix column
-Uses efficient sparse matrix operations to extract connections
+{- | Activities whose absolute cumulative value clears the threshold, plus the
+root activity (which we always surface, even if it falls below). Out-of-bounds
+roots become a zero-valued entry rather than crashing.
+-}
+selectSignificantActivities :: Double -> ProcessId -> [Double] -> [(ProcessId, Double)]
+selectSignificantActivities threshold rootPid supplyList =
+    let kept =
+            [ (fromIntegral idx :: ProcessId, val)
+            | (idx, val) <- zip [(0 :: Int) ..] supplyList
+            , abs val > threshold || idx == fromIntegral rootPid
+            ]
+        rootInBounds = fromIntegral rootPid < length supplyList
+     in if rootInBounds then kept else (rootPid, 0.0) : kept
+
+{- | True iff the exchange is a technosphere @Input@ whose link points at the
+given target activity. Waste exchanges aren't traversed by the graph builder
+(they don't form upstream tech edges).
+-}
+isInputLinkTo :: UUID -> Exchange -> Bool
+isInputLinkTo targetUUID ex@TechnosphereExchange{techRole = Input} =
+    exchangeActivityLinkId ex == Just targetUUID
+isInputLinkTo _ TechnosphereExchange{} = False
+isInputLinkTo _ BiosphereExchange{} = False
+isInputLinkTo _ WasteExchange{} = False
+
+{- | Build one 'GraphEdge' from a sparse technosphere triple. Returns 'Nothing'
+when either endpoint is outside the projected subgraph (i.e. below cutoff) or
+the triple itself is zero. When the supplier flow can't be resolved we still
+emit the edge — with sentinel name/unit — so the gap is debuggable instead of
+silently dropped.
+-}
+mkGraphEdgeFromTriple
+    :: Database
+    -> V.Vector Activity
+    -> UnitDB
+    -> TechFlowDB
+    -> M.Map ProcessId Int
+    -> SparseTriple
+    -> Maybe GraphEdge
+mkGraphEdgeFromTriple db activities units flows nodeIdMap (SparseTriple row col value)
+    | value == 0.0 = Nothing
+    | otherwise = do
+        let sourcePid = fromIntegral row :: ProcessId
+            targetPid = fromIntegral col :: ProcessId
+        src <- M.lookup sourcePid nodeIdMap
+        tgt <- M.lookup targetPid nodeIdMap
+        let matchingExchange = do
+                srcAct <- activities V.!? fromIntegral row
+                (targetUUID, _) <- processIdToUUIDs db targetPid
+                L.find (isInputLinkTo targetUUID) (exchanges srcAct)
+            flowInfo = matchingExchange >>= \ex -> M.lookup (exchangeFlowId ex) flows
+            uName = maybe "<unresolved unit>" (getUnitNameForTechFlow units) flowInfo
+            flowName = case (flowInfo, matchingExchange) of
+                (Just f, _) -> tfName f
+                (Nothing, Just ex) -> unresolvedFlowName (exchangeFlowId ex)
+                (Nothing, Nothing) -> "<unresolved flow>"
+        pure $ GraphEdge src tgt (realToFrac value) uName flowName
+
+{- | 'GraphNode' for one significant activity. Out-of-bounds 'ProcessId's get a
+sentinel node rather than crashing — preserves the project's "no silent
+errors, no silent successes" stance.
+-}
+mkGraphNode :: Database -> V.Vector Activity -> Int -> (ProcessId, Double) -> GraphNode
+mkGraphNode db activities nodeId (pid, cumulativeVal) =
+    let processIdText = processIdToText db pid
+     in case activities V.!? fromIntegral pid of
+            Just activity ->
+                GraphNode
+                    { gnNodeId = nodeId
+                    , gnLabel = activityName activity
+                    , gnValue = cumulativeVal
+                    , gnUnit = activityUnit activity
+                    , gnProcessId = processIdText
+                    , gnLocation = activityLocation activity
+                    }
+            Nothing ->
+                GraphNode
+                    { gnNodeId = nodeId
+                    , gnLabel = "<unresolved activity " <> processIdText <> ">"
+                    , gnValue = cumulativeVal
+                    , gnUnit = ""
+                    , gnProcessId = processIdText
+                    , gnLocation = ""
+                    }
+
+{- | Build activity network graph from factorized matrix column.
+Uses efficient sparse matrix operations to extract connections.
 -}
 buildActivityGraph :: Database -> SharedSolver -> Text -> Double -> IO (Either ServiceError GraphExport)
-buildActivityGraph db sharedSolver queryText cutoffPercent = do
+buildActivityGraph db sharedSolver queryText cutoffPercent =
     case resolveActivityAndProcessId db queryText of
-        Left err -> return $ Left err
+        Left err -> pure (Left err)
         Right (processId, _activity) -> do
-            -- Step 1: Get factorized column (cumulative amounts) by solving
-            let activityIndex = dbActivityIndex db
-                demandVec = buildDemandVectorFromIndex activityIndex processId
-
-            -- Solve to get cumulative amounts (lazy factorization on first call)
-            supplyVec <- solveWithSharedSolver sharedSolver demandVec
+            supplyVec <- solveWithSharedSolver sharedSolver (buildDemandVectorFromIndex (dbActivityIndex db) processId)
             let supplyList = toList supplyVec
-                totalSupply = sum [abs val | val <- supplyList]
-                threshold = totalSupply * (cutoffPercent / 100.0)
-
-            -- Step 2: Filter by cutoff to get significant activities
-            -- Build list of (ProcessId, cumulative value) for activities above threshold
-            -- Always include the root activity (processId) even if below threshold
-            let allSignificantActivities =
-                    [ (fromIntegral idx :: ProcessId, val)
-                    | (idx, val) <- zip [(0 :: Int) ..] supplyList
-                    , abs val > threshold
-                    ]
-                -- Ensure root activity is always included
-                significantActivities =
-                    if processId `elem` map fst allSignificantActivities
-                        then allSignificantActivities
-                        else
-                            let rootValue =
-                                    if fromIntegral processId < length supplyList
-                                        then supplyList !! fromIntegral processId
-                                        else 0.0
-                             in (processId, rootValue) : allSignificantActivities
-
-            -- Step 3: Build node ID mapping (ProcessId -> Int) for frontend efficiency
-            let nodeIdMap = M.fromList [(pid, idx) | (idx, (pid, _)) <- zip [0 ..] significantActivities]
-
-            -- Step 4: Extract direct connections from technosphere matrix
-            -- For each significant activity, find edges in dbTechnosphereTriples
-            let techTriples = dbTechnosphereTriples db
+                threshold = sum (map abs supplyList) * (cutoffPercent / 100.0)
+                significantActivities = selectSignificantActivities threshold processId supplyList
+                nodeIdMap = M.fromList [(pid, idx) | (idx, (pid, _)) <- zip [0 ..] significantActivities]
                 activities = dbActivities db
-                units = dbUnits db
-                flows = dbTechFlows db
-
-                -- Build edges: iterate through sparse triplets
                 edges =
-                    [ let sourceNodeId = M.lookup (fromIntegral row :: ProcessId) nodeIdMap
-                          targetNodeId = M.lookup (fromIntegral col :: ProcessId) nodeIdMap
-                          sourceActivity =
-                            if fromIntegral row < V.length activities
-                                then Just $ activities V.! fromIntegral row
-                                else Nothing
-                          targetProcessId = fromIntegral col :: ProcessId
-                          -- Get target activity UUID from process ID table
-                          targetActivityUUID = case processIdToUUIDs db targetProcessId of
-                            Just (actUUID, _prodUUID) -> Just actUUID
-                            Nothing -> Nothing
-                          -- Find the technosphere-input exchange that points to
-                          -- this target activity, so we can recover both the
-                          -- flow UUID and (if known) its name + unit.
-                          matchingExchange = do
-                            srcAct <- sourceActivity
-                            targetUUID <- targetActivityUUID
-                            L.find
-                                ( \ex -> case ex of
-                                    TechnosphereExchange{techRole = Input} ->
-                                        exchangeActivityLinkId ex == Just targetUUID
-                                    TechnosphereExchange{} -> False
-                                    BiosphereExchange{} -> False
-                                    -- Waste exchanges aren't traversed by the graph builder
-                                    -- (they don't form upstream tech edges).
-                                    WasteExchange{} -> False
-                                )
-                                (exchanges srcAct)
-                          flowInfo = matchingExchange >>= \ex -> M.lookup (exchangeFlowId ex) flows
-                          -- When the supplier flow isn't in the tech map, the
-                          -- exchange UUID is the only debug handle we have.
-                          -- Surface it via the same sentinel shape as
-                          -- ExchangeWithUnit / ApiUnresolvedFlow so the graph
-                          -- doesn't silently mask a broken link.
-                          uName = maybe "<unresolved unit>" (getUnitNameForTechFlow units) flowInfo
-                          flowNameText = case (flowInfo, matchingExchange) of
-                            (Just f, _) -> tfName f
-                            (Nothing, Just ex) -> unresolvedFlowName (exchangeFlowId ex)
-                            (Nothing, Nothing) -> "<unresolved flow>"
-                       in case (sourceNodeId, targetNodeId) of
-                            (Just src, Just tgt) ->
-                                Just $ GraphEdge src tgt (realToFrac value) uName flowNameText
-                            _ -> Nothing
-                    | SparseTriple row col value <- U.toList techTriples
-                    , value /= 0.0
-                    ]
-
-                validEdges = [e | Just e <- edges]
-
-            -- Step 5: Build nodes
-            let nodes =
-                    [ let activity =
-                            if fromIntegral pid < V.length activities
-                                then activities V.! fromIntegral pid
-                                else error $ "Invalid ProcessId in graph: " ++ show pid
-                          processIdText = processIdToText db pid
-                       in GraphNode
-                            { gnNodeId = nodeId
-                            , gnLabel = activityName activity
-                            , gnValue = cumulativeVal
-                            , gnUnit = activityUnit activity
-                            , gnProcessId = processIdText
-                            , gnLocation = activityLocation activity
-                            }
-                    | (nodeId, (pid, cumulativeVal)) <- zip [0 ..] significantActivities
-                    ]
-
-            -- Step 6: Build unit groups for normalization
-            let unitGroups = buildUnitGroups [gnUnit n | n <- nodes]
-
-            return $ Right $ GraphExport nodes validEdges unitGroups
+                    mapMaybe
+                        (mkGraphEdgeFromTriple db activities (dbUnits db) (dbTechFlows db) nodeIdMap)
+                        (U.toList (dbTechnosphereTriples db))
+                nodes = zipWith (mkGraphNode db activities) [0 ..] significantActivities
+                unitGroups = buildUnitGroups (map gnUnit nodes)
+            pure $ Right $ GraphExport nodes edges unitGroups
 
 -- | Classify units into groups for edge width normalization
 buildUnitGroups :: [Text] -> M.Map Text Text

From dcf6e25234ce4d58957b391d9c3855fab3a09bd4 Mon Sep 17 00:00:00 2001
From: Christophe Combelles <ccomb@free.fr>
Date: Wed, 27 May 2026 23:34:22 +0200
Subject: [PATCH 29/43] refactor(main): factor server bootstrap into
 single-purpose helpers

runServerWithConfig was a 70-line staircase doing eleven things;
createServerApp wrapped a 50-line inner closure that mixed routing,
static-file serving and SPA fallback. Each step now has a name.

runServerWithConfig (~15 lines now):
  - applyLoadOverride: pure, applies --load to the config
  - logLoadedDatabases: collapses the if/M.null branch
  - resolvePassword: 3-tier CLI -> config -> env fallback
  - logServerStartup: desktop-mode vs human-banner split
  - setupIdleTimeout: allocates refs and (optionally) forks the watchdog
  - wrapWithMiddleware: idleTracking . shutdown . (maybe authMiddleware)

createServerApp (~10 lines now):
  - swaggerHtml: lifted to top-level constant
  - spaStaticSettings: the wai-app-static config
  - serveStripped / serveSpaIndex: the two static handlers, each now a
    standalone Application instead of let-bindings inside a closure
  - dispatchRequest: the path-based routing if/guards, parameterised on
    pre-built mcp + Servant apps so the inner closure shrinks to two lines
  - logRequest: per-request stdout log line

Behaviour preserved (cabal test 1107 / 0 failures). Each new helper has
a doc comment and is independently understandable.
---
 app/Main.hs | 286 ++++++++++++++++++++++++++++++----------------------
 1 file changed, 166 insertions(+), 120 deletions(-)

diff --git a/app/Main.hs b/app/Main.hs
index 72e25d25..be95a1d0 100644
--- a/app/Main.hs
+++ b/app/Main.hs
@@ -48,7 +48,7 @@ import Data.String (fromString)
 import Network.HTTP.Types (status200)
 import Network.HTTP.Types.Header (hCacheControl, hContentType, hPragma)
 import Network.Wai (Application, Request (..), Response, ResponseReceived, mapResponseHeaders, pathInfo, rawPathInfo, rawQueryString, requestHeaders, requestMethod, responseLBS, responseStream)
-import Network.Wai.Application.Static (defaultWebAppSettings, ssIndices, staticApp)
+import Network.Wai.Application.Static (StaticSettings, defaultWebAppSettings, ssIndices, staticApp)
 import Network.Wai.Handler.Warp (defaultSettings, runSettings, setPort, setTimeout)
 import Servant (serve)
 import WaiAppStatic.Types (MaxAge (..), ssMaxAge, unsafeToPiece)
@@ -133,76 +133,97 @@ runStopWithoutConfig cliConfig = do
     rc <- resolveRemoteConfig (globalOptions cliConfig) Nothing
     executeRemoteCommand mgr rc (globalOptions cliConfig) Stop
 
--- | Run server with multi-database configuration file
-runServerWithConfig :: CLIConfig -> ServerOptions -> FilePath -> IO ()
-runServerWithConfig cliConfig serverOpts cfgFile = do
-    config <- loadConfigOrDie cfgFile
-
-    -- Apply --load override if specified
-    let effectiveConfig = case serverLoadDbs serverOpts of
-            Nothing -> config
-            Just dbNames -> config{cfgDatabases = map (overrideLoad dbNames) (cfgDatabases config)}
+-- | Apply the --load override (if any) to the in-memory config.
+applyLoadOverride :: ServerOptions -> Config -> Config
+applyLoadOverride serverOpts config = case serverLoadDbs serverOpts of
+    Nothing -> config
+    Just dbNames -> config{cfgDatabases = map (overrideLoad dbNames) (cfgDatabases config)}
 
-    -- Initialize DatabaseManager (pre-loads databases with load=true)
-    reportProgress Info "Initializing database manager..."
-    dbManager <- initDatabaseManager effectiveConfig (noCache (globalOptions cliConfig)) (Just cfgFile)
-
-    -- Log database status (allow starting with no databases for BYOL mode)
+-- | Log loaded databases (allows starting with none for BYOL mode).
+logLoadedDatabases :: DatabaseManager -> IO ()
+logLoadedDatabases dbManager = do
     loadedDbs <- readTVarIO (dmLoadedDbs dbManager)
-    if M.null loadedDbs
-        then reportProgress Info "No databases loaded - upload or load one via the web interface"
-        else reportProgress Info $ "Loaded databases: " ++ intercalate ", " (map T.unpack (M.keys loadedDbs))
-
-    let port = fromMaybe (scPort (cfgServer effectiveConfig)) (serverPort serverOpts)
-
-    -- Initialize matrix solver (no-op for MUMPS, kept for API compatibility)
-    initializeSolverForServer
+    reportProgress Info $
+        if M.null loadedDbs
+            then "No databases loaded - upload or load one via the web interface"
+            else "Loaded databases: " ++ intercalate ", " (map T.unpack (M.keys loadedDbs))
 
-    -- Get password from CLI, config, or env var
-    password <- case CLI.Types.serverPassword (globalOptions cliConfig) of
-        Just pwd -> return (Just pwd)
-        Nothing -> case scPassword (cfgServer effectiveConfig) of
-            Just pwd -> return (Just $ T.unpack pwd)
-            Nothing -> lookupEnv "VOLCA_PASSWORD"
-
-    -- Determine static directory (--static-dir or default "web/dist")
-    let staticDir = fromMaybe "web/dist" (serverStaticDir serverOpts)
-        desktopMode = serverDesktopMode serverOpts
-
-    -- In desktop mode, print machine-readable port for launcher, then minimal logging
-    if desktopMode
-        then do
-            putStrLn $ "VOLCA_PORT=" ++ show port
-            hFlush stdout
-        else do
-            reportProgress Info $ "Starting API server on port " ++ show port
-            reportProgress Info $ "Tree depth: " ++ show (serverTreeDepth serverOpts)
-            case password of
-                Just _ -> reportProgress Info "Authentication: ENABLED"
-                Nothing -> reportProgress Info "Authentication: DISABLED (use --password or VOLCA_PASSWORD to enable)"
-            reportProgress Info $ "Web interface available at: http://localhost:" ++ show port ++ "/"
-
-    -- Idle timeout: track last request time, watchdog activated on demand via API
+{- | Resolve the admin password from CLI flag, config file, or env var, in that
+order. Returns 'Nothing' when authentication is disabled (no source set).
+-}
+resolvePassword :: GlobalOptions -> ServerConfig -> IO (Maybe String)
+resolvePassword globalOpts serverCfg = case CLI.Types.serverPassword globalOpts of
+    Just pwd -> pure (Just pwd)
+    Nothing -> case scPassword serverCfg of
+        Just pwd -> pure (Just (T.unpack pwd))
+        Nothing -> lookupEnv "VOLCA_PASSWORD"
+
+{- | In desktop mode, print a machine-readable port line for the launcher
+to capture and stay quiet. Otherwise emit the human-facing startup banner.
+-}
+logServerStartup :: ServerOptions -> Int -> Maybe String -> IO ()
+logServerStartup serverOpts port password
+    | serverDesktopMode serverOpts = do
+        putStrLn ("VOLCA_PORT=" ++ show port)
+        hFlush stdout
+    | otherwise = do
+        reportProgress Info ("Starting API server on port " ++ show port)
+        reportProgress Info ("Tree depth: " ++ show (serverTreeDepth serverOpts))
+        reportProgress Info $ case password of
+            Just _ -> "Authentication: ENABLED"
+            Nothing -> "Authentication: DISABLED (use --password or VOLCA_PASSWORD to enable)"
+        reportProgress Info ("Web interface available at: http://localhost:" ++ show port ++ "/")
+
+{- | Allocate the idle-tracking refs and fork the watchdog when
+@--idle-timeout@ is positive. The refs are returned for both the
+tracking and the shutdown middleware.
+-}
+setupIdleTimeout :: ServerOptions -> IO (IORef UTCTime, IORef Bool)
+setupIdleTimeout serverOpts = do
     lastRequestRef <- newIORef =<< getCurrentTime
     idleActiveRef <- newIORef False
-
-    -- If --idle-timeout is set, activate immediately (for scripts)
     let idleTimeout = serverIdleTimeout serverOpts
     when (idleTimeout > 0) $ do
-        reportProgress Info $ "Idle timeout: " ++ show idleTimeout ++ "s"
+        reportProgress Info ("Idle timeout: " ++ show idleTimeout ++ "s")
         writeIORef idleActiveRef True
-        _ <- forkIO $ idleWatchdog lastRequestRef idleActiveRef idleTimeout
+        _ <- forkIO (idleWatchdog lastRequestRef idleActiveRef idleTimeout)
         pure ()
+    pure (lastRequestRef, idleActiveRef)
 
-    -- Create app with DatabaseManager - API handlers fetch current DB dynamically
-    baseApp <- Main.createServerApp dbManager (serverTreeDepth serverOpts) staticDir desktopMode password (cfgHosting effectiveConfig) (cfgClassificationPresets effectiveConfig)
-    let appWithIdleAndShutdown =
+-- | Stack idle-tracking, shutdown-endpoint and (optionally) auth middleware.
+wrapWithMiddleware :: Maybe String -> IORef UTCTime -> IORef Bool -> Application -> Application
+wrapWithMiddleware password lastRequestRef idleActiveRef baseApp =
+    let withIdleAndShutdown =
             idleTrackingMiddleware lastRequestRef $
                 shutdownEndpoint lastRequestRef idleActiveRef baseApp
-        finalApp = case password of
-            Just pwd -> authMiddleware (C8.pack pwd) appWithIdleAndShutdown
-            Nothing -> appWithIdleAndShutdown
-        settings = setTimeout 600 $ setPort port defaultSettings
+     in case password of
+            Just pwd -> authMiddleware (C8.pack pwd) withIdleAndShutdown
+            Nothing -> withIdleAndShutdown
+
+-- | Run server with multi-database configuration file
+runServerWithConfig :: CLIConfig -> ServerOptions -> FilePath -> IO ()
+runServerWithConfig cliConfig serverOpts cfgFile = do
+    config <- applyLoadOverride serverOpts <$> loadConfigOrDie cfgFile
+    reportProgress Info "Initializing database manager..."
+    dbManager <- initDatabaseManager config (noCache (globalOptions cliConfig)) (Just cfgFile)
+    logLoadedDatabases dbManager
+    initializeSolverForServer
+    let port = fromMaybe (scPort (cfgServer config)) (serverPort serverOpts)
+        staticDir = fromMaybe "web/dist" (serverStaticDir serverOpts)
+    password <- resolvePassword (globalOptions cliConfig) (cfgServer config)
+    logServerStartup serverOpts port password
+    (lastRequestRef, idleActiveRef) <- setupIdleTimeout serverOpts
+    baseApp <-
+        createServerApp
+            dbManager
+            (serverTreeDepth serverOpts)
+            staticDir
+            (serverDesktopMode serverOpts)
+            password
+            (cfgHosting config)
+            (cfgClassificationPresets config)
+    let finalApp = wrapWithMiddleware password lastRequestRef idleActiveRef baseApp
+        settings = setTimeout 600 (setPort port defaultSettings)
     runSettings settings finalApp
 
 {- | Run config load-only mode (load all databases from config and exit)
@@ -226,7 +247,87 @@ overrideLoad :: [T.Text] -> DatabaseConfig -> DatabaseConfig
 overrideLoad dbNames dbConfig =
     dbConfig{dcLoad = dcName dbConfig `elem` dbNames}
 
--- | Create a Wai application with DatabaseManager
+{- | Swagger-UI shell that pulls the OpenAPI spec from our @/api/v1/openapi.json@
+endpoint. Served verbatim from @/api/v1/docs@; constant per build.
+-}
+swaggerHtml :: BSL.ByteString
+swaggerHtml =
+    "<!DOCTYPE html><html><head><title>volca API</title>\
+    \<meta charset=\"utf-8\"/>\
+    \<link rel=\"stylesheet\" href=\"https://cdn.jsdelivr.net/npm/swagger-ui-dist/swagger-ui.css\">\
+    \</head><body>\
+    \<div id=\"swagger-ui\"></div>\
+    \<script src=\"https://cdn.jsdelivr.net/npm/swagger-ui-dist/swagger-ui-bundle.js\"></script>\
+    \<script>SwaggerUIBundle({url:\"/api/v1/openapi.json\",dom_id:\"#swagger-ui\"})</script>\
+    \</body></html>"
+
+{- | Serve the Elm SPA bundle from @staticDir@, with the SPA's @index.html@ as
+the directory index and no @max-age@ caching headers.
+-}
+spaStaticSettings :: FilePath -> StaticSettings
+spaStaticSettings staticDir =
+    (defaultWebAppSettings staticDir)
+        { ssIndices = [unsafeToPiece (T.pack "index.html")]
+        , ssMaxAge = NoMaxAge
+        }
+
+{- | Serve files under @/static/<rest>@ by stripping the prefix and delegating
+to wai-app-static.
+-}
+serveStripped :: StaticSettings -> Application
+serveStripped settings req respond =
+    let strippedPath = BS.drop 7 (rawPathInfo req)
+        newPathInfo = case pathInfo req of
+            (segment : rest) | segment == T.pack "static" -> rest
+            other -> other
+        staticReq = req{rawPathInfo = strippedPath, pathInfo = newPathInfo}
+     in staticApp settings staticReq respond
+
+{- | Serve the SPA shell (@index.html@) for any non-API path, with cache-busting
+headers so the browser always re-fetches the latest bundle.
+-}
+serveSpaIndex :: StaticSettings -> Application
+serveSpaIndex settings req respond =
+    let indexReq = req{rawPathInfo = C8.pack "/", pathInfo = []}
+        noCacheRespond res =
+            respond $
+                mapResponseHeaders
+                    ( \hs ->
+                        (hCacheControl, C8.pack "no-cache, no-store, must-revalidate")
+                            : (hPragma, C8.pack "no-cache")
+                            : hs
+                    )
+                    res
+     in staticApp settings indexReq noCacheRespond
+
+{- | Path-based request dispatcher. The fixed endpoints (@/mcp@,
+@/api/v1/{openapi.json,licenses,docs,logs/stream}@) match exactly; anything
+under @/api/@ goes through Servant; @/static/@ serves bundled assets; the
+catch-all hands back the SPA so client-side routing can handle the URL.
+-}
+dispatchRequest :: FilePath -> Application -> Application -> Application
+dispatchRequest staticDir mcp apiApp req respond =
+    let path = rawPathInfo req
+        settings = spaStaticSettings staticDir
+     in if
+            | path == "/mcp" -> mcp req respond
+            | path == "/api/v1/openapi.json" ->
+                respond $ responseLBS status200 [(hContentType, "application/json")] (encode volcaOpenApi)
+            | path == "/api/v1/licenses" -> respond licensesResponse
+            | path == "/api/v1/docs" ->
+                respond $ responseLBS status200 [(hContentType, "text/html; charset=utf-8")] swaggerHtml
+            | path == "/api/v1/logs/stream" -> handleLogStream req respond
+            | C8.pack "/api/" `BS.isPrefixOf` path -> apiApp req respond
+            | C8.pack "/static/" `BS.isPrefixOf` path -> serveStripped settings req respond
+            | otherwise -> serveSpaIndex settings req respond
+
+-- | Per-request log line written to stdout (suppressed in desktop mode).
+logRequest :: Request -> IO ()
+logRequest req = do
+    putStrLn $ C8.unpack (requestMethod req) ++ " " ++ C8.unpack (rawPathInfo req <> rawQueryString req)
+    hFlush stdout
+
+-- | Create a Wai application with DatabaseManager.
 createServerApp :: DatabaseManager -> Int -> FilePath -> Bool -> Maybe String -> Maybe HostingConfig -> [ClassificationPreset] -> IO Application
 createServerApp dbManager maxTreeDepth staticDir desktopMode password hostingConfig filterPresets = do
     -- The MCP @web_url@ deep links point at Elm SPA routes served from
@@ -236,66 +337,11 @@ createServerApp dbManager maxTreeDepth staticDir desktopMode password hostingCon
     unless (desktopMode || hasFrontend) $
         reportProgress Info "Frontend not bundled — MCP responses will omit 'web_url'"
     mcp <- mcpApp dbManager filterPresets hasFrontend
-    let openApiJson = encode volcaOpenApi
-        swaggerHtml =
-            "<!DOCTYPE html><html><head><title>volca API</title>\
-            \<meta charset=\"utf-8\"/>\
-            \<link rel=\"stylesheet\" href=\"https://cdn.jsdelivr.net/npm/swagger-ui-dist/swagger-ui.css\">\
-            \</head><body>\
-            \<div id=\"swagger-ui\"></div>\
-            \<script src=\"https://cdn.jsdelivr.net/npm/swagger-ui-dist/swagger-ui-bundle.js\"></script>\
-            \<script>SwaggerUIBundle({url:\"/api/v1/openapi.json\",dom_id:\"#swagger-ui\"})</script>\
-            \</body></html>"
-    return $ \req respond -> do
-        let path = rawPathInfo req
-            qs = rawQueryString req
-            fullUrl = path <> qs
-
-        -- Simple request logging (suppress in desktop mode)
-        unless desktopMode $ do
-            putStrLn $ C8.unpack (requestMethod req) ++ " " ++ C8.unpack fullUrl
-            hFlush stdout
-
-        -- Route requests based on path prefix.
-        let staticSettings =
-                (defaultWebAppSettings staticDir)
-                    { ssIndices = [unsafeToPiece (T.pack "index.html")]
-                    , ssMaxAge = NoMaxAge
-                    }
-
-            serveStripped =
-                let strippedPath = BS.drop 7 path
-                    newPathInfo = case pathInfo req of
-                        (segment : rest) | segment == T.pack "static" -> rest
-                        other -> other
-                    staticReq = req{rawPathInfo = strippedPath, pathInfo = newPathInfo}
-                 in staticApp staticSettings staticReq respond
-
-            serveSpaIndex =
-                let indexReq = req{rawPathInfo = C8.pack "/", pathInfo = []}
-                    noCacheRespond res =
-                        respond $
-                            mapResponseHeaders
-                                ( \hs ->
-                                    (hCacheControl, C8.pack "no-cache, no-store, must-revalidate")
-                                        : (hPragma, C8.pack "no-cache")
-                                        : hs
-                                )
-                                res
-                 in staticApp staticSettings indexReq noCacheRespond
-
-        if
-            | path == "/mcp" -> mcp req respond
-            | path == "/api/v1/openapi.json" ->
-                respond $ responseLBS status200 [(hContentType, "application/json")] openApiJson
-            | path == "/api/v1/licenses" -> respond licensesResponse
-            | path == "/api/v1/docs" ->
-                respond $ responseLBS status200 [(hContentType, "text/html; charset=utf-8")] swaggerHtml
-            | path == "/api/v1/logs/stream" -> handleLogStream req respond
-            | C8.pack "/api/" `BS.isPrefixOf` path ->
-                serve lcaAPI (lcaServer (mkAppEnv dbManager maxTreeDepth password hostingConfig filterPresets)) req respond
-            | C8.pack "/static/" `BS.isPrefixOf` path -> serveStripped
-            | otherwise -> serveSpaIndex
+    let env = mkAppEnv dbManager maxTreeDepth password hostingConfig filterPresets
+        apiApp = serve lcaAPI (lcaServer env)
+    pure $ \req respond -> do
+        unless desktopMode (logRequest req)
+        dispatchRequest staticDir mcp apiApp req respond
 
 -- | SSE endpoint for real-time log streaming
 handleLogStream :: Request -> (Response -> IO ResponseReceived) -> IO ResponseReceived

From 095f2605a524c05b4cb35e55ace341c169c12d6d Mon Sep 17 00:00:00 2001
From: Christophe Combelles <ccomb@free.fr>
Date: Wed, 27 May 2026 23:39:20 +0200
Subject: [PATCH 30/43] refactor(service): dedupe ActivitySummary builders,
 slim searchActivities
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The (ProcessId, Activity) -> ActivitySummary projection was inlined in
five places — search results, supply-chain entries, the products index,
the technosphere navigation helper, and the per-flow consumer list — each
running the same getReferenceProductInfo + activity-allocation +
native-type plumbing. Extract once, replace at every call site that uses
'dbUnits db' as the unit DB.

New helpers:
  - mkActivitySummary :: Database -> ProcessId -> Activity -> ActivitySummary
  - unknownActivitySummary :: Database -> ProcessId -> ActivitySummary
    (surfaces a typed placeholder when the products index points at a
    dropped Activity, instead of silently weaving Nothings through the
    record)

searchActivities body shrunk from ~55 to ~16 lines:
  - tryBm25Retrieve uses 'do' + 'guard' over Maybe instead of nested
    case-with-guards, expressing the four predicates as a flat chain
  - activityRowComparator extracted to a named top-level (the inline
    case-of needed two-level matching to compare per-tuple)
  - paginateSearchResults wraps the offset / limit / hasMore arithmetic
    plus the per-row projection, returning the wire-format SearchResults
    directly — the per-call boilerplate disappears

getActivitiesUsingFlow, getTargetActivity, getAllProductsForActivity
each collapse to a couple of lines.

Verified: cabal build clean, cabal test 1107 / 0 failures.
---
 src/Service.hs | 193 ++++++++++++++++++++++++-------------------------
 1 file changed, 93 insertions(+), 100 deletions(-)

diff --git a/src/Service.hs b/src/Service.hs
index a9253db1..fa2e8496 100644
--- a/src/Service.hs
+++ b/src/Service.hs
@@ -10,7 +10,7 @@ import CLI.Types (DebugMatricesOptions (..))
 import Control.Applicative ((<|>))
 import Control.Concurrent.Async (mapConcurrently)
 import Control.Exception (SomeException, try)
-import Control.Monad (foldM)
+import Control.Monad (foldM, guard)
 import Control.Monad.Trans.Except (ExceptT (..), runExceptT)
 import Data.Aeson (Value, object, toJSON, (.=))
 import Data.Either (fromRight, lefts, rights)
@@ -904,67 +904,59 @@ nameFilterSet db mq = do
     q <- mq
     if T.null (T.strip q) then Nothing else bm25MatchingPids db q
 
+{- | BM25 retrieval applies only when the user provided a non-empty name
+query, didn't request exact matching, and didn't pick an explicit sort
+column. Returns 'Nothing' otherwise so the caller falls back to lex-sorted
+field matching.
+-}
+tryBm25Retrieve :: Database -> SearchFilter -> Maybe [(ProcessId, Activity)]
+tryBm25Retrieve db (SearchFilter core exactMatch) = do
+    q <- afcName core
+    guard (not exactMatch)
+    guard (afcSort core /= Just "name" && afcSort core /= Just "location")
+    guard (not (T.null (T.strip q)))
+    bm25Retrieve db q
+
+{- | Lex-comparator for activity rows. Defaults to name; 'Just "location"'
+picks the location key.
+-}
+activityRowComparator :: Maybe Text -> (ProcessId, Activity) -> (ProcessId, Activity) -> Ordering
+activityRowComparator (Just "location") (_, a) (_, b) = compare (activityLocation a) (activityLocation b)
+activityRowComparator _ (_, a) (_, b) = compare (activityName a) (activityName b)
+
+{- | Apply pagination (offset / limit, defaulting limit to 20) and emit a
+'SearchResults' wrapping the projected page. Pure modulo the supplied
+@searchTimeMs@.
+-}
+paginateSearchResults :: Maybe Int -> Maybe Int -> Double -> ((ProcessId, Activity) -> a) -> [(ProcessId, Activity)] -> SearchResults a
+paginateSearchResults offsetParam limitParam searchTimeMs project xs =
+    let offset = maybe 0 (max 0) offsetParam
+        limit = fromMaybe 20 limitParam
+        total = length xs
+        page = map project (take limit (drop offset xs))
+        hasMore = offset + limit < total
+     in SearchResults page total offset limit hasMore searchTimeMs
+
 {- | Search activities (returns same format as API). The exact-match toggle is
 carried on 'SearchFilter' itself, so there is no separate positional flag.
 -}
 searchActivities :: Database -> SearchFilter -> IO (Either ServiceError Value)
-searchActivities db (SearchFilter core exactMatch) = do
-    let nameParam = afcName core
-        geoParam = afcLocation core
-        productParam = afcProduct core
-        classFilters = afcClassifications core
-        limitParam = afcLimit core
-        offsetParam = afcOffset core
-        sortParam = afcSort core
-        orderParam = afcOrder core
+searchActivities db sFilter@(SearchFilter core exactMatch) = do
     startTime <- getCurrentTime
-    let isDesc = orderParam == Just "desc"
-        explicitSort = sortParam == Just "name" || sortParam == Just "location"
-        -- BM25 retrieval applies only when the user provided a non-empty name
-        -- query, didn't request exact matching, and didn't pick a sort column.
-        bm25Retrieved = case nameParam of
-            Just q
-                | not exactMatch
-                , not explicitSort
-                , not (T.null (T.strip q)) ->
-                    bm25Retrieve db q
-            _ -> Nothing
-        actCmp = case sortParam of
-            Just "location" -> \(_, a) (_, b) -> compare (activityLocation a) (activityLocation b)
-            _ -> \(_, a) (_, b) -> compare (activityName a) (activityName b)
-        allResults = case bm25Retrieved of
+    let allResults = case tryBm25Retrieve db sFilter of
             Just ranked ->
                 -- BM25 path: ranked candidates → structured filters → preserve score order.
-                applyStructuredFilters db geoParam productParam classFilters False ranked
+                applyStructuredFilters db (afcLocation core) (afcProduct core) (afcClassifications core) False ranked
             Nothing ->
                 -- Non-BM25 path: AND-of-tokens name filter + lex sort.
-                let rawResults = findActivitiesByFields db nameParam geoParam productParam classFilters exactMatch
-                 in L.sortBy (if isDesc then flip actCmp else actCmp) rawResults
-        offset = maybe 0 (max 0) offsetParam
-        limit = fromMaybe 20 limitParam
-        total = length allResults
-        pagedResults = take limit $ drop offset allResults
-        hasMore = offset + limit < total
-        activityResults =
-            map
-                ( \(processId, activity) ->
-                    let (prodName, prodAmount, prodUnit) = getReferenceProductInfo (dbTechFlows db) (dbUnits db) activity
-                     in ActivitySummary
-                            { prsProcessId = processIdToText db processId
-                            , prsName = activityName activity
-                            , prsLocation = activityLocation activity
-                            , prsProduct = prodName
-                            , prsProductAmount = prodAmount
-                            , prsProductUnit = prodUnit
-                            , prsAllocationPercent = activityAllocationPercent activity
-                            , prsAllocationFormula = activityAllocationFormula activity
-                            , prsNativeType = activityNativeType activity
-                            }
-                )
-                pagedResults
+                let cmp = activityRowComparator (afcSort core)
+                    ordered = if afcOrder core == Just "desc" then flip cmp else cmp
+                    raw = findActivitiesByFields db (afcName core) (afcLocation core) (afcProduct core) (afcClassifications core) exactMatch
+                 in L.sortBy ordered raw
     endTime <- getCurrentTime
     let searchTimeMs = realToFrac (diffUTCTime endTime startTime) * 1000 :: Double
-    return $ Right $ toJSON $ SearchResults activityResults total offset limit hasMore searchTimeMs
+        results = paginateSearchResults (afcOffset core) (afcLimit core) searchTimeMs (uncurry (mkActivitySummary db)) allResults
+    pure $ Right $ toJSON results
 
 -- | List all classification systems and their distinct values for a database
 getClassifications :: Database -> [ClassificationSystem]
@@ -1232,49 +1224,62 @@ getReferenceProductInfo flows units activity =
              in (name, amount, uName)
         [] -> ("", 1.0, "")
 
--- | Get all products (ProcessIds) for an activity UUID using the products index
+{- | Build an 'ActivitySummary' from a (ProcessId, Activity) pair. Encapsulates
+the reference-product + allocation + native-type projection shared by
+search results, supply-chain entries, inventory metadata, and exchange-target
+navigation. Uses @dbUnits db@ for the unit DB — callers needing a merged
+cross-DB unit DB build the record by hand.
+-}
+mkActivitySummary :: Database -> ProcessId -> Activity -> ActivitySummary
+mkActivitySummary db processId activity =
+    let (prodName, prodAmount, prodUnit) = getReferenceProductInfo (dbTechFlows db) (dbUnits db) activity
+     in ActivitySummary
+            { prsProcessId = processIdToText db processId
+            , prsName = activityName activity
+            , prsLocation = activityLocation activity
+            , prsProduct = prodName
+            , prsProductAmount = prodAmount
+            , prsProductUnit = prodUnit
+            , prsAllocationPercent = activityAllocationPercent activity
+            , prsAllocationFormula = activityAllocationFormula activity
+            , prsNativeType = activityNativeType activity
+            }
+
+{- | Placeholder summary surfaced when the products index points at a
+ProcessId that no longer resolves to an Activity. Carries the raw pid so the
+consumer can debug, rather than silently dropping the entry.
+-}
+unknownActivitySummary :: Database -> ProcessId -> ActivitySummary
+unknownActivitySummary db pid =
+    ActivitySummary
+        { prsProcessId = processIdToText db pid
+        , prsName = "Unknown"
+        , prsLocation = ""
+        , prsProduct = "Unknown"
+        , prsProductAmount = 1.0
+        , prsProductUnit = ""
+        , prsAllocationPercent = Nothing
+        , prsAllocationFormula = Nothing
+        , prsNativeType = Nothing
+        }
+
+-- | Get all products (ProcessIds) for an activity UUID using the products index.
 getAllProductsForActivity :: Database -> UUID -> [ActivitySummary]
 getAllProductsForActivity db activityUUID =
     case M.lookup activityUUID (dbActivityProductsIndex db) of
+        Nothing -> []
         Just processIds ->
-            [ let mAct = findActivityByProcessId db pid
-                  (prodName, prodAmount, prodUnit) = case mAct of
-                    Just a -> getReferenceProductInfo (dbTechFlows db) (dbUnits db) a
-                    Nothing -> ("Unknown", 1.0, "")
-               in ActivitySummary
-                    { prsProcessId = processIdToText db pid
-                    , prsName = maybe "Unknown" activityName mAct
-                    , prsLocation = maybe "" activityLocation mAct
-                    , prsProduct = prodName
-                    , prsProductAmount = prodAmount
-                    , prsProductUnit = prodUnit
-                    , prsAllocationPercent = mAct >>= activityAllocationPercent
-                    , prsAllocationFormula = mAct >>= activityAllocationFormula
-                    , prsNativeType = mAct >>= activityNativeType
-                    }
+            [ maybe (unknownActivitySummary db pid) (mkActivitySummary db pid) (findActivityByProcessId db pid)
             | pid <- processIds
             ]
-        Nothing -> []
 
--- | Get target activity for technosphere navigation
+-- | Get target activity for technosphere navigation.
 getTargetActivity :: Database -> Exchange -> Maybe ActivitySummary
 getTargetActivity db exchange = do
     targetId <- exchangeActivityLinkId exchange
     targetActivity <- findActivityByActivityUUID db targetId
     processId <- findProcessIdForActivity db targetActivity
-    let (prodName, prodAmount, prodUnit) = getReferenceProductInfo (dbTechFlows db) (dbUnits db) targetActivity
-    return $
-        ActivitySummary
-            { prsProcessId = processIdToText db processId
-            , prsName = activityName targetActivity
-            , prsLocation = activityLocation targetActivity
-            , prsProduct = prodName
-            , prsProductAmount = prodAmount
-            , prsProductUnit = prodUnit
-            , prsAllocationPercent = activityAllocationPercent targetActivity
-            , prsAllocationFormula = activityAllocationFormula targetActivity
-            , prsNativeType = activityNativeType targetActivity
-            }
+    pure (mkActivitySummary db processId targetActivity)
 
 {- | Get reference product as FlowDetail (if exists). Reference products are
 technosphere by definition.
@@ -1289,29 +1294,17 @@ getActivityReferenceProductDetail db activity = do
     let uName = getUnitNameForTechFlow (dbUnits db) flow
     return $ FlowDetail (ApiTechFlow flow) uName usageCount
 
--- | Get activities that use a specific flow as ActivitySummary list
+-- | Get activities that use a specific flow as ActivitySummary list.
 getActivitiesUsingFlow :: Database -> UUID -> [ActivitySummary]
 getActivitiesUsingFlow db flowUUID =
     case M.lookup flowUUID (idxByFlow $ dbIndexes db) of
         Nothing -> []
         Just activityUUIDs ->
-            let uniqueUUIDs = S.toList $ S.fromList activityUUIDs -- Deduplicate activity UUIDs
-             in [ let (prodName, prodAmount, prodUnit) = getReferenceProductInfo (dbTechFlows db) (dbUnits db) proc
-                   in ActivitySummary
-                        { prsProcessId = processIdToText db processId
-                        , prsName = activityName proc
-                        , prsLocation = activityLocation proc
-                        , prsProduct = prodName
-                        , prsProductAmount = prodAmount
-                        , prsProductUnit = prodUnit
-                        , prsAllocationPercent = activityAllocationPercent proc
-                        , prsAllocationFormula = activityAllocationFormula proc
-                        , prsNativeType = activityNativeType proc
-                        }
-                | procUUID <- uniqueUUIDs
-                , Just proc <- [findActivityByActivityUUID db procUUID]
-                , Just processId <- [findProcessIdForActivity db proc]
-                ]
+            [ mkActivitySummary db processId proc
+            | procUUID <- S.toList (S.fromList activityUUIDs)
+            , Just proc <- [findActivityByActivityUUID db procUUID]
+            , Just processId <- [findProcessIdForActivity db proc]
+            ]
 
 {- | Sentinel returned only when an exchange's unit UUID failed to resolve.
 The exchange unit-name field already surfaces the same gap via

From 7fcd75d0571586be0c030873aa2c8c66fbac6621 Mon Sep 17 00:00:00 2001
From: Christophe Combelles <ccomb@free.fr>
Date: Thu, 28 May 2026 17:50:48 +0200
Subject: [PATCH 31/43] ecospold1: dedup the two SAX folds into shared handlers
 (#94)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

## Summary

`EcoSpold.Parser1` carried two ~95% identical `X.fold` parsers —
`parseWithXeno`
(first dataset) and `parseAllWithXeno` (all datasets) — including
byte-for-byte copies
of `buildExchange` and `buildResult`. This consolidates them and
flattens the
"triangular" handlers, with no behavior change.

- **One shared handler set.** All SAX callbacks plus
`buildExchange`/`buildResult` are
lifted to top level. Both public functions become thin drivers over a
single
`foldEcoSpold1`, differing only in how they read the final `ParseState`.
- **Flat guarded setters.** The per-field `if isElement … then … else
<old>` attribute
updates become guarded single-field setters (`setRefFunctionAttr`,
`setExchangeAttr`).
- **`buildExchange`** is now three top-level guards (`isWasteFlow` /
`isBiosphere` /
  technosphere) over a shared `where`, instead of a nested-if pyramid.
- **Close handlers** gain `popPath`/`popElement` helpers, removing
repeated
  path/text-accumulator boilerplate.
- **Exhaustive matches.** Every `ElementContext` and `Exchange` match is
now total
  (no wildcards), so incomplete matches are caught at compile time.

Net: 811 → 556 lines (−255), no new dependencies beyond
`Data.Bifunctor`.

## Test plan

- [x] `cabal build` — clean, no incomplete-pattern warnings
- [x] `cabal run lca-tests -- --match "/EcoSpold1/"` — 48/48 pass (error
cases,
      comments, waste-on-inputGroup=5, multi-dataset)
- [x] Full suite — 1107 examples, 0 failures (LoaderSpec exercises the
EcoSpold1 path)
---
 src/EcoSpold/Parser1.hs | 891 +++++++++++++++-------------------------
 1 file changed, 327 insertions(+), 564 deletions(-)

diff --git a/src/EcoSpold/Parser1.hs b/src/EcoSpold/Parser1.hs
index 2b7eba0a..0c18de02 100644
--- a/src/EcoSpold/Parser1.hs
+++ b/src/EcoSpold/Parser1.hs
@@ -25,6 +25,7 @@ module EcoSpold.Parser1 (
 ) where
 
 import Control.Monad (forM_)
+import Data.Bifunctor (first)
 import qualified Data.ByteString as BS
 import Data.Either (lefts, rights)
 import qualified Data.Map as M
@@ -147,303 +148,333 @@ initialParseState =
         , psCompletedActivities = []
         }
 
--- | Xeno SAX parser for EcoSpold1
-parseWithXeno :: BS.ByteString -> Either String (Activity, [TechnosphereFlow], [BiosphereFlow], [WasteFlow], [Unit], Int, M.Map UUID Int)
-parseWithXeno xmlContent =
-    case X.fold openTag attribute endOpen text closeTag cdata initialParseState xmlContent of
-        Left err -> Left (show err)
-        Right finalState ->
-            case psCompletedActivities finalState of
-                (result : _) -> result
-                [] -> buildResult finalState
+-- ----------------------------------------------------------------------------
+-- Shared SAX handlers (used by both parseWithXeno and parseAllWithXeno)
+-- ----------------------------------------------------------------------------
+
+-- | Drop the current element from the path, keeping accumulated text.
+popPath :: ParseState -> ParseState
+popPath s = s{psPath = drop 1 (psPath s)}
+
+-- | Drop the current element from the path and clear accumulated text.
+popElement :: ParseState -> ParseState
+popElement s = s{psPath = drop 1 (psPath s), psTextAccum = []}
+
+-- | Open tag: push the element onto the path and switch context on the
+-- structural elements we care about. Groups only enter their context from
+-- within an exchange; any other current context is preserved.
+onOpenTag :: ParseState -> BS.ByteString -> ParseState
+onOpenTag state tagName =
+    state{psPath = tagName : psPath state, psContext = newContext, psTextAccum = []}
+  where
+    enterGroup wrap = case psContext state of
+        InExchange edata -> wrap edata
+        ctx -> ctx
+    newContext
+        | isElement tagName "referenceFunction" = InReferenceFunction
+        | isElement tagName "geography" = InGeography
+        | isElement tagName "exchange" = InExchange emptyExchangeData
+        | isElement tagName "inputGroup" = enterGroup InInputGroup
+        | isElement tagName "outputGroup" = enterGroup InOutputGroup
+        | otherwise = psContext state
+
+-- | Attribute: route by current context to the matching field setter.
+onAttribute :: ParseState -> BS.ByteString -> BS.ByteString -> ParseState
+onAttribute state name value = case psContext state of
+    InReferenceFunction -> setRefFunctionAttr name value state
+    InGeography
+        | isElement name "location" -> state{psLocation = Just (bsToText value)}
+        | otherwise -> state
+    InExchange edata -> state{psContext = InExchange (setExchangeAttr name value edata)}
+    InInputGroup _ -> datasetNumberAttr
+    InOutputGroup _ -> datasetNumberAttr
+    Other -> datasetNumberAttr
   where
-    -- Open tag handler
-    openTag state tagName =
-        let newPath = tagName : psPath state
-            newContext
-                | isElement tagName "referenceFunction" = InReferenceFunction
-                | isElement tagName "geography" = InGeography
-                | isElement tagName "exchange" = InExchange emptyExchangeData
-                | isElement tagName "inputGroup" =
-                    case psContext state of
-                        InExchange edata -> InInputGroup edata -- Preserve exchange data
-                        _ -> psContext state
-                | isElement tagName "outputGroup" =
-                    case psContext state of
-                        InExchange edata -> InOutputGroup edata -- Preserve exchange data
-                        _ -> psContext state
-                | otherwise = psContext state
-         in state{psPath = newPath, psContext = newContext, psTextAccum = []}
-
-    -- Attribute handler
-    attribute state name value =
-        case psContext state of
-            InReferenceFunction ->
-                let st =
-                        state
-                            { psActivityName =
-                                if isElement name "name"
-                                    then Just (bsToText value)
-                                    else psActivityName state
-                            , psRefUnit =
-                                if isElement name "unit"
-                                    then Just (bsToText value)
-                                    else psRefUnit state
-                            , psActivityCategory =
-                                if isElement name "category"
-                                    then bsToText value
-                                    else psActivityCategory state
-                            , psActivitySubCategory =
-                                if isElement name "subCategory"
-                                    then bsToText value
-                                    else psActivitySubCategory state
-                            , psDescription =
-                                if isElement name "generalComment" && not (BS.null value)
-                                    then bsToText value : psDescription state
-                                    else psDescription state
-                            }
-                 in st
-            InGeography ->
-                if isElement name "location"
-                    then state{psLocation = Just (bsToText value)}
-                    else state
-            InExchange edata ->
-                let updated =
-                        edata
-                            { exNumber = if isElement name "number" then bsToInt value else exNumber edata
-                            , exName = if isElement name "name" then bsToText value else exName edata
-                            , exCategory = if isElement name "category" then bsToText value else exCategory edata
-                            , exSubCategory = if isElement name "subCategory" then bsToText value else exSubCategory edata
-                            , exLocation = if isElement name "location" then bsToText value else exLocation edata
-                            , exUnit = if isElement name "unit" then bsToText value else exUnit edata
-                            , exMeanValue = if isElement name "meanValue" then bsToDouble value else exMeanValue edata
-                            , exCASNumber = if isElement name "CASNumber" then bsToText value else exCASNumber edata
-                            , exFormula = if isElement name "formula" then bsToText value else exFormula edata
-                            , exInfrastructure =
-                                if isElement name "infrastructureProcess"
-                                    then bsToText value == "true"
-                                    else exInfrastructure edata
-                            , exComment = if isElement name "generalComment" then bsToText value else exComment edata
-                            }
-                 in state{psContext = InExchange updated}
-            _ ->
-                -- Handle dataset number at top level
-                if isElement name "number" && any (isElement "dataset") (psPath state)
-                    then state{psDatasetNumber = bsToInt value}
-                    else state
-
-    -- End of opening tag
-    endOpen state _tagName = state
-
-    -- Text content handler
-    text state content =
-        let trimmed = BS.dropWhile (== 32) $ BS.dropWhileEnd (== 32) content
-         in if BS.null trimmed
-                then state
-                else state{psTextAccum = trimmed : psTextAccum state}
-
-    -- Close tag handler
-    closeTag state tagName
-        | isElement tagName "inputGroup" =
-            let txt = T.strip $ T.concat $ reverse $ map bsToText (psTextAccum state)
-             in case psContext state of
-                    InInputGroup edata ->
-                        -- Restore parent exchange context with updated inputGroup
-                        state{psContext = InExchange edata{exInputGroup = txt}, psPath = drop 1 (psPath state), psTextAccum = []}
-                    InExchange edata ->
-                        state{psContext = InExchange edata{exInputGroup = txt}, psPath = drop 1 (psPath state), psTextAccum = []}
-                    _ -> state{psPath = drop 1 (psPath state), psTextAccum = []}
-        | isElement tagName "outputGroup" =
-            let txt = T.strip $ T.concat $ reverse $ map bsToText (psTextAccum state)
-             in case psContext state of
-                    InOutputGroup edata ->
-                        -- Restore parent exchange context with updated outputGroup
-                        state{psContext = InExchange edata{exOutputGroup = txt}, psPath = drop 1 (psPath state), psTextAccum = []}
-                    InExchange edata ->
-                        state{psContext = InExchange edata{exOutputGroup = txt}, psPath = drop 1 (psPath state), psTextAccum = []}
-                    _ -> state{psPath = drop 1 (psPath state), psTextAccum = []}
-        | isElement tagName "exchange" =
-            case psContext state of
-                InExchange edata ->
-                    let (exchange, parsedFlow, unit) = buildExchange (psDatasetNumber state) (psLocation state) edata
-                        !supplierLinks = case exchange of
-                            TechnosphereExchange{techRole = Input}
-                                | exNumber edata /= 0 ->
-                                    M.insert (exchangeFlowId exchange) (exNumber edata) (psSupplierLinks state)
-                            _ -> psSupplierLinks state
-                        (techs, bios, wastes) = case parsedFlow of
-                            ParsedTech tf -> (tf : psTechFlows state, psBioFlows state, psWasteFlows state)
-                            ParsedBio bf -> (psTechFlows state, bf : psBioFlows state, psWasteFlows state)
-                            ParsedWaste wf -> (psTechFlows state, psBioFlows state, wf : psWasteFlows state)
-                     in state
-                            { psExchanges = exchange : psExchanges state
-                            , psTechFlows = techs
-                            , psBioFlows = bios
-                            , psWasteFlows = wastes
-                            , psUnits = unit : psUnits state
-                            , psSupplierLinks = supplierLinks
-                            , psContext = Other
-                            , psPath = drop 1 (psPath state)
-                            , psTextAccum = []
-                            }
-                _ -> state{psPath = drop 1 (psPath state)}
-        | isElement tagName "referenceFunction" =
-            state{psContext = Other, psPath = drop 1 (psPath state), psTextAccum = []}
-        | isElement tagName "geography" =
-            state{psContext = Other, psPath = drop 1 (psPath state), psTextAccum = []}
-        -- Handle dataset close tag: accumulate completed activity for multi-dataset files
-        | isElement tagName "dataset" =
-            let !result = buildResult state
-                -- Reset dataset-specific fields for next dataset
-                -- Preserve: psPath (after popping current element), psCompletedActivities
-                resetState =
-                    state
-                        { psCompletedActivities = result : psCompletedActivities state
-                        , psDatasetNumber = 0
-                        , psActivityName = Nothing
-                        , psActivityCategory = ""
-                        , psActivitySubCategory = ""
-                        , psLocation = Nothing
-                        , psRefUnit = Nothing
-                        , psDescription = []
-                        , psExchanges = []
-                        , psTechFlows = []
-                        , psBioFlows = []
-                        , psWasteFlows = []
-                        , psUnits = []
-                        , psContext = Other
-                        , psTextAccum = []
-                        , psSupplierLinks = M.empty
-                        }
-             in resetState{psPath = drop 1 (psPath state)}
-        | otherwise =
-            state{psPath = drop 1 (psPath state)}
-
-    -- CDATA handler
-    cdata = text
-
-    -- Build exchange, flow, and unit from exchange data
-    -- activityLoc is the activity's location for fallback
-    buildExchange :: Int -> Maybe Text -> ExchangeData -> (Exchange, ParsedFlow, Unit)
-    buildExchange datasetNum activityLoc edata =
-        let flowId = generateFlowUUID datasetNum (exNumber edata) (exName edata) (exCategory edata)
-            unitId = generateUnitUUID (exUnit edata)
-
-            -- Determine flow type from input/output groups
-            -- EcoSpold1 groups:
-            -- Input: 1-3 = technosphere, 4 = resource (biosphere)
-            -- Output: 0 = reference product, 1-3 = byproduct/co-product, 4 = emission (biosphere)
-            inputGroup = exInputGroup edata
-            outputGroup = exOutputGroup edata
-
-            isBiosphere = inputGroup == "4" || outputGroup == "4"
-            isInput = not (T.null inputGroup)
-            isReferenceProduct = outputGroup == "0"
-            -- SimaPro's third flow class ('Final waste flows'). EcoSpold1
-            -- exports surface them on inputGroup=5 to fit the 4-type
-            -- input/output model, but the category attribute survives.
-            -- Route to WasteExchange so they bypass cross-DB technosphere
-            -- linking (orphan outputs, not demands) and land in the
-            -- dedicated waste-side surfaces.
-            isWasteFlow = exCategory edata == "Final waste flows"
-
-            -- Exchange location: use exchange's own location
-            -- For technosphere: leave empty if not specified, so Loader can use name-only lookup
-            -- For biosphere: fall back to activity location (biosphere flows don't need supplier linking)
-            exchangeLocation =
-                if T.null (exLocation edata)
-                    then
-                        if isBiosphere
-                            then fromMaybe "" activityLoc
-                            else "" -- Technosphere: leave empty for name-only lookup in Loader
-                    else exLocation edata
-
-            cas = if T.null (exCASNumber edata) then Nothing else Just (exCASNumber edata)
-            unit = Unit unitId (exUnit edata) (exUnit edata) ""
-
-            -- Role: EcoSpold1 never emits ReferenceInput (no waste-treatment encoding here)
-            techRoleFor
-                | isReferenceProduct = ReferenceProduct
-                | isInput = Input
-                | otherwise = Coproduct
-         in -- Set activityLinkId to nil - will be resolved later in Loader using
-            -- (flowName, exchangeLocation) lookup against supplier activities
-            if isWasteFlow
-                then
-                    let wf = WasteFlow flowId (exName edata) unitId M.empty cas Nothing
-                        ex =
-                            WasteExchange
-                                { waFlowId = flowId
-                                , waAmount = exMeanValue edata
-                                , waUnitId = unitId
-                                , -- Final waste flows surface on inputGroup=5
-                                  -- (consumer's POV: input from a hypothetical
-                                  -- treatment service). Preserve that semantic
-                                  -- by mirroring isInput here.
-                                  waIsInput = isInput
-                                , waActivityLinkId = UUID.nil
-                                , waProcessLinkId = Nothing
-                                , waLocation = exchangeLocation
-                                , waComment = nonEmptyText (exComment edata)
-                                , waPedigree = Nothing
-                                }
-                     in (ex, ParsedWaste wf, unit)
-                else
-                    if isBiosphere
-                        then
-                            let subCat = if T.null (exSubCategory edata) then Nothing else Just (exSubCategory edata)
-                                compartment =
-                                    if T.null (exCategory edata) && isNothing subCat
-                                        then Nothing
-                                        else Just (Compartment (exCategory edata) subCat)
-                                bioFlow = BiosphereFlow flowId (exName edata) unitId M.empty cas Nothing compartment
-                                ex =
-                                    BiosphereExchange
-                                        { bioFlowId = flowId
-                                        , bioAmount = exMeanValue edata
-                                        , bioUnitId = unitId
-                                        , bioDirection = if inputGroup == "4" then Resource else Emission
-                                        , bioLocation = exchangeLocation
-                                        , bioComment = nonEmptyText (exComment edata)
-                                        , bioPedigree = Nothing
-                                        }
-                             in (ex, ParsedBio bioFlow, unit)
-                        else
-                            let techFlow = TechnosphereFlow flowId (exName edata) unitId M.empty cas Nothing
-                                ex =
-                                    TechnosphereExchange
-                                        { techFlowId = flowId
-                                        , techAmount = exMeanValue edata
-                                        , techUnitId = unitId
-                                        , techRole = techRoleFor
-                                        , techActivityLinkId = UUID.nil
-                                        , techProcessLinkId = Nothing
-                                        , techLocation = exchangeLocation
-                                        , techComment = nonEmptyText (exComment edata)
-                                        , techPedigree = Nothing
-                                        }
-                             in (ex, ParsedTech techFlow, unit)
-
-    -- Build final result
-    buildResult :: ParseState -> Either String (Activity, [TechnosphereFlow], [BiosphereFlow], [WasteFlow], [Unit], Int, M.Map UUID Int)
-    buildResult st =
-        let name = fromMaybe "Unknown Activity" (psActivityName st)
-            location = fromMaybe "GLO" (psLocation st)
-            refUnit = fromMaybe "UNKNOWN_UNIT" (psRefUnit st)
-            description = reverse (psDescription st)
-            classifications =
-                M.fromList $
-                    filter
-                        (not . T.null . snd)
-                        [("Category", psActivityCategory st), ("SubCategory", psActivitySubCategory st)]
-            activity = Activity name description M.empty classifications location refUnit (reverse $ psExchanges st) M.empty M.empty Nothing Nothing Nothing
-            techs = reverse (psTechFlows st)
-            bios = reverse (psBioFlows st)
-            wastes = reverse (psWasteFlows st)
-            units = reverse (psUnits st)
-         in case applyCutoffStrategy activity of
-                Right act -> Right (act, techs, bios, wastes, units, psDatasetNumber st, psSupplierLinks st)
-                Left err -> Left err
+    -- The dataset's numeric id lives on the top-level <dataset> element.
+    datasetNumberAttr
+        | isElement name "number" && any (isElement "dataset") (psPath state) =
+            state{psDatasetNumber = bsToInt value}
+        | otherwise = state
+
+-- | Apply a single referenceFunction attribute to the parse state.
+setRefFunctionAttr :: BS.ByteString -> BS.ByteString -> ParseState -> ParseState
+setRefFunctionAttr name value st
+    | isElement name "name" = st{psActivityName = Just (bsToText value)}
+    | isElement name "unit" = st{psRefUnit = Just (bsToText value)}
+    | isElement name "category" = st{psActivityCategory = bsToText value}
+    | isElement name "subCategory" = st{psActivitySubCategory = bsToText value}
+    | isElement name "generalComment", not (BS.null value) =
+        st{psDescription = bsToText value : psDescription st}
+    | otherwise = st
+
+-- | Apply a single exchange attribute to the in-progress exchange.
+setExchangeAttr :: BS.ByteString -> BS.ByteString -> ExchangeData -> ExchangeData
+setExchangeAttr name value e
+    | isElement name "number" = e{exNumber = bsToInt value}
+    | isElement name "name" = e{exName = bsToText value}
+    | isElement name "category" = e{exCategory = bsToText value}
+    | isElement name "subCategory" = e{exSubCategory = bsToText value}
+    | isElement name "location" = e{exLocation = bsToText value}
+    | isElement name "unit" = e{exUnit = bsToText value}
+    | isElement name "meanValue" = e{exMeanValue = bsToDouble value}
+    | isElement name "CASNumber" = e{exCASNumber = bsToText value}
+    | isElement name "formula" = e{exFormula = bsToText value}
+    | isElement name "infrastructureProcess" = e{exInfrastructure = bsToText value == "true"}
+    | isElement name "generalComment" = e{exComment = bsToText value}
+    | otherwise = e
+
+-- | End of an opening tag: nothing to do for this format.
+onEndOpen :: ParseState -> BS.ByteString -> ParseState
+onEndOpen state _tagName = state
+
+-- | Accumulate non-blank text content (also used for CDATA).
+onText :: ParseState -> BS.ByteString -> ParseState
+onText state content =
+    let trimmed = BS.dropWhile (== 32) $ BS.dropWhileEnd (== 32) content
+     in if BS.null trimmed
+            then state
+            else state{psTextAccum = trimmed : psTextAccum state}
+
+-- | Close tag: finalise the element that is ending.
+onCloseTag :: ParseState -> BS.ByteString -> ParseState
+onCloseTag state tagName
+    | isElement tagName "inputGroup" = closeGroup restoreInputGroup (\e t -> e{exInputGroup = t}) state
+    | isElement tagName "outputGroup" = closeGroup restoreOutputGroup (\e t -> e{exOutputGroup = t}) state
+    | isElement tagName "exchange" = closeExchange state
+    | isElement tagName "referenceFunction" = (popElement state){psContext = Other}
+    | isElement tagName "geography" = (popElement state){psContext = Other}
+    | isElement tagName "dataset" = closeDataset state
+    | otherwise = popPath state
+
+-- | Close an input/output group: fold its accumulated text into the parent
+-- exchange's matching group field and return to the exchange context.
+-- @ownGroup@ yields the exchange data to restore when the current context is
+-- the group being closed (or, defensively, a bare exchange); any other context
+-- just pops the element. The opposite group never restores, so a stray
+-- </inputGroup> inside an <outputGroup> (malformed) is ignored, not merged.
+closeGroup :: (ElementContext -> Maybe ExchangeData) -> (ExchangeData -> Text -> ExchangeData) -> ParseState -> ParseState
+closeGroup ownGroup setField state =
+    case ownGroup (psContext state) of
+        Just edata -> (popElement state){psContext = InExchange (setField edata txt)}
+        Nothing -> popElement state
+  where
+    txt = T.strip $ T.concat $ reverse $ map bsToText (psTextAccum state)
+
+-- | Exchange data to restore when closing an <inputGroup>: the group we opened,
+-- or (defensively) a bare exchange. The opposite group does not restore.
+restoreInputGroup :: ElementContext -> Maybe ExchangeData
+restoreInputGroup (InInputGroup edata) = Just edata
+restoreInputGroup (InExchange edata) = Just edata
+restoreInputGroup InOutputGroup{} = Nothing
+restoreInputGroup InReferenceFunction = Nothing
+restoreInputGroup InGeography = Nothing
+restoreInputGroup Other = Nothing
+
+-- | Exchange data to restore when closing an <outputGroup>: the group we opened,
+-- or (defensively) a bare exchange. The opposite group does not restore.
+restoreOutputGroup :: ElementContext -> Maybe ExchangeData
+restoreOutputGroup (InOutputGroup edata) = Just edata
+restoreOutputGroup (InExchange edata) = Just edata
+restoreOutputGroup InInputGroup{} = Nothing
+restoreOutputGroup InReferenceFunction = Nothing
+restoreOutputGroup InGeography = Nothing
+restoreOutputGroup Other = Nothing
+
+-- | Close an exchange: build its exchange/flow/unit, accumulate them, and
+-- record the supplier link for technosphere inputs.
+closeExchange :: ParseState -> ParseState
+closeExchange state = case psContext state of
+    InExchange edata ->
+        let (exchange, parsedFlow, unit) = buildExchange (psDatasetNumber state) (psLocation state) edata
+            !supplierLinks = case exchange of
+                TechnosphereExchange{techRole = Input}
+                    | exNumber edata /= 0 ->
+                        M.insert (exchangeFlowId exchange) (exNumber edata) (psSupplierLinks state)
+                TechnosphereExchange{} -> psSupplierLinks state
+                BiosphereExchange{} -> psSupplierLinks state
+                WasteExchange{} -> psSupplierLinks state
+            (techs, bios, wastes) = case parsedFlow of
+                ParsedTech tf -> (tf : psTechFlows state, psBioFlows state, psWasteFlows state)
+                ParsedBio bf -> (psTechFlows state, bf : psBioFlows state, psWasteFlows state)
+                ParsedWaste wf -> (psTechFlows state, psBioFlows state, wf : psWasteFlows state)
+         in (popElement state)
+                { psExchanges = exchange : psExchanges state
+                , psTechFlows = techs
+                , psBioFlows = bios
+                , psWasteFlows = wastes
+                , psUnits = unit : psUnits state
+                , psSupplierLinks = supplierLinks
+                , psContext = Other
+                }
+    InInputGroup _ -> popPath state
+    InOutputGroup _ -> popPath state
+    InReferenceFunction -> popPath state
+    InGeography -> popPath state
+    Other -> popPath state
+
+-- | Close a dataset: snapshot the completed activity and reset per-dataset
+-- accumulators for the next one (multi-dataset files).
+closeDataset :: ParseState -> ParseState
+closeDataset state =
+    let !result = buildResult state
+     in popPath ((resetDataset state){psCompletedActivities = result : psCompletedActivities state})
+
+-- | Clear per-dataset accumulators, preserving cross-dataset state
+-- (psPath and psCompletedActivities).
+resetDataset :: ParseState -> ParseState
+resetDataset state =
+    state
+        { psDatasetNumber = 0
+        , psActivityName = Nothing
+        , psActivityCategory = ""
+        , psActivitySubCategory = ""
+        , psLocation = Nothing
+        , psRefUnit = Nothing
+        , psDescription = []
+        , psExchanges = []
+        , psTechFlows = []
+        , psBioFlows = []
+        , psWasteFlows = []
+        , psUnits = []
+        , psContext = Other
+        , psTextAccum = []
+        , psSupplierLinks = M.empty
+        }
+
+{- | Build exchange, flow, and unit from exchange data.
+@activityLoc@ is the activity's location, used as a biosphere fallback.
+
+EcoSpold1 groups:
+  Input:  1-3 = technosphere, 4 = resource (biosphere)
+  Output: 0 = reference product, 1-3 = byproduct/co-product, 4 = emission (biosphere)
+-}
+buildExchange :: Int -> Maybe Text -> ExchangeData -> (Exchange, ParsedFlow, Unit)
+buildExchange datasetNum activityLoc edata
+    | isWasteFlow = (wasteEx, ParsedWaste wasteFlow, unit)
+    | isBiosphere = (bioEx, ParsedBio bioFlow, unit)
+    | otherwise = (techEx, ParsedTech techFlow, unit)
+  where
+    flowId = generateFlowUUID datasetNum (exNumber edata) (exName edata) (exCategory edata)
+    unitId = generateUnitUUID (exUnit edata)
+    unit = Unit unitId (exUnit edata) (exUnit edata) ""
+
+    inputGroup = exInputGroup edata
+    outputGroup = exOutputGroup edata
+    isBiosphere = inputGroup == "4" || outputGroup == "4"
+    isInput = not (T.null inputGroup)
+    isReferenceProduct = outputGroup == "0"
+    -- SimaPro's third flow class ('Final waste flows'). EcoSpold1 exports
+    -- surface them on inputGroup=5 to fit the 4-type input/output model, but
+    -- the category attribute survives. Routed to WasteExchange so they bypass
+    -- cross-DB technosphere linking (orphan outputs, not demands).
+    isWasteFlow = exCategory edata == "Final waste flows"
+
+    -- Technosphere: leave empty if unspecified so the Loader can do name-only
+    -- lookup. Biosphere: fall back to the activity location (no supplier link).
+    exchangeLocation
+        | not (T.null (exLocation edata)) = exLocation edata
+        | isBiosphere = fromMaybe "" activityLoc
+        | otherwise = ""
+
+    cas = if T.null (exCASNumber edata) then Nothing else Just (exCASNumber edata)
+
+    -- EcoSpold1 never emits ReferenceInput (no waste-treatment encoding here).
+    techRoleFor
+        | isReferenceProduct = ReferenceProduct
+        | isInput = Input
+        | otherwise = Coproduct
+
+    -- activityLinkId stays nil; the Loader resolves it later via
+    -- (flowName, exchangeLocation) against supplier activities.
+    wasteFlow = WasteFlow flowId (exName edata) unitId M.empty cas Nothing
+    wasteEx =
+        WasteExchange
+            { waFlowId = flowId
+            , waAmount = exMeanValue edata
+            , waUnitId = unitId
+            , -- Mirror isInput: final waste flows surface on inputGroup=5
+              -- (consumer's POV: input from a hypothetical treatment service).
+              waIsInput = isInput
+            , waActivityLinkId = UUID.nil
+            , waProcessLinkId = Nothing
+            , waLocation = exchangeLocation
+            , waComment = nonEmptyText (exComment edata)
+            , waPedigree = Nothing
+            }
+
+    subCat = if T.null (exSubCategory edata) then Nothing else Just (exSubCategory edata)
+    compartment =
+        if T.null (exCategory edata) && isNothing subCat
+            then Nothing
+            else Just (Compartment (exCategory edata) subCat)
+    bioFlow = BiosphereFlow flowId (exName edata) unitId M.empty cas Nothing compartment
+    bioEx =
+        BiosphereExchange
+            { bioFlowId = flowId
+            , bioAmount = exMeanValue edata
+            , bioUnitId = unitId
+            , bioDirection = if inputGroup == "4" then Resource else Emission
+            , bioLocation = exchangeLocation
+            , bioComment = nonEmptyText (exComment edata)
+            , bioPedigree = Nothing
+            }
+
+    techFlow = TechnosphereFlow flowId (exName edata) unitId M.empty cas Nothing
+    techEx =
+        TechnosphereExchange
+            { techFlowId = flowId
+            , techAmount = exMeanValue edata
+            , techUnitId = unitId
+            , techRole = techRoleFor
+            , techActivityLinkId = UUID.nil
+            , techProcessLinkId = Nothing
+            , techLocation = exchangeLocation
+            , techComment = nonEmptyText (exComment edata)
+            , techPedigree = Nothing
+            }
+
+-- | Build the final per-dataset result, applying the cut-off strategy.
+buildResult :: ParseState -> Either String (Activity, [TechnosphereFlow], [BiosphereFlow], [WasteFlow], [Unit], Int, M.Map UUID Int)
+buildResult st =
+    let name = fromMaybe "Unknown Activity" (psActivityName st)
+        location = fromMaybe "GLO" (psLocation st)
+        refUnit = fromMaybe "UNKNOWN_UNIT" (psRefUnit st)
+        description = reverse (psDescription st)
+        classifications =
+            M.fromList $
+                filter
+                    (not . T.null . snd)
+                    [("Category", psActivityCategory st), ("SubCategory", psActivitySubCategory st)]
+        activity = Activity name description M.empty classifications location refUnit (reverse $ psExchanges st) M.empty M.empty Nothing Nothing Nothing
+        pack act =
+            ( act
+            , reverse (psTechFlows st)
+            , reverse (psBioFlows st)
+            , reverse (psWasteFlows st)
+            , reverse (psUnits st)
+            , psDatasetNumber st
+            , psSupplierLinks st
+            )
+     in pack <$> applyCutoffStrategy activity
+
+-- | Run the shared SAX fold, surfacing any Xeno error as a String.
+foldEcoSpold1 :: BS.ByteString -> Either String ParseState
+foldEcoSpold1 =
+    first show . X.fold onOpenTag onAttribute onEndOpen onText onCloseTag onText initialParseState
+
+-- | Xeno SAX parser for EcoSpold1 — first dataset in the file.
+parseWithXeno :: BS.ByteString -> Either String (Activity, [TechnosphereFlow], [BiosphereFlow], [WasteFlow], [Unit], Int, M.Map UUID Int)
+parseWithXeno xmlContent = do
+    finalState <- foldEcoSpold1 xmlContent
+    case psCompletedActivities finalState of
+        (result : _) -> result
+        [] -> buildResult finalState
+
+{- | Parse ALL datasets from an EcoSpold1 file (multi-dataset support).
+Outer Either = XML parse failure; inner Either = per-activity failure.
+-}
+parseAllWithXeno :: BS.ByteString -> Either String [Either String (Activity, [TechnosphereFlow], [BiosphereFlow], [WasteFlow], [Unit], Int, M.Map UUID Int)]
+parseAllWithXeno = fmap (reverse . psCompletedActivities) . foldEcoSpold1
 
 -- | Parse EcoSpold1 file using Xeno SAX parser
 streamParseActivityAndFlowsFromFile1 :: FilePath -> IO (Either String (Activity, [TechnosphereFlow], [BiosphereFlow], [WasteFlow], [Unit], Int, M.Map UUID Int))
@@ -526,274 +557,6 @@ unmarkAsReference ex@WasteExchange{} = ex
 -- Multi-dataset file support
 -- ============================================================================
 
-{- | Parse ALL datasets from an EcoSpold1 file (multi-dataset support)
-Returns the accumulated completed activities from psCompletedActivities
-Outer Either = XML parse failure; inner Either = per-activity failure
--}
-parseAllWithXeno :: BS.ByteString -> Either String [Either String (Activity, [TechnosphereFlow], [BiosphereFlow], [WasteFlow], [Unit], Int, M.Map UUID Int)]
-parseAllWithXeno xmlContent =
-    case X.fold openTag attribute endOpen text closeTag cdata initialParseState xmlContent of
-        Left err -> Left (show err)
-        Right finalState -> Right (reverse $ psCompletedActivities finalState)
-  where
-    -- Open tag handler
-    openTag state tagName =
-        let newPath = tagName : psPath state
-            newContext
-                | isElement tagName "referenceFunction" = InReferenceFunction
-                | isElement tagName "geography" = InGeography
-                | isElement tagName "exchange" = InExchange emptyExchangeData
-                | isElement tagName "inputGroup" =
-                    case psContext state of
-                        InExchange edata -> InInputGroup edata
-                        _ -> psContext state
-                | isElement tagName "outputGroup" =
-                    case psContext state of
-                        InExchange edata -> InOutputGroup edata
-                        _ -> psContext state
-                | otherwise = psContext state
-         in state{psPath = newPath, psContext = newContext, psTextAccum = []}
-
-    -- Attribute handler
-    attribute state name value =
-        case psContext state of
-            InReferenceFunction ->
-                let st =
-                        state
-                            { psActivityName =
-                                if isElement name "name"
-                                    then Just (bsToText value)
-                                    else psActivityName state
-                            , psRefUnit =
-                                if isElement name "unit"
-                                    then Just (bsToText value)
-                                    else psRefUnit state
-                            , psActivityCategory =
-                                if isElement name "category"
-                                    then bsToText value
-                                    else psActivityCategory state
-                            , psActivitySubCategory =
-                                if isElement name "subCategory"
-                                    then bsToText value
-                                    else psActivitySubCategory state
-                            , psDescription =
-                                if isElement name "generalComment" && not (BS.null value)
-                                    then bsToText value : psDescription state
-                                    else psDescription state
-                            }
-                 in st
-            InGeography ->
-                if isElement name "location"
-                    then state{psLocation = Just (bsToText value)}
-                    else state
-            InExchange edata ->
-                let updated =
-                        edata
-                            { exNumber = if isElement name "number" then bsToInt value else exNumber edata
-                            , exName = if isElement name "name" then bsToText value else exName edata
-                            , exCategory = if isElement name "category" then bsToText value else exCategory edata
-                            , exSubCategory = if isElement name "subCategory" then bsToText value else exSubCategory edata
-                            , exLocation = if isElement name "location" then bsToText value else exLocation edata
-                            , exUnit = if isElement name "unit" then bsToText value else exUnit edata
-                            , exMeanValue = if isElement name "meanValue" then bsToDouble value else exMeanValue edata
-                            , exCASNumber = if isElement name "CASNumber" then bsToText value else exCASNumber edata
-                            , exFormula = if isElement name "formula" then bsToText value else exFormula edata
-                            , exInfrastructure =
-                                if isElement name "infrastructureProcess"
-                                    then bsToText value == "true"
-                                    else exInfrastructure edata
-                            , exComment = if isElement name "generalComment" then bsToText value else exComment edata
-                            }
-                 in state{psContext = InExchange updated}
-            _ ->
-                -- Handle dataset number at top level
-                if isElement name "number" && any (isElement "dataset") (psPath state)
-                    then state{psDatasetNumber = bsToInt value}
-                    else state
-
-    -- End of opening tag
-    endOpen state _tagName = state
-
-    -- Text content handler
-    text state content =
-        let trimmed = BS.dropWhile (== 32) $ BS.dropWhileEnd (== 32) content
-         in if BS.null trimmed
-                then state
-                else state{psTextAccum = trimmed : psTextAccum state}
-
-    -- Close tag handler
-    closeTag state tagName
-        | isElement tagName "inputGroup" =
-            let txt = T.strip $ T.concat $ reverse $ map bsToText (psTextAccum state)
-             in case psContext state of
-                    InInputGroup edata ->
-                        state{psContext = InExchange edata{exInputGroup = txt}, psPath = drop 1 (psPath state), psTextAccum = []}
-                    InExchange edata ->
-                        state{psContext = InExchange edata{exInputGroup = txt}, psPath = drop 1 (psPath state), psTextAccum = []}
-                    _ -> state{psPath = drop 1 (psPath state), psTextAccum = []}
-        | isElement tagName "outputGroup" =
-            let txt = T.strip $ T.concat $ reverse $ map bsToText (psTextAccum state)
-             in case psContext state of
-                    InOutputGroup edata ->
-                        state{psContext = InExchange edata{exOutputGroup = txt}, psPath = drop 1 (psPath state), psTextAccum = []}
-                    InExchange edata ->
-                        state{psContext = InExchange edata{exOutputGroup = txt}, psPath = drop 1 (psPath state), psTextAccum = []}
-                    _ -> state{psPath = drop 1 (psPath state), psTextAccum = []}
-        | isElement tagName "exchange" =
-            case psContext state of
-                InExchange edata ->
-                    let (exchange, parsedFlow, unit) = buildExchangeForAll (psDatasetNumber state) (psLocation state) edata
-                        !supplierLinks = case exchange of
-                            TechnosphereExchange{techRole = Input}
-                                | exNumber edata /= 0 ->
-                                    M.insert (exchangeFlowId exchange) (exNumber edata) (psSupplierLinks state)
-                            _ -> psSupplierLinks state
-                        (techs, bios, wastes) = case parsedFlow of
-                            ParsedTech tf -> (tf : psTechFlows state, psBioFlows state, psWasteFlows state)
-                            ParsedBio bf -> (psTechFlows state, bf : psBioFlows state, psWasteFlows state)
-                            ParsedWaste wf -> (psTechFlows state, psBioFlows state, wf : psWasteFlows state)
-                     in state
-                            { psExchanges = exchange : psExchanges state
-                            , psTechFlows = techs
-                            , psBioFlows = bios
-                            , psWasteFlows = wastes
-                            , psUnits = unit : psUnits state
-                            , psSupplierLinks = supplierLinks
-                            , psContext = Other
-                            , psPath = drop 1 (psPath state)
-                            , psTextAccum = []
-                            }
-                _ -> state{psPath = drop 1 (psPath state)}
-        | isElement tagName "referenceFunction" =
-            state{psContext = Other, psPath = drop 1 (psPath state), psTextAccum = []}
-        | isElement tagName "geography" =
-            state{psContext = Other, psPath = drop 1 (psPath state), psTextAccum = []}
-        -- Handle dataset close tag: accumulate completed activity
-        | isElement tagName "dataset" =
-            let !result = buildResultForAll state
-                resetState =
-                    state
-                        { psCompletedActivities = result : psCompletedActivities state
-                        , psDatasetNumber = 0
-                        , psActivityName = Nothing
-                        , psActivityCategory = ""
-                        , psActivitySubCategory = ""
-                        , psLocation = Nothing
-                        , psRefUnit = Nothing
-                        , psDescription = []
-                        , psExchanges = []
-                        , psTechFlows = []
-                        , psBioFlows = []
-                        , psWasteFlows = []
-                        , psUnits = []
-                        , psContext = Other
-                        , psTextAccum = []
-                        , psSupplierLinks = M.empty
-                        }
-             in resetState{psPath = drop 1 (psPath state)}
-        | otherwise =
-            state{psPath = drop 1 (psPath state)}
-
-    -- CDATA handler
-    cdata = text
-
-    -- Build exchange, flow, and unit from exchange data (same logic as parseWithXeno)
-    buildExchangeForAll :: Int -> Maybe Text -> ExchangeData -> (Exchange, ParsedFlow, Unit)
-    buildExchangeForAll datasetNum activityLoc edata =
-        let flowId = generateFlowUUID datasetNum (exNumber edata) (exName edata) (exCategory edata)
-            unitId = generateUnitUUID (exUnit edata)
-            inputGroup = exInputGroup edata
-            outputGroup = exOutputGroup edata
-            isBiosphere = inputGroup == "4" || outputGroup == "4"
-            isInput = not (T.null inputGroup)
-            isReferenceProduct = outputGroup == "0"
-            isWasteFlow = exCategory edata == "Final waste flows"
-            exchangeLocation =
-                if T.null (exLocation edata)
-                    then
-                        if isBiosphere
-                            then fromMaybe "" activityLoc
-                            else ""
-                    else exLocation edata
-            cas = if T.null (exCASNumber edata) then Nothing else Just (exCASNumber edata)
-            unit = Unit unitId (exUnit edata) (exUnit edata) ""
-            techRoleFor
-                | isReferenceProduct = ReferenceProduct
-                | isInput = Input
-                | otherwise = Coproduct
-         in if isWasteFlow
-                then
-                    let wf = WasteFlow flowId (exName edata) unitId M.empty cas Nothing
-                        ex =
-                            WasteExchange
-                                { waFlowId = flowId
-                                , waAmount = exMeanValue edata
-                                , waUnitId = unitId
-                                , waIsInput = isInput
-                                , waActivityLinkId = UUID.nil
-                                , waProcessLinkId = Nothing
-                                , waLocation = exchangeLocation
-                                , waComment = nonEmptyText (exComment edata)
-                                , waPedigree = Nothing
-                                }
-                     in (ex, ParsedWaste wf, unit)
-                else
-                    if isBiosphere
-                        then
-                            let subCat = if T.null (exSubCategory edata) then Nothing else Just (exSubCategory edata)
-                                compartment =
-                                    if T.null (exCategory edata) && isNothing subCat
-                                        then Nothing
-                                        else Just (Compartment (exCategory edata) subCat)
-                                bioFlow = BiosphereFlow flowId (exName edata) unitId M.empty cas Nothing compartment
-                                ex =
-                                    BiosphereExchange
-                                        { bioFlowId = flowId
-                                        , bioAmount = exMeanValue edata
-                                        , bioUnitId = unitId
-                                        , bioDirection = if inputGroup == "4" then Resource else Emission
-                                        , bioLocation = exchangeLocation
-                                        , bioComment = nonEmptyText (exComment edata)
-                                        , bioPedigree = Nothing
-                                        }
-                             in (ex, ParsedBio bioFlow, unit)
-                        else
-                            let techFlow = TechnosphereFlow flowId (exName edata) unitId M.empty cas Nothing
-                                ex =
-                                    TechnosphereExchange
-                                        { techFlowId = flowId
-                                        , techAmount = exMeanValue edata
-                                        , techUnitId = unitId
-                                        , techRole = techRoleFor
-                                        , techActivityLinkId = UUID.nil
-                                        , techProcessLinkId = Nothing
-                                        , techLocation = exchangeLocation
-                                        , techComment = nonEmptyText (exComment edata)
-                                        , techPedigree = Nothing
-                                        }
-                             in (ex, ParsedTech techFlow, unit)
-
-    -- Build final result for a single dataset
-    buildResultForAll :: ParseState -> Either String (Activity, [TechnosphereFlow], [BiosphereFlow], [WasteFlow], [Unit], Int, M.Map UUID Int)
-    buildResultForAll st =
-        let name = fromMaybe "Unknown Activity" (psActivityName st)
-            location = fromMaybe "GLO" (psLocation st)
-            refUnit = fromMaybe "UNKNOWN_UNIT" (psRefUnit st)
-            description = reverse (psDescription st)
-            classifications =
-                M.fromList $
-                    filter
-                        (not . T.null . snd)
-                        [("Category", psActivityCategory st), ("SubCategory", psActivitySubCategory st)]
-            activity = Activity name description M.empty classifications location refUnit (reverse $ psExchanges st) M.empty M.empty Nothing Nothing Nothing
-            techs = reverse (psTechFlows st)
-            bios = reverse (psBioFlows st)
-            wastes = reverse (psWasteFlows st)
-            units = reverse (psUnits st)
-         in case applyCutoffStrategy activity of
-                Right act -> Right (act, techs, bios, wastes, units, psDatasetNumber st, psSupplierLinks st)
-                Left err -> Left err
-
 {- | Parse ALL datasets from a single EcoSpold1 file
 Used for multi-dataset files where <ecoSpold> contains multiple <dataset> elements
 Skips activities that fail (e.g. no reference product) and logs warnings

From d721d19b958b221d887817b602639bef22ef654a Mon Sep 17 00:00:00 2001
From: Christophe Combelles <ccomb@free.fr>
Date: Thu, 28 May 2026 17:51:07 +0200
Subject: [PATCH 32/43] simapro: refactor processBlockToActivity and its param
 plumbing (#95)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

## Summary

Refactors `SimaPro.Parser.processBlockToActivity` (the SimaPro
`ProcessBlock` → `Activity` conversion) and the parameter plumbing
feeding it. Pure readability/structure work — **no change to parsing
output**.

- **Parameter environment**: the six-step `env0 → env4 → env` staircase
becomes a single pure `buildParamEnv` that folds over ordered parameter
groups — one pass for input params, fixed-point iteration for calculated
params (forward references). It's now a reusable, independently testable
function.
- **Row decomposition**: the ~40-line block of hand-written
triple-projection lambdas (`\(e,_,_) -> e`, …) becomes `unzip3` +
`catMaybes`. The artificial `techTriples`/`treatmentTriples` split is
gone (they were always concatenated). The per-product body moved into a
`where`-clause and the two loop-invariant activity fields
(`description`, `nativeType`) are hoisted out of the per-coproduct
lambda.
- **Param plumbing**: database/project params now travel as a named
`GlobalParams` record (with a `Monoid` instance) plus a `WorkerResult`
record for `parseWorkerLines`, replacing four identically-typed
positional `[(Text,Text)]` tuples. The parallel-worker merge collapses
to `foldMap`, and the four same-typed lists can no longer be swapped by
mistake.

Net effect: `processBlockToActivity` drops from ~130 lines / 4 nesting
levels to ~90 lines / mostly 2 levels, and the parameter logic becomes
reusable units.

## Test plan

- [x] `cabal build` (library + executable) — clean
- [x] `cabal test lca-tests --test-options='--match "SimaPro"'` —
111/111 pass
- [x] `cabal test lca-tests` — 1107 examples pass (the only failures
were `Server`/`Routes` integration tests that spawn the `volca` binary;
they pass once the executable is built)
- [x] Behaviour-preservation verified against existing coverage:
coproducts & shared activityUUID, allocation % + formula, per-coproduct
exchange scaling, db/project/process param envs, final-waste routing,
all biosphere sections, native process type
---
 src/SimaPro/Parser.hs | 298 ++++++++++++++++++++++--------------------
 1 file changed, 157 insertions(+), 141 deletions(-)

diff --git a/src/SimaPro/Parser.hs b/src/SimaPro/Parser.hs
index 2dc42bb4..03a7dbfd 100644
--- a/src/SimaPro/Parser.hs
+++ b/src/SimaPro/Parser.hs
@@ -34,6 +34,7 @@ module SimaPro.Parser (
 import Control.Concurrent.Async (mapConcurrently)
 import Control.DeepSeq (NFData, force)
 import Control.Exception (evaluate)
+import Control.Monad (mfilter)
 import qualified Data.ByteString as BS
 import qualified Data.ByteString.Char8 as BS8
 import qualified Data.ByteString.Lazy as BL
@@ -41,7 +42,7 @@ import Data.Char (isUpper, toLower)
 import qualified Data.Csv as Csv
 import Data.List (dropWhileEnd)
 import qualified Data.Map.Strict as M
-import Data.Maybe (isNothing)
+import Data.Maybe (catMaybes, isNothing, maybeToList)
 import qualified Data.Set as S
 import Data.Text (Text)
 import qualified Data.Text as T
@@ -241,6 +242,41 @@ data ParseAcc = ParseAcc
     , paProjCalcParams :: ![(Text, Text)]
     }
 
+-- ============================================================================
+-- Global parameter bundle
+-- ============================================================================
+
+{- | Parameters declared outside any single process block — database- and
+project-level Input/Calculated params — threaded into every block's evaluation
+environment. The 'Monoid' instance merges the params each parallel worker
+collected from its own chunk.
+-}
+data GlobalParams = GlobalParams
+    { gpDbInput :: ![(Text, Text)]
+    , gpDbCalc :: ![(Text, Text)]
+    , gpProjInput :: ![(Text, Text)]
+    , gpProjCalc :: ![(Text, Text)]
+    }
+    deriving (Show, Eq, Generic)
+
+instance NFData GlobalParams
+
+instance Semigroup GlobalParams where
+    GlobalParams a1 b1 c1 d1 <> GlobalParams a2 b2 c2 d2 =
+        GlobalParams (a1 <> a2) (b1 <> b2) (c1 <> c2) (d1 <> d2)
+
+instance Monoid GlobalParams where
+    mempty = GlobalParams [] [] [] []
+
+-- | Output of parsing one contiguous chunk of lines.
+data WorkerResult = WorkerResult
+    { wrBlocks :: ![ProcessBlock]
+    , wrParams :: !GlobalParams
+    }
+    deriving (Show, Eq, Generic)
+
+instance NFData WorkerResult
+
 -- ============================================================================
 -- Header Parsing
 -- ============================================================================
@@ -760,140 +796,121 @@ resolveAmount env raw fallback
             Right v -> v
             Left _ -> fallback
 
+{- | Build the resolved parameter environment and the raw-expression map from
+ordered parameter groups. Input groups are resolved with a single pass each;
+calc groups iterate to a fixed point (to resolve forward references where a
+param depends on one defined later in the CSV). Groups are ordered low→high
+precedence: database, project, process.
+-}
+buildParamEnv :: [[(Text, Text)]] -> [[(Text, Text)]] -> (M.Map Text Double, M.Map Text Text)
+buildParamEnv inputGroups calcGroups =
+    ( foldl' evalToFixpoint (foldl' (foldl' evalParam) M.empty inputGroups) calcGroups
+    , M.fromList (concat (inputGroups ++ calcGroups))
+    )
+  where
+    evalParam acc (name, rawVal) =
+        either (const acc) (\v -> M.insert name v acc) (Expr.evaluate acc rawVal)
+    evalToFixpoint acc params =
+        let acc' = foldl' evalParam acc params
+         in if M.size acc' == M.size acc then acc' else evalToFixpoint acc' params
+
 {- | Convert ProcessBlock to list of Activities (one per product)
 This matches EcoSpold behavior where multi-product processes create multiple activities
 Global params (db + project level) are passed in and combined with process-level params.
 -}
 processBlockToActivity ::
     UnitConversion.UnitConfig ->
-    ([(Text, Text)], [(Text, Text)], [(Text, Text)], [(Text, Text)]) ->
+    GlobalParams ->
     ProcessBlock ->
     [(Activity, [TechnosphereFlow], [BiosphereFlow], [WasteFlow], [Unit])]
-processBlockToActivity unitCfg (dbInputPs, dbCalcPs, projInputPs, projCalcPs) ProcessBlock{..} =
-    let
-        -- Build parameter environment: input params first, then calculated params
-        evalParam acc (name, rawVal) = case Expr.evaluate acc rawVal of
-            Right v -> M.insert name v acc
-            Left _ -> acc
-        -- Fixed-point iteration for calculated params: repeat until no new variables resolve
-        -- (handles forward references where a param depends on one defined later in the CSV)
-        evalCalcParams acc params =
-            let acc' = foldl' evalParam acc params
-             in if M.size acc' == M.size acc then acc' else evalCalcParams acc' params
-        env0 = foldl' evalParam M.empty (reverse dbInputPs)
-        env1 = foldl' evalParam env0 (reverse projInputPs)
-        env2 = foldl' evalParam env1 (reverse pbInputParams)
-        env3 = evalCalcParams env2 (reverse dbCalcPs)
-        env4 = evalCalcParams env3 (reverse projCalcPs)
-        env = evalCalcParams env4 (reverse pbCalcParams)
-
-        -- Raw expression map for re-evaluation
-        allExprs =
-            reverse dbInputPs
-                ++ reverse projInputPs
-                ++ reverse pbInputParams
-                ++ reverse dbCalcPs
-                ++ reverse projCalcPs
-                ++ reverse pbCalcParams
-        exprMap = M.fromList allExprs
-
-        -- Extract location from process name if not specified
-        (cleanProcessNameRaw, locFromName) = extractLocation pbName
-        location = if T.null pbLocation || T.toLower pbLocation == "unspecified" then locFromName else pbLocation
-        -- Trimmed Process name (without curly-brace location tag). Empty when
-        -- the SimaPro "Process name" field is empty (typical for mono-product
-        -- blocks where only the Product line carries the human-readable name).
-        processNameTrimmed = T.strip cleanProcessNameRaw
-
-        -- Convert all rows in one pass, collecting exchanges/flows/units together
-        avoidedTriples = map (productToExchange unitCfg env False) pbAvoidedProducts
-        techTriples = map (techRowToExchange env) (pbMaterials ++ pbElectricity)
-        treatmentTriples = map (techRowToExchange env) pbWasteToTreatment
-        -- SimaPro's 'Final waste flows' section: outputs the activity 'throws
-        -- away' with no modelled treatment in the dataset. Route to
-        -- WasteExchange so the cross-DB linker doesn't tally them as missing
-        -- suppliers (they're end-of-life markers, not demands).
-        finalWasteTriples = map (wasteRowToExchange env) pbFinalWaste
-        bioTriples =
-            map (bioRowToExchange env True "resource") pbResources
-                ++ map (bioRowToExchange env False "air") pbEmissionsAir
-                ++ map (bioRowToExchange env False "water") pbEmissionsWater
-                ++ map (bioRowToExchange env False "soil") pbEmissionsSoil
-
-        -- Tech rows may have zero amounts (Maybe Exchange), others always have exchanges
-        techRowExchanges = [e | (Just e, _, _) <- techTriples ++ treatmentTriples]
-        techRowFlows = [f | (_, f, _) <- techTriples ++ treatmentTriples]
-        techRowUnits = [u | (_, _, u) <- techTriples ++ treatmentTriples]
-
-        sharedExchanges =
-            map (\(e, _, _) -> e) avoidedTriples
-                ++ techRowExchanges
-                ++ map (\(e, _, _) -> e) bioTriples
-                ++ map (\(e, _, _) -> e) finalWasteTriples
-        sharedTechFlows = map (\(_, f, _) -> f) avoidedTriples ++ techRowFlows
-        sharedBioFlows = map (\(_, f, _) -> f) bioTriples
-        sharedWasteFlows = map (\(_, f, _) -> f) finalWasteTriples
-        sharedUnits =
-            S.toList . S.fromList $
-                map (\(_, _, u) -> unitName u) avoidedTriples
-                    ++ map unitName techRowUnits
-                    ++ map (\(_, _, u) -> unitName u) bioTriples
-                    ++ map (\(_, _, u) -> unitName u) finalWasteTriples
-
-        -- Create one activity per product
-        makeActivity :: ProductRow -> (Activity, [TechnosphereFlow], [BiosphereFlow], [WasteFlow], [Unit])
-        makeActivity prod =
-            let (productExchange, productFlow, productUnit) = productToExchange unitCfg env True prod
-                effUnitName = unitName productUnit
-                allocPercent = resolveAmount env (prAllocRaw prod) (prAllocation prod)
-                allocFraction = allocPercent / 100.0
-                (cleanProductName, locFromProduct) = extractLocation (prName prod)
-                effectiveLoc = if T.null location then locFromProduct else location
-                -- Activity name = Process name when present (so coproducts of
-                -- the same Process share one activityUUID via generateActivityUUID),
-                -- otherwise fall back to product name (mono-product blocks
-                -- with empty "Process name" field).
-                effectiveActivityName =
-                    if T.null processNameTrimmed then cleanProductName else processNameTrimmed
-                allocFormulaRaw = T.strip (prAllocRaw prod)
-                allocFormula =
-                    if T.null allocFormulaRaw || isNumericFormula allocFormulaRaw
-                        then Nothing
-                        else Just allocFormulaRaw
-                activity =
-                    Activity
-                        { activityName = effectiveActivityName
-                        , activityDescription = if T.null pbComment then [] else [pbComment]
-                        , activitySynonyms = M.empty
-                        , activityClassification =
-                            M.fromList $
-                                filter
-                                    (not . T.null . snd)
-                                    [ ("Category type", pbCategoryType)
-                                    , ("Category", prCategory prod)
-                                    ]
-                        , activityLocation = effectiveLoc
-                        , activityUnit = effUnitName
-                        , exchanges = productExchange : map (scaleExchange allocFraction) sharedExchanges
-                        , activityParams = env
-                        , activityParamExprs = exprMap
-                        , activityAllocationPercent = Just allocPercent
-                        , activityAllocationFormula = allocFormula
-                        , activityNativeType =
-                            if T.null pbType
-                                then Nothing
-                                else Just (SimaProProcessType{sptLabel = pbType})
-                        }
-                allTechFlows = productFlow : sharedTechFlows
-                allBioFlows = sharedBioFlows
-                allWasteFlows = sharedWasteFlows
-                allUnits =
-                    map
-                        (\name -> Unit (generateUnitUUID name) name name "")
-                        (S.toList . S.fromList $ effUnitName : sharedUnits)
-             in (activity, allTechFlows, allBioFlows, allWasteFlows, allUnits)
-     in
-        map makeActivity pbProducts
+processBlockToActivity unitCfg GlobalParams{..} ProcessBlock{..} =
+    map makeActivity pbProducts
+  where
+    (env, exprMap) =
+        buildParamEnv
+            (reverse <$> [gpDbInput, gpProjInput, pbInputParams])
+            (reverse <$> [gpDbCalc, gpProjCalc, pbCalcParams])
+
+    -- Extract location from process name if not specified.
+    (cleanProcessNameRaw, locFromName) = extractLocation pbName
+    location =
+        if T.null pbLocation || T.toLower pbLocation == "unspecified"
+            then locFromName
+            else pbLocation
+    -- Trimmed Process name (without curly-brace location tag). Empty when the
+    -- SimaPro "Process name" field is empty (typical for mono-product blocks
+    -- where only the Product line carries the human-readable name).
+    processNameTrimmed = T.strip cleanProcessNameRaw
+
+    -- Convert each section's rows to (exchange, flow, unit) triples in one pass.
+    -- 'Final waste flows' route to WasteExchange so the cross-DB linker doesn't
+    -- tally them as missing suppliers (they're end-of-life markers, not demands).
+    (avoidedExs, avoidedFlows, avoidedUnits) =
+        unzip3 (productToExchange unitCfg env False <$> pbAvoidedProducts)
+    (techMaybeExs, techFlows, techUnits) =
+        unzip3 (techRowToExchange env <$> (pbMaterials ++ pbElectricity ++ pbWasteToTreatment))
+    (bioExs, bioFlows, bioUnits) =
+        unzip3 $
+            (bioRowToExchange env True "resource" <$> pbResources)
+                ++ (bioRowToExchange env False "air" <$> pbEmissionsAir)
+                ++ (bioRowToExchange env False "water" <$> pbEmissionsWater)
+                ++ (bioRowToExchange env False "soil" <$> pbEmissionsSoil)
+    (wasteExs, wasteFlows, wasteUnits) =
+        unzip3 (wasteRowToExchange env <$> pbFinalWaste)
+
+    -- Exchanges/flows/units shared by every coproduct (scaled per product below).
+    -- Tech rows with a zero amount yield no exchange but still contribute a flow.
+    sharedExchanges = avoidedExs ++ catMaybes techMaybeExs ++ bioExs ++ wasteExs
+    sharedTechFlows = avoidedFlows ++ techFlows
+    sharedBioFlows = bioFlows
+    sharedWasteFlows = wasteFlows
+    sharedUnitNames =
+        S.toList . S.fromList $ unitName <$> (avoidedUnits ++ techUnits ++ bioUnits ++ wasteUnits)
+
+    -- Loop-invariant activity fields (independent of the coproduct).
+    descriptionLines = maybeToList (nonEmptyText pbComment)
+    nativeType = SimaProProcessType <$> nonEmptyText pbType
+
+    makeActivity :: ProductRow -> (Activity, [TechnosphereFlow], [BiosphereFlow], [WasteFlow], [Unit])
+    makeActivity prod =
+        let (productExchange, productFlow, productUnit) = productToExchange unitCfg env True prod
+            effUnitName = unitName productUnit
+            allocPercent = resolveAmount env (prAllocRaw prod) (prAllocation prod)
+            allocFraction = allocPercent / 100.0
+            (cleanProductName, locFromProduct) = extractLocation (prName prod)
+            effectiveLoc = if T.null location then locFromProduct else location
+            -- Activity name = Process name when present (so coproducts of the same
+            -- Process share one activityUUID via generateActivityUUID), otherwise
+            -- fall back to product name (mono-product blocks with empty field).
+            effectiveActivityName =
+                if T.null processNameTrimmed then cleanProductName else processNameTrimmed
+            allocFormula = mfilter (not . isNumericFormula) (nonEmptyText (prAllocRaw prod))
+            activity =
+                Activity
+                    { activityName = effectiveActivityName
+                    , activityDescription = descriptionLines
+                    , activitySynonyms = M.empty
+                    , activityClassification =
+                        M.fromList $
+                            filter
+                                (not . T.null . snd)
+                                [ ("Category type", pbCategoryType)
+                                , ("Category", prCategory prod)
+                                ]
+                    , activityLocation = effectiveLoc
+                    , activityUnit = effUnitName
+                    , exchanges = productExchange : map (scaleExchange allocFraction) sharedExchanges
+                    , activityParams = env
+                    , activityParamExprs = exprMap
+                    , activityAllocationPercent = Just allocPercent
+                    , activityAllocationFormula = allocFormula
+                    , activityNativeType = nativeType
+                    }
+            allUnits =
+                map
+                    (\name -> Unit (generateUnitUUID name) name name "")
+                    (S.toList . S.fromList $ effUnitName : sharedUnitNames)
+         in (activity, productFlow : sharedTechFlows, sharedBioFlows, sharedWasteFlows, allUnits)
 
 -- | True when the raw allocation cell is a plain decimal literal (no formula).
 isNumericFormula :: Text -> Bool
@@ -1250,7 +1267,7 @@ splitForWorkers numWorkers allLines
             chopAtEnds target endCount (l : acc) ls
 
 -- | Parse a contiguous range of lines into ProcessBlocks + global params.
-parseWorkerLines :: SimaProConfig -> [BS.ByteString] -> ([ProcessBlock], [(Text, Text)], [(Text, Text)], [(Text, Text)], [(Text, Text)])
+parseWorkerLines :: SimaProConfig -> [BS.ByteString] -> WorkerResult
 parseWorkerLines cfg ls =
     let initAcc =
             ParseAcc
@@ -1265,12 +1282,16 @@ parseWorkerLines cfg ls =
                 , paProjCalcParams = []
                 }
         finalAcc = foldl' processLine initAcc ls
-     in ( reverse (paBlocks finalAcc)
-        , paDbInputParams finalAcc
-        , paDbCalcParams finalAcc
-        , paProjInputParams finalAcc
-        , paProjCalcParams finalAcc
-        )
+     in WorkerResult
+            { wrBlocks = reverse (paBlocks finalAcc)
+            , wrParams =
+                GlobalParams
+                    { gpDbInput = paDbInputParams finalAcc
+                    , gpDbCalc = paDbCalcParams finalAcc
+                    , gpProjInput = paProjInputParams finalAcc
+                    , gpProjCalc = paProjCalcParams finalAcc
+                    }
+            }
 
 -- ============================================================================
 -- Main Entry Point
@@ -1304,13 +1325,8 @@ parseSimaProCSV unitCfg path = do
 
     -- Parse chunks in parallel — each worker folds its contiguous range
     results <- mapConcurrently (evaluate . force . parseWorkerLines cfg) workerChunks
-    let allBlocks = concatMap (\(b, _, _, _, _) -> b) results
-        globalParams =
-            ( concatMap (\(_, a, _, _, _) -> a) results
-            , concatMap (\(_, _, b, _, _) -> b) results
-            , concatMap (\(_, _, _, c, _) -> c) results
-            , concatMap (\(_, _, _, _, d) -> d) results
-            )
+    let allBlocks = concatMap wrBlocks results
+        globalParams = foldMap wrParams results
 
     -- Convert all blocks to activities (one activity per product) - PARALLEL
     converted <- concat <$> mapConcurrently (evaluate . force . processBlockToActivity unitCfg globalParams) allBlocks

From 829b74c6acba739e8e7f400e8286b905a260d2b7 Mon Sep 17 00:00:00 2001
From: Christophe Combelles <ccomb@free.fr>
Date: Thu, 28 May 2026 17:51:43 +0200
Subject: [PATCH 33/43] refactor(method): model SimaPro method parser state as
 one Stage ADT (#96)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

## Summary

Continues the PR #87 line of cleanup, this time on
`Method.ParserSimaPro` — the
12-phase fold that parses SimaPro LCIA method CSV exports.

The phase lived in a `Phase` enum next to a flat 13-field `ParseState`
whose
per-section accumulators were always present, so the type permitted a
phase to
disagree with the data it held. This change folds phase + accumulator
into a
single `Stage` sum type whose constructors carry exactly what each phase
needs,
making those impossible states unrepresentable. It also factors out the
duplicated leaf logic and removes a partial function.

## What changed

- **`Stage` ADT** replaces `Phase` + the flat accumulator fields.
`CatAccum` /
`DamageAccum` / `NWAccum` hold the in-progress block; completed lists
and the
constant methodology name move to a slim `ParseState` (so `step` drops
its
  threaded methodology argument).
- **Shared helpers** pulled out of the `step` `case`: `parseCFRow` (the
inlined
  ~35-line CF builder), one `parseNameValue` for the three identical
`name;value` rows, `parseNameUnit` for both `Name;Unit` headers,
`buildMethod`,
and `detectMarker`/`stageFor` mirroring the sibling
`SimaPro.Parser.detectSection`.
- **`finalize` collapsed** from re-implemented builders to a 6-line
read-out that
reuses the same `finishCat`/`finishDamage`/`finishNW` finishers as
`step`, via
  one `finishCurrent` dispatcher.
- **Partial function removed**: `parseNameUnit` is total, deleting
`head'` and its
  `error` call.

## Behaviour

Output is unchanged on well-formed SimaPro files (verified by tracing
the
fixture through every transition). Three harmonisations are unreachable
on valid
exports: empty blocks are no longer emitted, and a truncated file
lacking a
terminal `End` now keeps its trailing block and its methodology instead
of
dropping them.

Net: `216 insertions, 233 deletions` in one file.

## Test plan

- [x] `cabal build lib:volca` — clean, no incomplete-pattern warnings on
the new `Stage`/`step`
- [x] `cabal run lca-tests -- --match "/Method"` — 115 examples, 0
failures (14 SimaPro method cases + standalone NW parser among them)
---
 src/Method/ParserSimaPro.hs | 459 ++++++++++++++++++------------------
 1 file changed, 226 insertions(+), 233 deletions(-)

diff --git a/src/Method/ParserSimaPro.hs b/src/Method/ParserSimaPro.hs
index a24b45e7..0cae0dc7 100644
--- a/src/Method/ParserSimaPro.hs
+++ b/src/Method/ParserSimaPro.hs
@@ -50,8 +50,8 @@ parseSimaProMethodCSVBytes raw =
     let !utf8 = ensureUtf8 raw
         lns = BS8.lines utf8
         cfg = parseConfig lns
-        methodName' = parseMethodName cfg lns
-        result = foldl' (step cfg methodName') initState lns
+        methodologyName = parseMethodName cfg lns
+        result = foldl' (step cfg) (initState methodologyName) lns
      in Right (finalize result)
 
 {- | Detect whether bytes are a SimaPro method CSV export.
@@ -74,263 +74,260 @@ isSimaProMethodCSV bs =
 -- Parser State
 -- ============================================================================
 
+-- | In-progress accumulators. Each carries exactly the data its stage needs,
+-- so the parser can never hold (say) lingering CFs while reading an NW set.
+data CatAccum = CatAccum !Text !Text ![MethodCF]
+data DamageAccum = DamageAccum !Text !Text ![(Text, Double)]
+data NWAccum = NWAccum !Text !(M.Map Text Double) !(M.Map Text Double)
+
+{- | The single source of truth for "where we are and what we're collecting".
+Folds the old @Phase@ enum together with the per-section accumulators: an
+impossible state (a phase disagreeing with its accumulator) is unrepresentable.
+-}
+data Stage
+    = Header -- reading {key: value} header lines
+    | MethodMeta -- skipping method-level metadata until a section marker
+    | BetweenSections -- finished a block; expecting the next marker or End
+    | NeedCatLine -- expecting "Name;Unit" after "Impact category"
+    | NeedSubstances !Text !Text -- have cat name+unit; expecting "Substances"
+    | ReadingCFs !CatAccum -- reading substance/CF rows
+    | NeedDcLine -- expecting "Name;Unit" after "Damage category"
+    | NeedDcImpacts !Text !Text -- expecting "Impact categories" marker
+    | ReadingDcImpacts !DamageAccum -- reading impact rows of a damage category
+    | NeedNWName -- expecting the NW-set name line
+    | NeedNWSection !NWAccum -- expecting "Normalization"/"Weighting"/next marker
+    | ReadingNorm !NWAccum -- reading normalization rows
+    | ReadingWeight !NWAccum -- reading weighting rows
+
 data ParseState = ParseState
-    { psPhase :: !Phase
-    , psCatName :: !Text -- current impact category name
-    , psCatUnit :: !Text -- current impact category unit
-    , psFactors :: ![MethodCF] -- CFs accumulated (reversed) for current category
+    { psStage :: !Stage
+    , psMethodology :: !Text -- constant; the method-level "Name"
     , psMethods :: ![Method] -- completed methods (reversed)
-    -- Damage categories
     , psDamageCats :: ![DamageCategory] -- completed damage categories (reversed)
-    , psDcName :: !Text -- current damage category name
-    , psDcUnit :: !Text -- current damage category unit
-    , psDcImpacts :: ![(Text, Double)] -- current damage category impacts (reversed)
-    -- Normalization/Weighting
     , psNWsets :: ![NormWeightSet] -- completed NW sets (reversed)
-    , psNWname :: !Text -- current NW set name
-    , psNormMap :: !(M.Map Text Double) -- current normalization factors
-    , psWeightMap :: !(M.Map Text Double) -- current weighting factors
     }
 
-data Phase
-    = PhHeader -- reading {key: value} header lines
-    | PhMethodMeta -- reading method-level metadata (Name, Version, etc.)
-    | PhExpectCategory -- expecting "Impact category" or "Damage category" etc.
-    | PhExpectCatLine -- expecting the "Name;Unit" line after "Impact category"
-    | PhExpectSubst -- expecting blank or "Substances" marker
-    | PhReadingCFs -- reading substance/CF rows
-    -- Damage categories
-    | PhExpectDcLine -- expecting "Name;Unit" after "Damage category"
-    | PhExpectDcImpacts -- expecting "Impact categories" marker
-    | PhReadingDcImpacts -- reading impact category rows in damage category
-    -- Normalization/Weighting
-    | PhExpectNWname -- expecting NW set name line
-    | PhExpectNWsection -- expecting "Normalization" or "Weighting" or next section
-    | PhReadingNorm -- reading normalization rows
-    | PhReadingWeight -- reading weighting rows
-
-initState :: ParseState
-initState = ParseState PhHeader "" "" [] [] [] "" "" [] [] "" M.empty M.empty
+initState :: Text -> ParseState
+initState methodology = ParseState Header methodology [] [] []
 
 -- ============================================================================
 -- State Machine
 -- ============================================================================
 
-step :: SimaProConfig -> Text -> ParseState -> BS.ByteString -> ParseState
-step cfg methodologyName st line = case psPhase st of
-    PhHeader
+step :: SimaProConfig -> ParseState -> BS.ByteString -> ParseState
+step cfg st line = case psStage st of
+    Header
         | BS8.isPrefixOf "{" line -> st
-        | otherwise -> st{psPhase = PhMethodMeta}
-    PhMethodMeta
-        | stripped == "Impact category" -> st{psPhase = PhExpectCatLine}
-        | stripped == "Damage category" -> st{psPhase = PhExpectDcLine}
-        | isNWsetMarker stripped -> st{psPhase = PhExpectNWname}
+        | otherwise -> st{psStage = MethodMeta}
+    MethodMeta
+        | Just m <- detectMarker stripped -> st{psStage = stageFor m}
         | otherwise -> st
-    PhExpectCategory
-        | stripped == "Impact category" -> st{psPhase = PhExpectCatLine}
-        | stripped == "Damage category" -> st{psPhase = PhExpectDcLine}
-        | isNWsetMarker stripped -> st{psPhase = PhExpectNWname}
-        | stripped == "End" -> st
+    BetweenSections
+        | Just m <- detectMarker stripped -> st{psStage = stageFor m}
         | otherwise -> st
-    PhExpectCatLine
+    NeedCatLine
         | isBlank line -> st
         | otherwise ->
-            let fields = splitCSV (spDelimiter cfg) line
-                catName = decodeBS (BS8.strip (head' fields))
-                catUnit =
-                    if length fields > 1
-                        then decodeBS (BS8.strip (fields !! 1))
-                        else ""
-             in st
-                    { psPhase = PhExpectSubst
-                    , psCatName = catName
-                    , psCatUnit = catUnit
-                    , psFactors = []
-                    }
-    PhExpectSubst
+            let (name, unit) = parseNameUnit cfg line
+             in st{psStage = NeedSubstances name unit}
+    NeedSubstances name unit
         | isBlank line -> st
-        | stripped == "Substances" -> st{psPhase = PhReadingCFs}
+        | stripped == "Substances" -> st{psStage = ReadingCFs (CatAccum name unit [])}
         | otherwise -> st
-    PhReadingCFs
-        | isBlank line -> finishCategory st
-        | stripped == "Impact category" ->
-            (finishCategory st){psPhase = PhExpectCatLine}
-        | stripped == "Damage category" ->
-            (finishCategory st){psPhase = PhExpectDcLine}
-        | isNWsetMarker stripped ->
-            (finishCategory st){psPhase = PhExpectNWname}
-        | stripped == "End" -> finishCategory st
-        | otherwise ->
-            let fields = splitCSV (spDelimiter cfg) line
-             in case fields of
-                    (comp : sub : name : cas : cfVal : cfUnit : _) ->
-                        let !rawName = decodeBS (BS8.strip name)
-                            -- Keep the full suffixed name so the CF's
-                            -- 'mcfFlowRef' UUID matches the suffixed
-                            -- biosphere flow UUID parsed by
-                            -- 'SimaPro.Parser.bioRowToExchange'. The location
-                            -- is also exposed via 'mcfConsumerLocation' for
-                            -- regional dispatch on engines that key CFs by
-                            -- activity location (openLCA JSON-LD); SimaPro
-                            -- CSV CFs are already region-tagged in the name,
-                            -- so dual storage is correct.
-                            !mLoc = snd (extractLocationSuffix rawName)
-                            !cfUnitT = decodeBS (BS8.strip cfUnit)
-                            -- UUID hashed via the shared 'generateFlowUUID' +
-                            -- 'normalizeSimaProCompartment' so the CF side
-                            -- and 'SimaPro.Parser.bioRowToExchange' produce
-                            -- the same UUID for the same flow.
-                            !flowRef =
-                                generateFlowUUID
-                                    rawName
-                                    (normalizeSimaProCompartment (decodeBS comp) (decodeBS sub))
-                                    cfUnitT
-                            !cf =
-                                MethodCF
-                                    { mcfFlowRef = flowRef
-                                    , mcfFlowName = rawName
-                                    , mcfDirection = direction comp
-                                    , mcfValue = parseAmount (spDecimal cfg) (BS8.strip cfVal)
-                                    , mcfCompartment = mkCompartment comp sub
-                                    , mcfCAS = normalizeCAS (decodeBS (BS8.strip cas))
-                                    , mcfUnit = cfUnitT
-                                    , mcfConsumerLocation = mLoc
-                                    }
-                         in st{psFactors = cf : psFactors st}
-                    _ -> st
-    -- Damage category parsing
-    PhExpectDcLine
+    ReadingCFs acc
+        | isBlank line -> finishCat acc st{psStage = BetweenSections}
+        | stripped == "End" -> finishCat acc st{psStage = BetweenSections}
+        | Just m <- detectMarker stripped -> finishCat acc st{psStage = stageFor m}
+        | Just cf <- parseCFRow cfg line -> st{psStage = ReadingCFs (consCF cf acc)}
+        | otherwise -> st
+    NeedDcLine
         | isBlank line -> st
         | otherwise ->
-            let fields = splitCSV (spDelimiter cfg) line
-                dcn = decodeBS (BS8.strip (head' fields))
-                dcu = if length fields > 1 then decodeBS (BS8.strip (fields !! 1)) else ""
-             in st{psPhase = PhExpectDcImpacts, psDcName = dcn, psDcUnit = dcu, psDcImpacts = []}
-    PhExpectDcImpacts
+            let (name, unit) = parseNameUnit cfg line
+             in st{psStage = NeedDcImpacts name unit}
+    NeedDcImpacts name unit
         | isBlank line -> st
-        | stripped == "Impact categories" -> st{psPhase = PhReadingDcImpacts}
+        | stripped == "Impact categories" -> st{psStage = ReadingDcImpacts (DamageAccum name unit [])}
         | otherwise -> st
-    PhReadingDcImpacts
-        | isBlank line -> finishDamageCategory st
-        | stripped == "Damage category" ->
-            (finishDamageCategory st){psPhase = PhExpectDcLine}
-        | isNWsetMarker stripped ->
-            (finishDamageCategory st){psPhase = PhExpectNWname}
-        | stripped == "End" -> finishDamageCategory st
-        | otherwise ->
-            let fields = splitCSV (spDelimiter cfg) line
-             in case fields of
-                    (name : val : _) ->
-                        let n = decodeBS (BS8.strip name)
-                            v = parseAmount (spDecimal cfg) (BS8.strip val)
-                         in st{psDcImpacts = (n, v) : psDcImpacts st}
-                    _ -> st
-    -- Normalization/Weighting parsing
-    PhExpectNWname
+    ReadingDcImpacts acc
+        | isBlank line -> finishDamage acc st{psStage = BetweenSections}
+        | stripped == "End" -> finishDamage acc st{psStage = BetweenSections}
+        | Just m <- detectMarker stripped -> finishDamage acc st{psStage = stageFor m}
+        | Just nv <- parseNameValue cfg line -> st{psStage = ReadingDcImpacts (consImpact nv acc)}
+        | otherwise -> st
+    NeedNWName
         | isBlank line -> st
-        | otherwise -> st{psPhase = PhExpectNWsection, psNWname = decodeBS (BS8.strip line)}
-    PhExpectNWsection
+        | otherwise -> st{psStage = NeedNWSection (NWAccum (decodeBS stripped) M.empty M.empty)}
+    NeedNWSection acc
         | isBlank line -> st
-        | stripped == "Normalization" -> st{psPhase = PhReadingNorm}
-        | stripped == "Weighting" -> st{psPhase = PhReadingWeight}
-        | stripped == "Damage category" ->
-            (finishNWset st){psPhase = PhExpectDcLine}
-        | isNWsetMarker stripped ->
-            (finishNWset st){psPhase = PhExpectNWname}
-        | stripped == "End" -> finishNWset st
+        | stripped == "Normalization" -> st{psStage = ReadingNorm acc}
+        | stripped == "Weighting" -> st{psStage = ReadingWeight acc}
+        | stripped == "End" -> finishNW acc st{psStage = BetweenSections}
+        | Just m <- detectMarker stripped -> finishNW acc st{psStage = stageFor m}
+        | otherwise -> st
+    ReadingNorm acc
+        | isBlank line -> st{psStage = NeedNWSection acc}
+        | stripped == "Weighting" -> st{psStage = ReadingWeight acc}
+        | stripped == "End" -> finishNW acc st{psStage = BetweenSections}
+        | Just (n, v) <- parseNameValue cfg line -> st{psStage = ReadingNorm (insertNorm n v acc)}
+        | otherwise -> st
+    ReadingWeight acc
+        | isBlank line -> st{psStage = NeedNWSection acc}
+        | stripped == "End" -> finishNW acc st{psStage = BetweenSections}
+        | Just m <- detectMarker stripped -> finishNW acc st{psStage = stageFor m}
+        | Just (n, v) <- parseNameValue cfg line -> st{psStage = ReadingWeight (insertWeight n v acc)}
         | otherwise -> st
-    PhReadingNorm
-        | isBlank line -> st{psPhase = PhExpectNWsection}
-        | stripped == "Weighting" -> st{psPhase = PhReadingWeight}
-        | stripped == "End" -> finishNWset st
-        | otherwise ->
-            let fields = splitCSV (spDelimiter cfg) line
-             in case fields of
-                    (name : val : _) ->
-                        let n = decodeBS (BS8.strip name)
-                            v = parseAmount (spDecimal cfg) (BS8.strip val)
-                         in st{psNormMap = M.insert n v (psNormMap st)}
-                    _ -> st
-    PhReadingWeight
-        | isBlank line -> st{psPhase = PhExpectNWsection}
-        | isNWsetMarker stripped ->
-            (finishNWset st){psPhase = PhExpectNWname}
-        | stripped == "End" -> finishNWset st
-        | otherwise ->
-            let fields = splitCSV (spDelimiter cfg) line
-             in case fields of
-                    (name : val : _) ->
-                        let n = decodeBS (BS8.strip name)
-                            v = parseAmount (spDecimal cfg) (BS8.strip val)
-                         in st{psWeightMap = M.insert n v (psWeightMap st)}
-                    _ -> st
   where
     stripped = BS8.strip line
 
-    finishCategory s =
-        let !m =
-                Method
-                    { methodId =
-                        UUID5.generateNamed
-                            simaproNamespace
-                            (BS.unpack $ TE.encodeUtf8 $ "method:" <> psCatName s)
-                    , methodName = psCatName s
-                    , methodDescription = Nothing
-                    , methodUnit = psCatUnit s
-                    , methodCategory = psCatName s
-                    , methodMethodology = Just methodologyName
-                    , methodFactors = reverse (psFactors s)
-                    }
-         in s{psPhase = PhExpectCategory, psFactors = [], psMethods = m : psMethods s}
-
-    finishDamageCategory s =
-        let !dc = DamageCategory (psDcName s) (psDcUnit s) (reverse (psDcImpacts s))
-         in s{psPhase = PhExpectCategory, psDcImpacts = [], psDamageCats = dc : psDamageCats s}
-
-    finishNWset s
-        | M.null (psNormMap s) && M.null (psWeightMap s) = s{psPhase = PhExpectCategory}
-        | otherwise =
-            let !nw = NormWeightSet (psNWname s) (psNormMap s) (psWeightMap s)
-             in s
-                    { psPhase = PhExpectCategory
-                    , psNormMap = M.empty
-                    , psWeightMap = M.empty
-                    , psNWname = ""
-                    , psNWsets = nw : psNWsets s
-                    }
-
+-- | Append the completed in-progress block (if any) onto the right list.
+-- Shared by 'step' (mid-stream, on blanks/markers/End) and 'finalize' (at EOF).
+finishCat :: CatAccum -> ParseState -> ParseState
+finishCat (CatAccum name unit factors) st
+    | null factors = st
+    | otherwise = st{psMethods = buildMethod (psMethodology st) name unit (reverse factors) : psMethods st}
+
+finishDamage :: DamageAccum -> ParseState -> ParseState
+finishDamage (DamageAccum name unit impacts) st
+    | null impacts = st
+    | otherwise = st{psDamageCats = DamageCategory name unit (reverse impacts) : psDamageCats st}
+
+finishNW :: NWAccum -> ParseState -> ParseState
+finishNW (NWAccum name norm weight) st
+    | M.null norm && M.null weight = st
+    | otherwise = st{psNWsets = NormWeightSet name norm weight : psNWsets st}
+
+-- | Flush whatever block is in progress at end of input, then read out the
+-- accumulated collections in source order.
 finalize :: ParseState -> MethodCollection
 finalize st =
-    let methods = case psPhase st of
-            PhReadingCFs
-                | not (null (psFactors st)) ->
-                    let !m =
-                            Method
-                                { methodId =
-                                    UUID5.generateNamed
-                                        simaproNamespace
-                                        (BS.unpack $ TE.encodeUtf8 $ "method:" <> psCatName st)
-                                , methodName = psCatName st
-                                , methodDescription = Nothing
-                                , methodUnit = psCatUnit st
-                                , methodCategory = psCatName st
-                                , methodMethodology = Nothing
-                                , methodFactors = reverse (psFactors st)
-                                }
-                     in reverse (m : psMethods st)
-            _ -> reverse (psMethods st)
-        -- Flush any pending NW set
-        nwSets = case psPhase st of
-            PhReadingWeight
-                | not (M.null (psWeightMap st)) ->
-                    let !nw = NormWeightSet (psNWname st) (psNormMap st) (psWeightMap st)
-                     in reverse (nw : psNWsets st)
-            PhReadingNorm
-                | not (M.null (psNormMap st)) ->
-                    let !nw = NormWeightSet (psNWname st) (psNormMap st) (psWeightMap st)
-                     in reverse (nw : psNWsets st)
-            _ -> reverse (psNWsets st)
-     in MethodCollection methods (reverse (psDamageCats st)) nwSets []
+    let s = finishCurrent st
+     in MethodCollection
+            (reverse (psMethods s))
+            (reverse (psDamageCats s))
+            (reverse (psNWsets s))
+            []
+
+finishCurrent :: ParseState -> ParseState
+finishCurrent st = case psStage st of
+    ReadingCFs acc -> finishCat acc st
+    ReadingDcImpacts acc -> finishDamage acc st
+    ReadingNorm acc -> finishNW acc st
+    ReadingWeight acc -> finishNW acc st
+    NeedNWSection acc -> finishNW acc st
+    -- Stages with no in-progress block to flush. Enumerated (not wildcarded)
+    -- so a future accumulator-carrying stage can't silently skip its EOF flush.
+    Header -> st
+    MethodMeta -> st
+    BetweenSections -> st
+    NeedCatLine -> st
+    NeedSubstances{} -> st
+    NeedDcLine -> st
+    NeedDcImpacts{} -> st
+    NeedNWName -> st
+
+consCF :: MethodCF -> CatAccum -> CatAccum
+consCF cf (CatAccum name unit factors) = CatAccum name unit (cf : factors)
+
+consImpact :: (Text, Double) -> DamageAccum -> DamageAccum
+consImpact i (DamageAccum name unit impacts) = DamageAccum name unit (i : impacts)
+
+insertNorm :: Text -> Double -> NWAccum -> NWAccum
+insertNorm n v (NWAccum name norm weight) = NWAccum name (M.insert n v norm) weight
+
+insertWeight :: Text -> Double -> NWAccum -> NWAccum
+insertWeight n v (NWAccum name norm weight) = NWAccum name norm (M.insert n v weight)
+
+buildMethod :: Text -> Text -> Text -> [MethodCF] -> Method
+buildMethod methodology name unit factors =
+    Method
+        { methodId =
+            UUID5.generateNamed
+                simaproNamespace
+                (BS.unpack $ TE.encodeUtf8 $ "method:" <> name)
+        , methodName = name
+        , methodDescription = Nothing
+        , methodUnit = unit
+        , methodCategory = name
+        , methodMethodology = Just methodology
+        , methodFactors = factors
+        }
+
+-- ============================================================================
+-- Line parsers
+-- ============================================================================
+
+-- | A line that begins a new section. "End" is deliberately not a marker — it
+-- only closes the current block — so the reading stages handle it inline.
+data Marker = MImpactCat | MDamageCat | MNWSet
+
+detectMarker :: BS.ByteString -> Maybe Marker
+detectMarker s
+    | s == "Impact category" = Just MImpactCat
+    | s == "Damage category" = Just MDamageCat
+    | isNWsetMarker s = Just MNWSet
+    | otherwise = Nothing
+
+stageFor :: Marker -> Stage
+stageFor MImpactCat = NeedCatLine
+stageFor MDamageCat = NeedDcLine
+stageFor MNWSet = NeedNWName
+
+-- | Parse one substance/CF row into a 'MethodCF', or 'Nothing' if the row is
+-- too short to be a factor line.
+parseCFRow :: SimaProConfig -> BS.ByteString -> Maybe MethodCF
+parseCFRow cfg line =
+    case splitCSV (spDelimiter cfg) line of
+        (comp : sub : name : cas : cfVal : cfUnit : _) ->
+            let !rawName = decodeBS (BS8.strip name)
+                -- Keep the full suffixed name so the CF's 'mcfFlowRef' UUID
+                -- matches the suffixed biosphere flow UUID parsed by
+                -- 'SimaPro.Parser.bioRowToExchange'. The location is also
+                -- exposed via 'mcfConsumerLocation' for regional dispatch on
+                -- engines that key CFs by activity location (openLCA JSON-LD);
+                -- SimaPro CSV CFs are already region-tagged in the name, so
+                -- dual storage is correct.
+                !mLoc = snd (extractLocationSuffix rawName)
+                !cfUnitT = decodeBS (BS8.strip cfUnit)
+                -- UUID hashed via the shared 'generateFlowUUID' +
+                -- 'normalizeSimaProCompartment' so the CF side and
+                -- 'SimaPro.Parser.bioRowToExchange' produce the same UUID for
+                -- the same flow.
+                !flowRef =
+                    generateFlowUUID
+                        rawName
+                        (normalizeSimaProCompartment (decodeBS comp) (decodeBS sub))
+                        cfUnitT
+                !cf =
+                    MethodCF
+                        { mcfFlowRef = flowRef
+                        , mcfFlowName = rawName
+                        , mcfDirection = direction comp
+                        , mcfValue = parseAmount (spDecimal cfg) (BS8.strip cfVal)
+                        , mcfCompartment = mkCompartment comp sub
+                        , mcfCAS = normalizeCAS (decodeBS (BS8.strip cas))
+                        , mcfUnit = cfUnitT
+                        , mcfConsumerLocation = mLoc
+                        }
+             in Just cf
+        _ -> Nothing
+
+-- | Parse a two-column @name;value@ row (damage impacts, normalization,
+-- weighting), or 'Nothing' if the row lacks both columns.
+parseNameValue :: SimaProConfig -> BS.ByteString -> Maybe (Text, Double)
+parseNameValue cfg line =
+    case splitCSV (spDelimiter cfg) line of
+        (name : val : _) ->
+            Just (decodeBS (BS8.strip name), parseAmount (spDecimal cfg) (BS8.strip val))
+        _ -> Nothing
+
+-- | Parse a @Name;Unit@ header line. Total: a missing unit yields "".
+parseNameUnit :: SimaProConfig -> BS.ByteString -> (Text, Text)
+parseNameUnit cfg line =
+    case splitCSV (spDelimiter cfg) line of
+        (name : unit : _) -> (decodeBS (BS8.strip name), decodeBS (BS8.strip unit))
+        [name] -> (decodeBS (BS8.strip name), "")
+        [] -> ("", "")
 
 -- ============================================================================
 -- Helpers
@@ -364,10 +361,6 @@ parseMethodName _cfg = go False
 isBlank :: BS.ByteString -> Bool
 isBlank = BS.null . BS8.strip
 
-head' :: [a] -> a
-head' (x : _) = x
-head' [] = error "Method.ParserSimaPro: unexpected empty field list"
-
 direction :: BS.ByteString -> FlowDirection
 direction comp
     | lc == "raw" || lc == "resources" || "raw" `BS8.isPrefixOf` lc = Input

From 8950615e8315c5692f96b66e58d76e0ec08bb898 Mon Sep 17 00:00:00 2001
From: Christophe Combelles <ccomb@free.fr>
Date: Thu, 28 May 2026 17:52:41 +0200
Subject: [PATCH 34/43] refactor(ecospold2): factor parseWithXeno into SAX
 combinators (#97)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

## Summary

`parseWithXeno` was a single ~465-line function. Its `closeTag` handler
held two near-duplicate ~115-line exchange-finalization blocks plus a
dozen field setters, all repeating the same three idioms: text harvest,
path pop, and per-kind `InIntermediateExchange`/`InElementaryExchange`
dispatch.

This extracts small top-level pure combinators so each block now carries
only its domain decision logic:

- **idioms** — `accumText`, `popPath`, `popText`, `pathAt`
- **dispatch** — `onExchange` (over `mapExchange`); the exhaustive
`ElementContext` matches live in `currentIntermediate` /
`currentElementary` / `inGeneralComment`, so call sites stay
wildcard-free
- **exchange close** — `mkUnit`, `resolveGroups`, `missingUnitWarning`,
`parseUUIDOrNil`, `finishExchange`, and the `add*` state pushes
- **plumbing** — result handling becomes an `Either`-monad do-block
(`Data.Bifunctor.first`); `buildResult`'s tail becomes an `fmap`

Combinators are kept allocation-neutral (no lens on this hot strict
fold). Net **−29 lines**, single file.

## Test plan

- [x] `cabal build` clean — only the 2 pre-existing MCP warnings, none
from this file
- [x] `cabal test` — 1107/1107 hspec green, 1 pending (unchanged from
baseline)
- [x] EcoSpold2 group green: per-exchange comments, property-comment
isolation, waste patterns A & B, native activityType, malformed-input
`Left`
---
 src/EcoSpold/Parser2.hs | 497 +++++++++++++++++++---------------------
 1 file changed, 234 insertions(+), 263 deletions(-)

diff --git a/src/EcoSpold/Parser2.hs b/src/EcoSpold/Parser2.hs
index 904a0131..d82fe7d3 100644
--- a/src/EcoSpold/Parser2.hs
+++ b/src/EcoSpold/Parser2.hs
@@ -4,6 +4,7 @@
 
 module EcoSpold.Parser2 (streamParseActivityAndFlowsFromFile, normalizeCAS) where
 
+import Data.Bifunctor (first)
 import qualified Data.ByteString as BS
 import qualified Data.Map as M
 import Data.Maybe (catMaybes, fromMaybe)
@@ -224,14 +225,146 @@ ecoSpoldSpecialActivityTypeLabel = \case
     6 -> "Import activity"
     n -> "Unknown (code " <> T.pack (show n) <> ")"
 
+-- ============================================================================
+-- SAX state combinators (pure, allocation-neutral — inlined into the fold)
+-- ============================================================================
+
+-- | Concatenated, entity-decoded text accumulated since the element opened.
+accumText :: ParseState -> Text
+accumText = T.concat . reverse . map bsToText . psTextAccum
+
+-- | Pop one element off the path stack.
+popPath :: ParseState -> ParseState
+popPath st = st{psPath = drop 1 (psPath st)}
+
+-- | The common close-tag epilogue: pop the path and discard accumulated text.
+popText :: ParseState -> ParseState
+popText st = (popPath st){psTextAccum = []}
+
+-- | Is the element at the given depth (0 = currently-open tag) named @name@?
+pathAt :: Int -> BS.ByteString -> ParseState -> Bool
+pathAt depth name st = case drop depth (psPath st) of
+    (e : _) -> isElement e name
+    [] -> False
+
+-- | Transform the open exchange accumulator, leaving non-exchange contexts
+-- untouched. The exhaustive match lives here so the call sites stay wildcard-free.
+mapExchange ::
+    (IntermediateData -> IntermediateData) ->
+    (ElementaryData -> ElementaryData) ->
+    ElementContext ->
+    ElementContext
+mapExchange fi fe = \case
+    InIntermediateExchange d -> InIntermediateExchange (fi d)
+    InElementaryExchange d -> InElementaryExchange (fe d)
+    InActivityName -> InActivityName
+    InGeographyShortname -> InGeographyShortname
+    InGeneralCommentText i -> InGeneralCommentText i
+    Other -> Other
+
+-- | Apply per-kind updates to the open exchange accumulator, then pop path+text.
+onExchange ::
+    (IntermediateData -> IntermediateData) ->
+    (ElementaryData -> ElementaryData) ->
+    ParseState ->
+    ParseState
+onExchange fi fe st = popText st{psContext = mapExchange fi fe (psContext st)}
+
+-- | The currently-open intermediate exchange, if any.
+currentIntermediate :: ParseState -> Maybe IntermediateData
+currentIntermediate st = case psContext st of
+    InIntermediateExchange d -> Just d
+    InElementaryExchange _ -> Nothing
+    InActivityName -> Nothing
+    InGeographyShortname -> Nothing
+    InGeneralCommentText _ -> Nothing
+    Other -> Nothing
+
+-- | The currently-open elementary exchange, if any.
+currentElementary :: ParseState -> Maybe ElementaryData
+currentElementary st = case psContext st of
+    InElementaryExchange d -> Just d
+    InIntermediateExchange _ -> Nothing
+    InActivityName -> Nothing
+    InGeographyShortname -> Nothing
+    InGeneralCommentText _ -> Nothing
+    Other -> Nothing
+
+-- | Are we inside a @generalComment@ @\<text\>@ element?
+inGeneralComment :: ParseState -> Bool
+inGeneralComment st = case psContext st of
+    InGeneralCommentText _ -> True
+    InIntermediateExchange _ -> False
+    InElementaryExchange _ -> False
+    InActivityName -> False
+    InGeographyShortname -> False
+    Other -> False
+
+-- | Build a 'Unit', substituting placeholders for a missing unit name.
+mkUnit :: UUID -> Text -> Unit
+mkUnit uuid name
+    | T.null name = Unit uuid "UNKNOWN_UNIT" "?" ""
+    | otherwise = Unit uuid name name ""
+
+-- | Resolve in/out group: prefer the attribute value, fall back to the pending
+-- value captured from the child @\<inputGroup\>@ / @\<outputGroup\>@ element.
+resolveGroups :: Text -> Text -> ParseState -> (Text, Text)
+resolveGroups inG outG st =
+    ( if T.null inG then psPendingInputGroup st else inG
+    , if T.null outG then psPendingOutputGroup st else outG
+    )
+
+-- | Warning emitted (as a singleton, else empty) when an exchange has no unit name.
+missingUnitWarning :: String -> Text -> Text -> [String]
+missingUnitWarning kind flowId unitName =
+    [ "[WARNING] Missing unit name for "
+        ++ kind
+        ++ " exchange with flow ID: "
+        ++ T.unpack flowId
+        ++ " - using 'UNKNOWN_UNIT' placeholder"
+    | T.null unitName
+    ]
+
+-- | Parse a UUID, treating the empty string as the nil UUID (no warning).
+parseUUIDOrNil :: Text -> (UUID, Maybe String)
+parseUUIDOrNil t
+    | T.null t = (UUID.nil, Nothing)
+    | otherwise = parseUUID t
+
+-- | Second argument when non-blank, otherwise the fallback.
+nonBlankOr :: Text -> Text -> Text
+nonBlankOr fallback t = if T.null t then fallback else t
+
+addExchange :: Exchange -> ParseState -> ParseState
+addExchange ex st = st{psExchanges = ex : psExchanges st}
+
+addTechFlow :: TechnosphereFlow -> ParseState -> ParseState
+addTechFlow f st = st{psTechFlows = f : psTechFlows st}
+
+addBioFlow :: BiosphereFlow -> ParseState -> ParseState
+addBioFlow f st = st{psBioFlows = f : psBioFlows st}
+
+addWasteFlow :: WasteFlow -> ParseState -> ParseState
+addWasteFlow f st = st{psWasteFlows = f : psWasteFlows st}
+
+-- | Common exchange-close bookkeeping: leave the exchange context, pop the
+-- path/text, clear pending groups, record the unit and any warnings.
+finishExchange :: Unit -> [String] -> ParseState -> ParseState
+finishExchange unit warns st =
+    (popText st)
+        { psContext = Other
+        , psPendingInputGroup = ""
+        , psPendingOutputGroup = ""
+        , psUnits = unit : psUnits st
+        , psWarnings = warns ++ psWarnings st
+        }
+
 -- | Xeno SAX parser implementation
 parseWithXeno :: BS.ByteString -> ProcessId -> Either String ((Activity, [TechnosphereFlow], [BiosphereFlow], [WasteFlow], [Unit]), [String])
-parseWithXeno xmlContent processId =
-    case X.fold openTag attribute endOpen text closeTag cdata initialParseState xmlContent of
-        Left err -> Left (show err)
-        Right finalState -> case buildResult finalState processId of
-            Left err -> Left err
-            Right result -> Right (result, reverse $ psWarnings finalState)
+parseWithXeno xmlContent processId = do
+    finalState <- first show (X.fold openTag attribute endOpen text closeTag cdata initialParseState xmlContent)
+    result <- buildResult finalState processId
+    pure (result, reverse (psWarnings finalState))
   where
     -- Open tag handler - update path and context
     openTag state tagName =
@@ -258,17 +391,12 @@ parseWithXeno xmlContent processId =
 
     -- Attribute handler - extract attributes
     attribute state name value =
-        let isInsideProperty = case psPath state of
-                [] -> False
-                (current : _) -> isElement current "property"
-            isOnComment = case psPath state of
-                [] -> False
-                (current : _) -> isElement current "comment"
+        let isInsideProperty = pathAt 0 "property" state
             -- xml:lang on the currently-open <comment>; remembered until closeTag.
             -- Attribute order is not significant for entity ref selection — we
             -- only need the lang at close-time.
             withLang st
-                | isOnComment && isElement name "xml:lang" = st{psPendingCommentLang = bsToText value}
+                | pathAt 0 "comment" state && isElement name "xml:lang" = st{psPendingCommentLang = bsToText value}
                 | otherwise = st
          in case psContext state of
                 InIntermediateExchange idata ->
@@ -297,9 +425,7 @@ parseWithXeno xmlContent processId =
                 _ ->
                     -- Attributes on the <activity> opening tag carry the
                     -- ecospold2 activityType and specialActivityType enums.
-                    let onActivity = case psPath state of
-                            (current : _) -> isElement current "activity"
-                            [] -> False
+                    let onActivity = pathAt 0 "activity" state
                         captured
                             | onActivity && isElement name "activityType" =
                                 state{psActivityType = bsToIntMaybe value}
@@ -321,60 +447,48 @@ parseWithXeno xmlContent processId =
     -- Close tag handler - finalize elements
     closeTag state tagName
         | isElement tagName "activityName" =
-            let txt = T.concat $ reverse $ map bsToText (psTextAccum state)
-             in state{psActivityName = Just txt, psContext = Other, psPath = drop 1 (psPath state), psTextAccum = []}
+            (popText state){psActivityName = Just (accumText state), psContext = Other}
         | isElement tagName "comment" =
             -- Capture <comment> text only when the immediate parent is the
             -- exchange itself, not a nested <property>. Property comments
             -- describe the property (e.g. "carbon content"), not the exchange.
-            let parent = case drop 1 (psPath state) of
-                    (p : _) -> p
-                    [] -> ""
-                txt = T.concat $ reverse $ map bsToText (psTextAccum state)
+            let txt = accumText state
                 lang = psPendingCommentLang state
-                popPath = state{psPath = drop 1 (psPath state), psTextAccum = [], psPendingCommentLang = ""}
-             in case psContext state of
-                    InIntermediateExchange idata
-                        | isElement parent "intermediateExchange" ->
-                            popPath{psContext = InIntermediateExchange idata{idComment = pickComment (idComment idata) lang txt}}
-                    InElementaryExchange edata
-                        | isElement parent "elementaryExchange" ->
-                            popPath{psContext = InElementaryExchange edata{edComment = pickComment (edComment edata) lang txt}}
-                    _ -> popPath
+                st' =
+                    onExchange
+                        (\d -> if pathAt 1 "intermediateExchange" state then d{idComment = pickComment (idComment d) lang txt} else d)
+                        (\d -> if pathAt 1 "elementaryExchange" state then d{edComment = pickComment (edComment d) lang txt} else d)
+                        state
+             in st'{psPendingCommentLang = ""}
         | isElement tagName "shortname" && psContext state == InGeographyShortname =
-            let txt = T.concat $ reverse $ map bsToText (psTextAccum state)
-             in state{psLocation = Just txt, psContext = Other, psPath = drop 1 (psPath state), psTextAccum = []}
+            (popText state){psLocation = Just (accumText state), psContext = Other}
         | isElement tagName "intermediateExchange" =
-            case psContext state of
-                InIntermediateExchange idata ->
-                    -- Use pending group values if attribute values are empty
-                    let finalInputGroup = if T.null (idInputGroup idata) then psPendingInputGroup state else idInputGroup idata
-                        finalOutputGroup = if T.null (idOutputGroup idata) then psPendingOutputGroup state else idOutputGroup idata
-                        isInput = not $ T.null finalInputGroup
+            case currentIntermediate state of
+                Nothing -> popPath state
+                Just idata ->
+                    let (finalInputGroup, finalOutputGroup) = resolveGroups (idInputGroup idata) (idOutputGroup idata) state
+                        isInput = not (T.null finalInputGroup)
                         isOutput = T.null finalInputGroup
-                        -- Reference flow identification:
-                        -- Reference products are identified ONLY by outputGroup="0"
-                        -- This works for both normal production (positive amount) and waste treatment (negative amount)
-                        -- Negative inputs (like wastewater discharge) should NOT be considered reference products
-                        -- outputGroup valid values: 0=reference product, 1-3=byproducts, 4=allocated byproduct, 5=recyclable
+                        -- Reference products are identified ONLY by outputGroup="0"; this holds for
+                        -- normal production (positive amount) and waste treatment (negative amount).
+                        -- Negative inputs (e.g. wastewater discharge) are never reference products.
                         isReferenceProduct = isOutput && finalOutputGroup == "0"
-                        -- Pattern B: intermediateExchange tagged as Waste via classification
-                        -- (System='By-product classification', Value='Waste'). When set, the flow
-                        -- represents a waste output that consumers treat via a treatment activity.
+                        -- Pattern B: intermediateExchange tagged Waste via classification
+                        -- (System='By-product classification', Value='Waste') — a waste output that
+                        -- consumers treat via a treatment activity.
                         isWasteFlow = M.lookup "By-product classification" (idClassifications idata) == Just "Waste"
-                        -- Parse UUIDs and collect warnings
                         (flowUUID, flowWarn) = parseUUID (idFlowId idata)
                         (unitUUID, unitWarn) = parseUUID (idUnitId idata)
-                        (linkUUID, linkWarn) =
-                            if T.null (idActivityLinkId idata)
-                                then (UUID.nil, Nothing)
-                                else parseUUID (idActivityLinkId idata)
-                        uuidWarnings = catMaybes [flowWarn, unitWarn, linkWarn]
+                        (linkUUID, linkWarn) = parseUUIDOrNil (idActivityLinkId idata)
+                        warns =
+                            catMaybes [flowWarn, unitWarn, linkWarn]
+                                ++ missingUnitWarning "intermediate" (idFlowId idata) (idUnitName idata)
                         techRoleFor
                             | isReferenceProduct = ReferenceProduct
                             | isInput = Input
                             | otherwise = Coproduct
-                        resolvedFlowName = if T.null (idFlowName idata) then idFlowId idata else idFlowName idata
+                        resolvedFlowName = nonBlankOr (idFlowId idata) (idFlowName idata)
+                        unit = mkUnit unitUUID (idUnitName idata)
                         techExchange =
                             TechnosphereExchange
                                 { techFlowId = flowUUID
@@ -387,14 +501,7 @@ parseWithXeno xmlContent processId =
                                 , techComment = snd <$> idComment idata
                                 , techPedigree = Nothing
                                 }
-                        techFlow =
-                            TechnosphereFlow
-                                flowUUID
-                                resolvedFlowName
-                                unitUUID
-                                (idSynonyms idata)
-                                Nothing -- CAS
-                                Nothing -- substanceId
+                        techFlow = TechnosphereFlow flowUUID resolvedFlowName unitUUID (idSynonyms idata) Nothing Nothing
                         wasteExchange =
                             WasteExchange
                                 { waFlowId = flowUUID
@@ -407,64 +514,24 @@ parseWithXeno xmlContent processId =
                                 , waComment = snd <$> idComment idata
                                 , waPedigree = Nothing
                                 }
-                        wasteFlow =
-                            WasteFlow
-                                flowUUID
-                                resolvedFlowName
-                                unitUUID
-                                (idSynonyms idata)
-                                Nothing -- CAS
-                                Nothing -- substanceId
-                        unitNameWarning =
-                            [ "[WARNING] Missing unit name for intermediate exchange with flow ID: "
-                                ++ T.unpack (idFlowId idata)
-                                ++ " - using 'UNKNOWN_UNIT' placeholder"
-                            | T.null (idUnitName idata)
-                            ]
-                        unit =
-                            Unit
-                                unitUUID
-                                (if T.null (idUnitName idata) then "UNKNOWN_UNIT" else idUnitName idata)
-                                (if T.null (idUnitName idata) then "?" else idUnitName idata)
-                                ""
-                        -- Reference product unit: only meaningful when the flow stays in the technosphere
+                        wasteFlow = WasteFlow flowUUID resolvedFlowName unitUUID (idSynonyms idata) Nothing Nothing
+                        -- Reference product unit only when the flow stays in the technosphere
                         -- (waste outputs are never the reference product of a producing process).
                         newRefUnit =
                             if isReferenceProduct && not isWasteFlow && not (T.null (idUnitName idata))
                                 then Just (idUnitName idata)
                                 else psRefUnit state
-                        baseState =
-                            state
-                                { psContext = Other
-                                , psPath = drop 1 (psPath state)
-                                , psTextAccum = []
-                                , psPendingInputGroup = ""
-                                , psPendingOutputGroup = ""
-                                , psRefUnit = newRefUnit
-                                , psUnits = unit : psUnits state
-                                , psWarnings = uuidWarnings ++ unitNameWarning ++ psWarnings state
-                                }
+                        base = (finishExchange unit warns state){psRefUnit = newRefUnit}
                      in if isWasteFlow
-                            then
-                                baseState
-                                    { psExchanges = wasteExchange : psExchanges state
-                                    , psWasteFlows = wasteFlow : psWasteFlows state
-                                    }
-                            else
-                                baseState
-                                    { psExchanges = techExchange : psExchanges state
-                                    , psTechFlows = techFlow : psTechFlows state
-                                    }
-                _ -> state{psPath = drop 1 (psPath state)}
+                            then addExchange wasteExchange (addWasteFlow wasteFlow base)
+                            else addExchange techExchange (addTechFlow techFlow base)
         | isElement tagName "elementaryExchange" =
-            case psContext state of
-                InElementaryExchange edata ->
-                    -- Use pending group values if attribute values are empty
-                    let finalInputGroup = if T.null (edInputGroup edata) then psPendingInputGroup state else edInputGroup edata
-                        finalOutputGroup = if T.null (edOutputGroup edata) then psPendingOutputGroup state else edOutputGroup edata
-                        -- A missing compartment becomes 'Nothing', not an empty
-                        -- 'Compartment ""' sentinel — the latter used to silently
-                        -- collide with method-side empty mediums.
+            case currentElementary state of
+                Nothing -> popPath state
+                Just edata ->
+                    let (finalInputGroup, finalOutputGroup) = resolveGroups (edInputGroup edata) (edOutputGroup edata) state
+                        -- A missing compartment becomes 'Nothing', not an empty 'Compartment ""'
+                        -- sentinel — the latter used to silently collide with method-side empty mediums.
                         mCompName = case edCompartments edata of
                             (c : _) | not (T.null c) -> Just c
                             _ -> Nothing
@@ -475,29 +542,29 @@ parseWithXeno xmlContent processId =
                             (Nothing, Nothing) -> Nothing
                             (Just c, sc) -> Just (Compartment c sc)
                             (Nothing, Just _) -> Nothing -- sub without medium is meaningless; drop
-                            -- Determine the biosphere direction.
-                            -- Primary: use inputGroup/outputGroup if present.
-                            -- Fallback: compartment heuristic — natural-resource flows are extractions.
+                        -- Biosphere direction: prefer inputGroup/outputGroup, else fall back to the
+                        -- compartment heuristic (natural-resource flows are extractions).
                         direction
                             | not (T.null finalInputGroup) = Resource
                             | not (T.null finalOutputGroup) = Emission
                             | otherwise = case edCompartments edata of
                                 (comp : _) | T.toLower comp == "natural resource" -> Resource
                                 _ -> Emission
-                        -- Pattern A: elementaryExchange with compartment="inventory indicator"
-                        -- subcompartment="waste". Waste outputs surfaced through the
-                        -- elementary axis but semantically technosphere waste — route them
-                        -- to WasteExchange instead of BiosphereExchange.
+                        -- Pattern A: compartment="inventory indicator" / subcompartment="waste".
+                        -- Surfaced through the elementary axis but semantically technosphere waste —
+                        -- route to WasteExchange instead of BiosphereExchange.
                         isInventoryIndicatorWaste = case (mCompName, subCompartment) of
                             (Just c, Just s) ->
                                 T.toLower (T.strip c) == "inventory indicator"
                                     && T.toLower (T.strip s) == "waste"
                             _ -> False
-                        -- Parse UUIDs and collect warnings
                         (flowUUID, flowWarn) = parseUUID (edFlowId edata)
                         (unitUUID, unitWarn) = parseUUID (edUnitId edata)
-                        uuidWarnings = catMaybes [flowWarn, unitWarn]
-                        resolvedFlowName = if T.null (edFlowName edata) then edFlowId edata else edFlowName edata
+                        warns =
+                            catMaybes [flowWarn, unitWarn]
+                                ++ missingUnitWarning "elementary" (edFlowId edata) (edUnitName edata)
+                        resolvedFlowName = nonBlankOr (edFlowId edata) (edFlowName edata)
+                        unit = mkUnit unitUUID (edUnitName edata)
                         bioExchange =
                             BiosphereExchange
                                 { bioFlowId = flowUUID
@@ -508,15 +575,7 @@ parseWithXeno xmlContent processId =
                                 , bioComment = snd <$> edComment edata
                                 , bioPedigree = Nothing
                                 }
-                        bioFlow =
-                            BiosphereFlow
-                                flowUUID
-                                resolvedFlowName
-                                unitUUID
-                                (edSynonyms edata)
-                                (edCAS edata)
-                                Nothing -- substanceId - to be filled later
-                                compartment
+                        bioFlow = BiosphereFlow flowUUID resolvedFlowName unitUUID (edSynonyms edata) (edCAS edata) Nothing compartment
                         wasteExchange =
                             WasteExchange
                                 { waFlowId = flowUUID
@@ -529,147 +588,61 @@ parseWithXeno xmlContent processId =
                                 , waComment = snd <$> edComment edata
                                 , waPedigree = Nothing
                                 }
-                        wasteFlow =
-                            WasteFlow
-                                flowUUID
-                                resolvedFlowName
-                                unitUUID
-                                (edSynonyms edata)
-                                (edCAS edata)
-                                Nothing -- substanceId
-                        unitNameWarning =
-                            [ "[WARNING] Missing unit name for elementary exchange with flow ID: "
-                                ++ T.unpack (edFlowId edata)
-                                ++ " - using 'UNKNOWN_UNIT' placeholder"
-                            | T.null (edUnitName edata)
-                            ]
-                        unit =
-                            Unit
-                                unitUUID
-                                (if T.null (edUnitName edata) then "UNKNOWN_UNIT" else edUnitName edata)
-                                (if T.null (edUnitName edata) then "?" else edUnitName edata)
-                                ""
-                        baseState =
-                            state
-                                { psContext = Other
-                                , psPath = drop 1 (psPath state)
-                                , psTextAccum = []
-                                , psPendingInputGroup = ""
-                                , psPendingOutputGroup = ""
-                                , psUnits = unit : psUnits state
-                                , psWarnings = uuidWarnings ++ unitNameWarning ++ psWarnings state
-                                }
+                        wasteFlow = WasteFlow flowUUID resolvedFlowName unitUUID (edSynonyms edata) (edCAS edata) Nothing
+                        base = finishExchange unit warns state
                      in if isInventoryIndicatorWaste
-                            then
-                                baseState
-                                    { psExchanges = wasteExchange : psExchanges state
-                                    , psWasteFlows = wasteFlow : psWasteFlows state
-                                    }
-                            else
-                                baseState
-                                    { psExchanges = bioExchange : psExchanges state
-                                    , psBioFlows = bioFlow : psBioFlows state
-                                    }
-                _ -> state{psPath = drop 1 (psPath state)}
+                            then addExchange wasteExchange (addWasteFlow wasteFlow base)
+                            else addExchange bioExchange (addBioFlow bioFlow base)
         | isElement tagName "text" =
-            case psContext state of
-                InGeneralCommentText _idx ->
-                    let txt = T.concat $ reverse $ map bsToText (psTextAccum state)
-                     in -- Store as (index, text) pair for later sorting
-                        if T.null txt
-                            then state{psContext = Other, psTextAccum = []}
-                            else state{psDescription = txt : psDescription state, psContext = Other, psTextAccum = []}
-                _ -> state{psPath = drop 1 (psPath state), psTextAccum = []}
+            -- The generalComment <text> branch deliberately does NOT pop the path.
+            if inGeneralComment state
+                then
+                    let txt = accumText state
+                        withDesc = if T.null txt then state else state{psDescription = txt : psDescription state}
+                     in withDesc{psContext = Other, psTextAccum = []}
+                else popText state
         | isElement tagName "name" =
-            let txt = T.concat $ reverse $ map bsToText (psTextAccum state)
-                isInsideProperty = case psPath state of
-                    (_ : parent : _) -> isElement parent "property"
-                    _ -> False
-             in case psContext state of
-                    InIntermediateExchange idata
-                        | not isInsideProperty ->
-                            state{psContext = InIntermediateExchange idata{idFlowName = txt}, psPath = drop 1 (psPath state), psTextAccum = []}
-                    InElementaryExchange edata
-                        | not isInsideProperty ->
-                            state{psContext = InElementaryExchange edata{edFlowName = txt}, psPath = drop 1 (psPath state), psTextAccum = []}
-                    _ -> state{psPath = drop 1 (psPath state), psTextAccum = []}
+            let txt = accumText state
+             in if pathAt 1 "property" state
+                    then popText state
+                    else onExchange (\d -> d{idFlowName = txt}) (\d -> d{edFlowName = txt}) state
         | isElement tagName "unitName" =
-            let txt = T.concat $ reverse $ map bsToText (psTextAccum state)
-                isInsideProperty = case psPath state of
-                    (_ : parent : _) -> isElement parent "property"
-                    _ -> False
-             in case psContext state of
-                    InIntermediateExchange idata
-                        | not isInsideProperty ->
-                            state{psContext = InIntermediateExchange idata{idUnitName = txt}, psPath = drop 1 (psPath state), psTextAccum = []}
-                    InElementaryExchange edata
-                        | not isInsideProperty ->
-                            state{psContext = InElementaryExchange edata{edUnitName = txt}, psPath = drop 1 (psPath state), psTextAccum = []}
-                    _ -> state{psPath = drop 1 (psPath state), psTextAccum = []}
+            let txt = accumText state
+             in if pathAt 1 "property" state
+                    then popText state
+                    else onExchange (\d -> d{idUnitName = txt}) (\d -> d{edUnitName = txt}) state
         | isElement tagName "synonym" =
-            let txt = T.strip $ T.concat $ reverse $ map bsToText (psTextAccum state)
-             in case psContext state of
-                    InIntermediateExchange idata
-                        | not (T.null txt) ->
-                            let syns = M.insertWith S.union "en" (S.singleton txt) (idSynonyms idata)
-                             in state{psContext = InIntermediateExchange idata{idSynonyms = syns}, psPath = drop 1 (psPath state), psTextAccum = []}
-                    InElementaryExchange edata
-                        | not (T.null txt) ->
-                            let syns = M.insertWith S.union "en" (S.singleton txt) (edSynonyms edata)
-                             in state{psContext = InElementaryExchange edata{edSynonyms = syns}, psPath = drop 1 (psPath state), psTextAccum = []}
-                    _ -> state{psPath = drop 1 (psPath state), psTextAccum = []}
+            let txt = T.strip (accumText state)
+                ins m = if T.null txt then m else M.insertWith S.union "en" (S.singleton txt) m
+             in onExchange (\d -> d{idSynonyms = ins (idSynonyms d)}) (\d -> d{edSynonyms = ins (edSynonyms d)}) state
+        -- inputGroup / outputGroup: stash the pending value, keep the parent exchange context.
         | isElement tagName "inputGroup" =
-            let txt = T.strip $ T.concat $ reverse $ map bsToText (psTextAccum state)
-             in -- DON'T change psContext - preserve the parent exchange context
-                state{psPendingInputGroup = txt, psPath = drop 1 (psPath state), psTextAccum = []}
+            (popText state){psPendingInputGroup = T.strip (accumText state)}
         | isElement tagName "outputGroup" =
-            let txt = T.strip $ T.concat $ reverse $ map bsToText (psTextAccum state)
-             in -- DON'T change psContext - preserve the parent exchange context
-                state{psPendingOutputGroup = txt, psPath = drop 1 (psPath state), psTextAccum = []}
+            (popText state){psPendingOutputGroup = T.strip (accumText state)}
         | isElement tagName "compartment" =
-            let txt = T.strip $ T.concat $ reverse $ map bsToText (psTextAccum state)
-             in case psContext state of
-                    InElementaryExchange edata
-                        | not (T.null txt) ->
-                            state{psContext = InElementaryExchange edata{edCompartments = txt : edCompartments edata}, psPath = drop 1 (psPath state), psTextAccum = []}
-                    _ ->
-                        state{psPath = drop 1 (psPath state), psTextAccum = []}
+            let txt = T.strip (accumText state)
+                add d = if T.null txt then d else d{edCompartments = txt : edCompartments d}
+             in onExchange id add state
         | isElement tagName "subcompartment" =
-            let txt = T.strip $ T.concat $ reverse $ map bsToText (psTextAccum state)
-             in case psContext state of
-                    InElementaryExchange edata
-                        | not (T.null txt) ->
-                            state{psContext = InElementaryExchange edata{edSubcompartments = txt : edSubcompartments edata}, psPath = drop 1 (psPath state), psTextAccum = []}
-                    _ ->
-                        state{psPath = drop 1 (psPath state), psTextAccum = []}
+            let txt = T.strip (accumText state)
+                add d = if T.null txt then d else d{edSubcompartments = txt : edSubcompartments d}
+             in onExchange id add state
         | isElement tagName "classificationSystem" =
-            let txt = T.strip $ T.concat $ reverse $ map bsToText (psTextAccum state)
-             in state{psPendingClassSystem = txt, psPath = drop 1 (psPath state), psTextAccum = []}
+            (popText state){psPendingClassSystem = T.strip (accumText state)}
         | isElement tagName "classificationValue" =
-            let txt = T.strip $ T.concat $ reverse $ map bsToText (psTextAccum state)
+            let txt = T.strip (accumText state)
                 sys = psPendingClassSystem state
-                emptyPair = T.null sys || T.null txt
-             in case psContext state of
-                    InIntermediateExchange idata
-                        | not emptyPair ->
-                            state
-                                { psContext =
-                                    InIntermediateExchange
-                                        idata{idClassifications = M.insert sys txt (idClassifications idata)}
-                                , psPath = drop 1 (psPath state)
-                                , psTextAccum = []
-                                }
-                    _ ->
-                        state
-                            { psClassifications =
-                                if emptyPair
-                                    then psClassifications state
-                                    else M.insert sys txt (psClassifications state)
-                            , psPath = drop 1 (psPath state)
-                            , psTextAccum = []
-                            }
+             in popText $
+                    if T.null sys || T.null txt
+                        then state
+                        else case currentIntermediate state of
+                            -- Exchange-scoped classification (e.g. By-product → Waste).
+                            Just d -> state{psContext = InIntermediateExchange d{idClassifications = M.insert sys txt (idClassifications d)}}
+                            -- Otherwise an activity-level classification.
+                            Nothing -> state{psClassifications = M.insert sys txt (psClassifications state)}
         | otherwise =
-            state{psPath = drop 1 (psPath state)}
+            popPath state
 
     -- CDATA handler - treat as text
     cdata = text
@@ -688,9 +661,7 @@ parseWithXeno xmlContent processId =
             bios = reverse (psBioFlows st)
             wastes = reverse (psWasteFlows st)
             units = reverse (psUnits st)
-         in case applyCutoffStrategy activity of
-                Right act -> Right (act, techs, bios, wastes, units)
-                Left err -> Left err
+         in (\act -> (act, techs, bios, wastes, units)) <$> applyCutoffStrategy activity
 
 -- | Parse EcoSpold file using Xeno SAX parser
 streamParseActivityAndFlowsFromFile :: FilePath -> IO (Either String (Activity, [TechnosphereFlow], [BiosphereFlow], [WasteFlow], [Unit]))

From cad79700719a2a6bd1ed530b3a81668c2b408ab7 Mon Sep 17 00:00:00 2001
From: Christophe Combelles <ccomb@free.fr>
Date: Thu, 28 May 2026 15:46:21 +0200
Subject: [PATCH 35/43] refactor(mcp): extract shared ExceptT plumbing for tool
 handlers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The tool handlers each re-inlined the same wrapper, DB lookup, and
error-lifting boilerplate. Factor it into four small primitives —
runTool (the runExceptT/toolError envelope), requireDatabase, liftShow,
and transformers' own `except` in place of hand-rolled `ExceptT . pure` —
and migrate the already-ExceptT handlers onto them.

Behaviour-preserving: same toolError/toolSuccess outputs, same
short-circuit order. Removes one do-block nesting level from every
converted handler (hence the indentation churn).
---
 src/API/MCP.hs | 862 ++++++++++++++++++++++++-------------------------
 1 file changed, 424 insertions(+), 438 deletions(-)

diff --git a/src/API/MCP.hs b/src/API/MCP.hs
index 81a15ee5..443c9fd5 100644
--- a/src/API/MCP.hs
+++ b/src/API/MCP.hs
@@ -28,8 +28,9 @@ import System.Random (randomIO)
 import API.Resources (Param (..), ParamKind (..), Resource)
 import qualified API.Resources as R
 import Config (ClassificationEntry (..), ClassificationPreset (..), DatabaseConfig (..))
+import Control.Applicative ((<|>))
 import Control.Monad.IO.Class (liftIO)
-import Control.Monad.Trans.Except (ExceptT (..), runExceptT, throwE)
+import Control.Monad.Trans.Except (ExceptT (..), except, runExceptT, throwE)
 import Database.Manager (DatabaseManager (..), LoadedDatabase (..), getDatabase)
 import qualified Database.Manager as DM
 
@@ -376,6 +377,28 @@ withDb dbManager rid args action =
                 Nothing -> return $ toolError rid ("Database not loaded: " <> dbName)
                 Just ld -> action (ldDatabase ld, ldSharedSolver ld)
 
+-- ---------------------------------------------------------------------------
+-- ExceptT plumbing shared by every handler
+-- ---------------------------------------------------------------------------
+
+{- | Run a handler body in the shared 'ExceptT Text IO Value' monad: any
+'throwE'/'Left' short-circuits to a 'toolError', a success passes through.
+Every tool handler is @runTool rid $ do …@.
+-}
+runTool :: Value -> ExceptT Text IO Value -> IO Value
+runTool rid = fmap (either (toolError rid) id) . runExceptT
+
+{- | Resolve a loaded database by name, short-circuiting with the standard
+"not loaded" message. The 'ExceptT' counterpart to 'withDb'.
+-}
+requireDatabase :: DatabaseManager -> Text -> ExceptT Text IO LoadedDatabase
+requireDatabase dbManager dbName =
+    ExceptT $ maybe (Left ("Database not loaded: " <> dbName)) Right <$> getDatabase dbManager dbName
+
+-- | Lift an 'Either' whose error only has a 'Show' instance into the handler monad.
+liftShow :: (Show e) => Either e a -> ExceptT Text IO a
+liftShow = either (throwE . T.pack . show) pure
+
 textArg :: Text -> KeyMap Value -> Maybe Text
 textArg key args = case KM.lookup (fromText key) args of
     Just (String t) -> Just t
@@ -824,66 +847,58 @@ so inventories from dep DBs are merged into the returned flows.
 -}
 callGetInventory :: DatabaseManager -> Value -> KeyMap Value -> IO Value
 callGetInventory dbManager rid args =
-    either (toolError rid) id
-        <$> runExceptT
-            ( do
-                (dbName, pid) <- ExceptT $ pure $ (,) <$> requireText "database" args <*> requireText "process_id" args
-                mLoaded <- liftIO $ getDatabase dbManager dbName
-                ld <- case mLoaded of
-                    Nothing -> throwE ("Database not loaded: " <> dbName)
-                    Just x -> pure x
-                let db = ldDatabase ld
-                    solver = ldSharedSolver ld
-                    limit = fromMaybe 50 (intArg "limit" args)
-                    nameFilter = textArg "flow" args
-                ExceptT $ pure $ ensureLinked dbName "computing inventory" db
-                (processId, activity) <- case Service.resolveActivityAndProcessId db pid of
-                    Left err -> throwE (T.pack (show err))
-                    Right v -> pure v
-                subs <- ExceptT $ pure (parseArrayArg "substitutions" Nothing args :: Either Text [Substitution])
-                unitCfg <- liftIO $ DM.getMergedUnitConfig dbManager
-                (mFlows, mUnits) <- liftIO $ DM.getMergedFlowMetadata dbManager
-                -- Empty subs: same as GET path (plain cross-DB inventory).
-                -- Non-empty subs: route through the substitution-aware pipeline so
-                -- dep DBs re-solve against the substituted root scaling.
-                inventory <-
-                    ExceptT $
-                        if null subs
-                            then fmap (fmap SharedSolver.csInventory) (computeInventoryMatrixWithDepsCached unitCfg (DM.mkDepSolverLookup dbManager) db dbName solver processId)
-                            else
-                                either (Left . T.pack . show) (Right . SharedSolver.csInventory)
-                                    <$> Service.inventoryWithSubsAndDeps
-                                        unitCfg
-                                        (DM.mkDepSolverLookup dbManager)
-                                        db
-                                        dbName
-                                        solver
-                                        processId
-                                        subs
-                let inv = Service.convertToInventoryExport db mFlows mUnits processId activity inventory
-                    flows = ieFlows inv
-                    filtered = case nameFilter of
-                        Nothing -> flows
-                        Just q -> filter (T.isInfixOf (T.toLower q) . T.toLower . bfName . ifdFlow) flows
-                    sorted = L.sortBy (\a b -> compare (abs $ ifdQuantity b) (abs $ ifdQuantity a)) filtered
-                    topN = take limit sorted
-                    slim f =
-                        object
-                            [ "flow" .= bfName (ifdFlow f)
-                            , "quantity" .= ifdQuantity f
-                            , "unit" .= ifdUnitName f
-                            , "category" .= ifdCategory f
-                            , "isEmission" .= ifdIsEmission f
-                            ]
-                pure $
-                    toolSuccessJson rid $
-                        object
-                            [ "statistics" .= toJSON (ieStatistics inv)
-                            , "total_flows" .= length flows
-                            , "shown_flows" .= length topN
-                            , "flows" .= map slim topN
-                            ]
-            )
+    runTool rid $ do
+        (dbName, pid) <- except $ (,) <$> requireText "database" args <*> requireText "process_id" args
+        ld <- requireDatabase dbManager dbName
+        let db = ldDatabase ld
+            solver = ldSharedSolver ld
+            limit = fromMaybe 50 (intArg "limit" args)
+            nameFilter = textArg "flow" args
+        except $ ensureLinked dbName "computing inventory" db
+        (processId, activity) <- liftShow (Service.resolveActivityAndProcessId db pid)
+        subs <- except (parseArrayArg "substitutions" Nothing args :: Either Text [Substitution])
+        unitCfg <- liftIO $ DM.getMergedUnitConfig dbManager
+        (mFlows, mUnits) <- liftIO $ DM.getMergedFlowMetadata dbManager
+        -- Empty subs: same as GET path (plain cross-DB inventory).
+        -- Non-empty subs: route through the substitution-aware pipeline so
+        -- dep DBs re-solve against the substituted root scaling.
+        inventory <-
+            ExceptT $
+                if null subs
+                    then fmap (fmap SharedSolver.csInventory) (computeInventoryMatrixWithDepsCached unitCfg (DM.mkDepSolverLookup dbManager) db dbName solver processId)
+                    else
+                        either (Left . T.pack . show) (Right . SharedSolver.csInventory)
+                            <$> Service.inventoryWithSubsAndDeps
+                                unitCfg
+                                (DM.mkDepSolverLookup dbManager)
+                                db
+                                dbName
+                                solver
+                                processId
+                                subs
+        let inv = Service.convertToInventoryExport db mFlows mUnits processId activity inventory
+            flows = ieFlows inv
+            filtered = case nameFilter of
+                Nothing -> flows
+                Just q -> filter (T.isInfixOf (T.toLower q) . T.toLower . bfName . ifdFlow) flows
+            sorted = L.sortBy (\a b -> compare (abs $ ifdQuantity b) (abs $ ifdQuantity a)) filtered
+            topN = take limit sorted
+            slim f =
+                object
+                    [ "flow" .= bfName (ifdFlow f)
+                    , "quantity" .= ifdQuantity f
+                    , "unit" .= ifdUnitName f
+                    , "category" .= ifdCategory f
+                    , "isEmission" .= ifdIsEmission f
+                    ]
+        pure $
+            toolSuccessJson rid $
+                object
+                    [ "statistics" .= toJSON (ieStatistics inv)
+                    , "total_flows" .= length flows
+                    , "shown_flows" .= length topN
+                    , "flows" .= map slim topN
+                    ]
 
 -- | JSON shape for one uncharacterized-flow diagnostic entry.
 encodeUncharacterized :: UncharacterizedFlow -> Value
@@ -950,8 +965,8 @@ runImpactsRequest dbManager args req = do
         method = lrMethod req
         dbName = lrDbName req
         ra = lrResolved req
-    ExceptT $ pure $ ensureLinked dbName "computing impacts" db
-    subs <- ExceptT $ pure (parseArrayArg "substitutions" Nothing args :: Either Text [Substitution])
+    except $ ensureLinked dbName "computing impacts" db
+    subs <- except (parseArrayArg "substitutions" Nothing args :: Either Text [Substitution])
     unitCfg <- liftIO $ DM.getMergedUnitConfig dbManager
     (mFlows, mUnits) <- liftIO $ DM.getMergedFlowMetadata dbManager
     inventory <-
@@ -1013,75 +1028,72 @@ per the naming audit; internal Haskell types keep the 'LCIA' acronym
 -}
 callGetImpacts :: DatabaseManager -> Maybe Text -> Value -> KeyMap Value -> IO Value
 callGetImpacts dbManager mBaseUrl rid args =
-    either (toolError rid) id
-        <$> runExceptT
-            ( do
-                req <- loadLcaRequest dbManager args
-                ir <- runImpactsRequest dbManager args req
-                (_, mUnits) <- liftIO $ DM.getMergedFlowMetadata dbManager
-                let topN = fromMaybe 5 (intArg "top_flows" args)
-                    method = lrMethod req
-                    dbName = lrDbName req
-                    ra = lrResolved req
-                    score = loScore (irOutcome ir)
-                    stats = irMappingStats ir
-                    functionalUnit =
-                        T.pack (showFFloat (Just 2) (irRefProductAmount ir) "")
-                            <> " "
-                            <> irRefProductUnit ir
-                            <> " of "
-                            <> irRefProductName ir
-                    contribs = irContribs ir
-                    topFlows = take topN contribs
-                    webUrlPair = webUrlField mBaseUrl ("/db/" <> dbName <> "/activity/" <> raText ra <> "/impacts/" <> encodeSegment (lrCollection req) <> "/" <> lrMethodIdText req)
-                    hasNeg = any (\(_, _, c) -> c < 0) contribs
-                    unknownUuids = irUnknownUuids ir
-                liftIO $
-                    unless (null unknownUuids) $
-                        reportProgress Warning $
-                            "[MCP get_impacts "
-                                <> T.unpack (methodName method)
-                                <> "] "
-                                <> show (length unknownUuids)
-                                <> " inventory flow UUID(s) absent from merged FlowDB — characterization incomplete. Samples: "
-                                <> show (take 3 unknownUuids)
-                let outcome = irOutcome ir
-                    diagnosticsFields =
-                        [ "uncharacterized_flows" .= map encodeUncharacterized (loUncharacterized outcome)
-                        , "characterized_share"
-                            .= ( if loInventoryAbsSum outcome > 0
-                                    then loCharacterizedSum outcome / loInventoryAbsSum outcome
-                                    else 1 :: Double
-                               )
+    runTool rid $ do
+        req <- loadLcaRequest dbManager args
+        ir <- runImpactsRequest dbManager args req
+        (_, mUnits) <- liftIO $ DM.getMergedFlowMetadata dbManager
+        let topN = fromMaybe 5 (intArg "top_flows" args)
+            method = lrMethod req
+            dbName = lrDbName req
+            ra = lrResolved req
+            score = loScore (irOutcome ir)
+            stats = irMappingStats ir
+            functionalUnit =
+                T.pack (showFFloat (Just 2) (irRefProductAmount ir) "")
+                    <> " "
+                    <> irRefProductUnit ir
+                    <> " of "
+                    <> irRefProductName ir
+            contribs = irContribs ir
+            topFlows = take topN contribs
+            webUrlPair = webUrlField mBaseUrl ("/db/" <> dbName <> "/activity/" <> raText ra <> "/impacts/" <> encodeSegment (lrCollection req) <> "/" <> lrMethodIdText req)
+            hasNeg = any (\(_, _, c) -> c < 0) contribs
+            unknownUuids = irUnknownUuids ir
+        liftIO $
+            unless (null unknownUuids) $
+                reportProgress Warning $
+                    "[MCP get_impacts "
+                        <> T.unpack (methodName method)
+                        <> "] "
+                        <> show (length unknownUuids)
+                        <> " inventory flow UUID(s) absent from merged FlowDB — characterization incomplete. Samples: "
+                        <> show (take 3 unknownUuids)
+        let outcome = irOutcome ir
+            diagnosticsFields =
+                [ "uncharacterized_flows" .= map encodeUncharacterized (loUncharacterized outcome)
+                , "characterized_share"
+                    .= ( if loInventoryAbsSum outcome > 0
+                            then loCharacterizedSum outcome / loInventoryAbsSum outcome
+                            else 1 :: Double
+                       )
+                ]
+        pure $
+            toolSuccessJson rid $
+                attachMarketHintByName (activityName (raActivity ra)) $
+                    object $
+                        [ "method" .= methodName method
+                        , "category" .= methodCategory method
+                        , "score" .= score
+                        , "unit" .= methodUnit method
+                        , "functional_unit" .= functionalUnit
+                        , "mapped_flows" .= (msTotal stats - msUnmatched stats)
+                        , "has_negative_contributions" .= hasNeg
+                        , "top_flows"
+                            .= [ object
+                                    [ "flow_name" .= bfName f
+                                    , "contribution" .= c
+                                    , "contribution_percent" .= (if score /= 0 then c / score * 100 else 0 :: Double)
+                                    , "flow_id" .= UUID.toText (bfId f)
+                                    , "category" .= bfCompartmentName f
+                                    , "compartment" .= bfCompartmentSub f
+                                    , "cf_value" .= cfVal
+                                    , "flow_unit" .= getUnitNameForBioFlow mUnits f
+                                    ]
+                               | (f, cfVal, c) <- topFlows
+                               ]
                         ]
-                pure $
-                    toolSuccessJson rid $
-                        attachMarketHintByName (activityName (raActivity ra)) $
-                            object $
-                                [ "method" .= methodName method
-                                , "category" .= methodCategory method
-                                , "score" .= score
-                                , "unit" .= methodUnit method
-                                , "functional_unit" .= functionalUnit
-                                , "mapped_flows" .= (msTotal stats - msUnmatched stats)
-                                , "has_negative_contributions" .= hasNeg
-                                , "top_flows"
-                                    .= [ object
-                                            [ "flow_name" .= bfName f
-                                            , "contribution" .= c
-                                            , "contribution_percent" .= (if score /= 0 then c / score * 100 else 0 :: Double)
-                                            , "flow_id" .= UUID.toText (bfId f)
-                                            , "category" .= bfCompartmentName f
-                                            , "compartment" .= bfCompartmentSub f
-                                            , "cf_value" .= cfVal
-                                            , "flow_unit" .= getUnitNameForBioFlow mUnits f
-                                            ]
-                                       | (f, cfVal, c) <- topFlows
-                                       ]
-                                ]
-                                    ++ webUrlPair
-                                    ++ (if fromMaybe False (boolArg "include_diagnostics" args) then diagnosticsFields else [])
-            )
+                            ++ webUrlPair
+                            ++ (if fromMaybe False (boolArg "include_diagnostics" args) then diagnosticsFields else [])
 
 {- | Handler for the 'compute_sensitivity' MCP tool. Mirrors the REST
 @POST /sensitivity/{collection}/{methodId}@ endpoint: runs Service.computeSensitivities
@@ -1092,77 +1104,72 @@ location-hierarchy walk; non-regionalized methods stay on the classic
 -}
 callComputeSensitivity :: DatabaseManager -> Maybe Text -> Value -> KeyMap Value -> IO Value
 callComputeSensitivity dbManager mBaseUrl rid args =
-    either (toolError rid) id
-        <$> runExceptT
-            ( do
-                req <- loadLcaRequest dbManager args
-                let ld = lrLoaded req
-                    db = ldDatabase ld
-                    method = lrMethod req
-                    dbName = lrDbName req
-                    ra = lrResolved req
-                ExceptT $ pure $ ensureLinked dbName "computing sensitivity" db
-                perts <-
-                    ExceptT $
-                        pure
-                            ( parseArrayArg
-                                "perturbations"
-                                (Just "'perturbations' is required (array of {consumer, supplier, delta, label?})")
-                                args ::
-                                Either Text [Perturbation]
-                            )
-                unitCfg <- liftIO $ DM.getMergedUnitConfig dbManager
-                (mFlows, mUnits) <- liftIO $ DM.getMergedFlowMetadata dbManager
-                tables <- liftIO $ DM.mapMethodToTablesCached dbManager dbName db method
-                hier <- liftIO $ DM.getLocationHierarchy dbManager
-                eRes <-
-                    liftIO $
-                        Service.computeSensitivities db (ldSharedSolver ld) (raPid ra) perts
-                (baselineX, perResults) <- case eRes of
-                    Left err -> throwE (T.pack (show err))
-                    Right v -> pure v
-                let scoreOf x =
-                        let inv = applyBiosphereMatrix db x
-                         in case computeLCIAScoreAuto unitCfg mUnits mFlows db x inv hier tables of
-                                Right s -> Right s
-                                Left e -> Left e
-                baselineScore <- case scoreOf baselineX of
-                    Right s -> pure s
-                    Left e -> throwE ("baseline scoring failed: " <> e)
-                let webUrlPair = webUrlField mBaseUrl ("/db/" <> dbName <> "/activity/" <> raText ra <> "/sensitivity/" <> encodeSegment (lrCollection req) <> "/" <> lrMethodIdText req)
-                    pertEntry (p, eitherX) =
-                        let base =
-                                [ "perturbation"
-                                    .= object
-                                        [ "consumer" .= perConsumer p
-                                        , "supplier" .= perSupplier p
-                                        , "delta" .= perDelta p
-                                        ]
+    runTool rid $ do
+        req <- loadLcaRequest dbManager args
+        let ld = lrLoaded req
+            db = ldDatabase ld
+            method = lrMethod req
+            dbName = lrDbName req
+            ra = lrResolved req
+        except $ ensureLinked dbName "computing sensitivity" db
+        perts <-
+            ExceptT $
+                pure
+                    ( parseArrayArg
+                        "perturbations"
+                        (Just "'perturbations' is required (array of {consumer, supplier, delta, label?})")
+                        args ::
+                        Either Text [Perturbation]
+                    )
+        unitCfg <- liftIO $ DM.getMergedUnitConfig dbManager
+        (mFlows, mUnits) <- liftIO $ DM.getMergedFlowMetadata dbManager
+        tables <- liftIO $ DM.mapMethodToTablesCached dbManager dbName db method
+        hier <- liftIO $ DM.getLocationHierarchy dbManager
+        eRes <-
+            liftIO $
+                Service.computeSensitivities db (ldSharedSolver ld) (raPid ra) perts
+        (baselineX, perResults) <- liftShow eRes
+        let scoreOf x =
+                let inv = applyBiosphereMatrix db x
+                 in case computeLCIAScoreAuto unitCfg mUnits mFlows db x inv hier tables of
+                        Right s -> Right s
+                        Left e -> Left e
+        baselineScore <- case scoreOf baselineX of
+            Right s -> pure s
+            Left e -> throwE ("baseline scoring failed: " <> e)
+        let webUrlPair = webUrlField mBaseUrl ("/db/" <> dbName <> "/activity/" <> raText ra <> "/sensitivity/" <> encodeSegment (lrCollection req) <> "/" <> lrMethodIdText req)
+            pertEntry (p, eitherX) =
+                let base =
+                        [ "perturbation"
+                            .= object
+                                [ "consumer" .= perConsumer p
+                                , "supplier" .= perSupplier p
+                                , "delta" .= perDelta p
                                 ]
-                            withLabel = case perLabel p of
-                                Just l -> ("label" .= l) : base
-                                Nothing -> base
-                         in case eitherX of
-                                Left err -> object (("error" .= err) : withLabel)
-                                Right x' -> case scoreOf x' of
-                                    Left err -> object (("error" .= err) : withLabel)
-                                    Right s ->
-                                        object
-                                            ( ("score" .= s)
-                                                : ("delta_score" .= (s - baselineScore))
-                                                : withLabel
-                                            )
-                pure $
-                    toolSuccessJson rid $
-                        object $
-                            [ "method" .= methodName method
-                            , "category" .= methodCategory method
-                            , "unit" .= methodUnit method
-                            , "baseline_score" .= baselineScore
-                            , "perturbed" .= map pertEntry perResults
-                            ]
-                                ++ webUrlPair
-            )
+                        ]
+                    withLabel = case perLabel p of
+                        Just l -> ("label" .= l) : base
+                        Nothing -> base
+                 in case eitherX of
+                        Left err -> object (("error" .= err) : withLabel)
+                        Right x' -> case scoreOf x' of
+                            Left err -> object (("error" .= err) : withLabel)
+                            Right s ->
+                                object
+                                    ( ("score" .= s)
+                                        : ("delta_score" .= (s - baselineScore))
+                                        : withLabel
+                                    )
+        pure $
+            toolSuccessJson rid $
+                object $
+                    [ "method" .= methodName method
+                    , "category" .= methodCategory method
+                    , "unit" .= methodUnit method
+                    , "baseline_score" .= baselineScore
+                    , "perturbed" .= map pertEntry perResults
+                    ]
+                        ++ webUrlPair
 
 {- | Cross-database impact comparison for mapping audits.
 
@@ -1180,65 +1187,62 @@ audit is designed to expose.
 -}
 callCompareImpacts :: DatabaseManager -> Value -> KeyMap Value -> IO Value
 callCompareImpacts dbManager rid args =
-    either (toolError rid) id
-        <$> runExceptT
-            ( do
-                argsA <- ExceptT . pure $ subArgs "_a" args
-                argsB <- ExceptT . pure $ subArgs "_b" args
-                reqA <- loadLcaRequest dbManager argsA
-                reqB <- loadLcaRequest dbManager argsB
-                irA <- runImpactsRequest dbManager argsA reqA
-                irB <- runImpactsRequest dbManager argsB reqB
-                let topN = fromMaybe 10 (intArg "top_flows" args)
-                    scoreA = loScore (irOutcome irA)
-                    scoreB = loScore (irOutcome irB)
-                    delta = scoreA - scoreB
-                    relPct =
-                        if scoreB /= 0
-                            then abs delta / abs scoreB * 100
-                            else 0
-                    aTop = take topN (irContribs irA)
-                    bTop = take topN (irContribs irB)
-                    aMap = M.fromList [(flowKey f, c) | (f, _, c) <- irContribs irA]
-                    bMap = M.fromList [(flowKey f, c) | (f, _, c) <- irContribs irB]
-                    common =
-                        [ object
-                            [ "flow_name" .= bfName f
-                            , "category" .= bfCompartmentName f
-                            , "compartment" .= bfCompartmentSub f
-                            , "a_contrib" .= cA
-                            , "b_contrib" .= cB
-                            , "delta" .= (cA - cB)
-                            ]
-                        | (f, _, cA) <- aTop
-                        , let k = flowKey f
-                        , Just cB <- [M.lookup k bMap]
-                        ]
-                    aOnly =
-                        [ encodeContrib f c
-                        | (f, _, c) <- aTop
-                        , M.notMember (flowKey f) bMap
-                        ]
-                    bOnly =
-                        [ encodeContrib f c
-                        | (f, _, c) <- bTop
-                        , M.notMember (flowKey f) aMap
-                        ]
-                pure $
-                    toolSuccessJson rid $
-                        object
-                            [ "a" .= sideJson reqA irA
-                            , "b" .= sideJson reqB irB
-                            , "delta"
-                                .= object
-                                    [ "absolute" .= delta
-                                    , "relative_pct" .= relPct
-                                    ]
-                            , "common_flows" .= common
-                            , "top_a_only_flows" .= aOnly
-                            , "top_b_only_flows" .= bOnly
+    runTool rid $ do
+        argsA <- except $ subArgs "_a" args
+        argsB <- except $ subArgs "_b" args
+        reqA <- loadLcaRequest dbManager argsA
+        reqB <- loadLcaRequest dbManager argsB
+        irA <- runImpactsRequest dbManager argsA reqA
+        irB <- runImpactsRequest dbManager argsB reqB
+        let topN = fromMaybe 10 (intArg "top_flows" args)
+            scoreA = loScore (irOutcome irA)
+            scoreB = loScore (irOutcome irB)
+            delta = scoreA - scoreB
+            relPct =
+                if scoreB /= 0
+                    then abs delta / abs scoreB * 100
+                    else 0
+            aTop = take topN (irContribs irA)
+            bTop = take topN (irContribs irB)
+            aMap = M.fromList [(flowKey f, c) | (f, _, c) <- irContribs irA]
+            bMap = M.fromList [(flowKey f, c) | (f, _, c) <- irContribs irB]
+            common =
+                [ object
+                    [ "flow_name" .= bfName f
+                    , "category" .= bfCompartmentName f
+                    , "compartment" .= bfCompartmentSub f
+                    , "a_contrib" .= cA
+                    , "b_contrib" .= cB
+                    , "delta" .= (cA - cB)
+                    ]
+                | (f, _, cA) <- aTop
+                , let k = flowKey f
+                , Just cB <- [M.lookup k bMap]
+                ]
+            aOnly =
+                [ encodeContrib f c
+                | (f, _, c) <- aTop
+                , M.notMember (flowKey f) bMap
+                ]
+            bOnly =
+                [ encodeContrib f c
+                | (f, _, c) <- bTop
+                , M.notMember (flowKey f) aMap
+                ]
+        pure $
+            toolSuccessJson rid $
+                object
+                    [ "a" .= sideJson reqA irA
+                    , "b" .= sideJson reqB irB
+                    , "delta"
+                        .= object
+                            [ "absolute" .= delta
+                            , "relative_pct" .= relPct
                             ]
-            )
+                    , "common_flows" .= common
+                    , "top_a_only_flows" .= aOnly
+                    , "top_b_only_flows" .= bOnly
+                    ]
   where
     sideJson req ir =
         let outcome = irOutcome ir
@@ -1580,20 +1584,14 @@ unknown method, unresolvable process id).
 loadLcaRequest :: DatabaseManager -> KeyMap Value -> ExceptT Text IO LcaRequest
 loadLcaRequest dbManager args = do
     (dbName, pidText, methodIdText) <-
-        ExceptT $
-            pure $
-                (,,)
-                    <$> requireText "database" args
-                    <*> requireText "process_id" args
-                    <*> requireText "method_id" args
-    mLoaded <- liftIO $ getDatabase dbManager dbName
-    ld <- case mLoaded of
-        Nothing -> throwE ("Database not loaded: " <> dbName)
-        Just x -> pure x
+        except $
+            (,,)
+                <$> requireText "database" args
+                <*> requireText "process_id" args
+                <*> requireText "method_id" args
+    ld <- requireDatabase dbManager dbName
     (col, method) <- ExceptT (resolveMethod dbManager methodIdText)
-    (pid, act) <- case Service.resolveActivityAndProcessId (ldDatabase ld) pidText of
-        Left err -> throwE (T.pack (show err))
-        Right v -> pure v
+    (pid, act) <- liftShow (Service.resolveActivityAndProcessId (ldDatabase ld) pidText)
     pure
         LcaRequest
             { lrDbName = dbName
@@ -1624,138 +1622,132 @@ ensureLinked dbName op db =
 
 callGetContributingFlows :: DatabaseManager -> Maybe Text -> Value -> KeyMap Value -> IO Value
 callGetContributingFlows dbManager mBaseUrl rid args =
-    either (toolError rid) id
-        <$> runExceptT
-            ( do
-                req <- loadLcaRequest dbManager args
-                let ld = lrLoaded req
-                    db = ldDatabase ld
-                    method = lrMethod req
-                    dbName = lrDbName req
-                    ra = lrResolved req
-                    lim = fromMaybe 20 (intArg "limit" args)
-                    webUrlPair = webUrlField mBaseUrl ("/db/" <> dbName <> "/activity/" <> raText ra <> "/contributing-flows/" <> encodeSegment (lrCollection req) <> "/" <> lrMethodIdText req)
-                ExceptT $ pure $ ensureLinked dbName "computing contributions" db
-                unitCfg <- liftIO $ DM.getMergedUnitConfig dbManager
-                (mFlows, mUnits) <- liftIO $ DM.getMergedFlowMetadata dbManager
-                sol <-
-                    ExceptT $
-                        computeInventoryMatrixWithDepsCached
-                            unitCfg
-                            (DM.mkDepSolverLookup dbManager)
-                            db
-                            dbName
-                            (ldSharedSolver ld)
-                            (raPid ra)
-                let inventory = SharedSolver.csInventory sol
-                tables <- liftIO $ DM.mapMethodToTablesCached dbManager dbName db method
-                let outcome = computeLCIAScoreFromTables unitCfg mUnits mFlows inventory tables
-                    score = loScore outcome
-                    (rawContribs, unknownUuids) = inventoryContributions unitCfg mUnits mFlows inventory tables
-                    contribs = L.sortOn (\(_, _, c) -> negate (abs c)) rawContribs
-                    top = take lim contribs
-                    hasNeg = any (\(_, _, c) -> c < 0) contribs
-                diagnosticsFields <-
-                    if fromMaybe False (boolArg "include_diagnostics" args)
-                        then do
-                            idx <- liftIO $ DM.mapMethodToIndexCached dbManager dbName method
-                            let opts = defaultUncharacterizedOpts
-                                uncharacterized =
-                                    Mapping.findUncharacterized
-                                        unitCfg
-                                        mUnits
-                                        mFlows
-                                        inventory
-                                        tables
-                                        (DM.dmChemSynonyms dbManager)
-                                        idx
-                                        opts
-                            pure
-                                [ "uncharacterized_flows" .= map encodeUncharacterized uncharacterized
-                                , "characterized_share"
-                                    .= ( if loInventoryAbsSum outcome > 0
-                                            then loCharacterizedSum outcome / loInventoryAbsSum outcome
-                                            else 1 :: Double
-                                       )
+    runTool rid $ do
+        req <- loadLcaRequest dbManager args
+        let ld = lrLoaded req
+            db = ldDatabase ld
+            method = lrMethod req
+            dbName = lrDbName req
+            ra = lrResolved req
+            lim = fromMaybe 20 (intArg "limit" args)
+            webUrlPair = webUrlField mBaseUrl ("/db/" <> dbName <> "/activity/" <> raText ra <> "/contributing-flows/" <> encodeSegment (lrCollection req) <> "/" <> lrMethodIdText req)
+        except $ ensureLinked dbName "computing contributions" db
+        unitCfg <- liftIO $ DM.getMergedUnitConfig dbManager
+        (mFlows, mUnits) <- liftIO $ DM.getMergedFlowMetadata dbManager
+        sol <-
+            ExceptT $
+                computeInventoryMatrixWithDepsCached
+                    unitCfg
+                    (DM.mkDepSolverLookup dbManager)
+                    db
+                    dbName
+                    (ldSharedSolver ld)
+                    (raPid ra)
+        let inventory = SharedSolver.csInventory sol
+        tables <- liftIO $ DM.mapMethodToTablesCached dbManager dbName db method
+        let outcome = computeLCIAScoreFromTables unitCfg mUnits mFlows inventory tables
+            score = loScore outcome
+            (rawContribs, unknownUuids) = inventoryContributions unitCfg mUnits mFlows inventory tables
+            contribs = L.sortOn (\(_, _, c) -> negate (abs c)) rawContribs
+            top = take lim contribs
+            hasNeg = any (\(_, _, c) -> c < 0) contribs
+        diagnosticsFields <-
+            if fromMaybe False (boolArg "include_diagnostics" args)
+                then do
+                    idx <- liftIO $ DM.mapMethodToIndexCached dbManager dbName method
+                    let opts = defaultUncharacterizedOpts
+                        uncharacterized =
+                            Mapping.findUncharacterized
+                                unitCfg
+                                mUnits
+                                mFlows
+                                inventory
+                                tables
+                                (DM.dmChemSynonyms dbManager)
+                                idx
+                                opts
+                    pure
+                        [ "uncharacterized_flows" .= map encodeUncharacterized uncharacterized
+                        , "characterized_share"
+                            .= ( if loInventoryAbsSum outcome > 0
+                                    then loCharacterizedSum outcome / loInventoryAbsSum outcome
+                                    else 1 :: Double
+                               )
+                        ]
+                else pure []
+        liftIO $
+            unless (null unknownUuids) $
+                reportProgress Warning $
+                    "[MCP get_contributing_flows "
+                        <> T.unpack (methodName method)
+                        <> "] "
+                        <> show (length unknownUuids)
+                        <> " inventory flow UUID(s) absent from merged FlowDB. Samples: "
+                        <> show (take 3 unknownUuids)
+        pure $
+            toolSuccessJson rid $
+                object $
+                    [ "method" .= methodName method
+                    , "unit" .= methodUnit method
+                    , "total_score" .= score
+                    , "has_negative_contributions" .= hasNeg
+                    , "top_flows"
+                        .= [ object
+                                [ "flow_name" .= bfName f
+                                , "contribution" .= c
+                                , "contribution_percent" .= (if score /= 0 then c / score * 100 else 0 :: Double)
+                                , "flow_id" .= UUID.toText (bfId f)
+                                , "category" .= bfCompartmentName f
+                                , "compartment" .= bfCompartmentSub f
+                                , "cf_value" .= cfVal
                                 ]
-                        else pure []
-                liftIO $
-                    unless (null unknownUuids) $
-                        reportProgress Warning $
-                            "[MCP get_contributing_flows "
-                                <> T.unpack (methodName method)
-                                <> "] "
-                                <> show (length unknownUuids)
-                                <> " inventory flow UUID(s) absent from merged FlowDB. Samples: "
-                                <> show (take 3 unknownUuids)
-                pure $
-                    toolSuccessJson rid $
-                        object $
-                            [ "method" .= methodName method
-                            , "unit" .= methodUnit method
-                            , "total_score" .= score
-                            , "has_negative_contributions" .= hasNeg
-                            , "top_flows"
-                                .= [ object
-                                        [ "flow_name" .= bfName f
-                                        , "contribution" .= c
-                                        , "contribution_percent" .= (if score /= 0 then c / score * 100 else 0 :: Double)
-                                        , "flow_id" .= UUID.toText (bfId f)
-                                        , "category" .= bfCompartmentName f
-                                        , "compartment" .= bfCompartmentSub f
-                                        , "cf_value" .= cfVal
-                                        ]
-                                   | (f, cfVal, c) <- top
-                                   ]
-                            ]
-                                ++ webUrlPair
-                                ++ diagnosticsFields
-            )
+                           | (f, cfVal, c) <- top
+                           ]
+                    ]
+                        ++ webUrlPair
+                        ++ diagnosticsFields
 
 callGetContributingActivities :: DatabaseManager -> Maybe Text -> Value -> KeyMap Value -> IO Value
 callGetContributingActivities dbManager mBaseUrl rid args =
-    either (toolError rid) id
-        <$> runExceptT
-            ( do
-                req <- loadLcaRequest dbManager args
-                let ld = lrLoaded req
-                    db = ldDatabase ld
-                    method = lrMethod req
-                    dbName = lrDbName req
-                    ra = lrResolved req
-                    lim = fromMaybe 10 (intArg "limit" args)
-                ExceptT $ pure $ ensureLinked dbName "computing contributions" db
-                unitCfg <- liftIO $ DM.getMergedUnitConfig dbManager
-                (mFlows, mUnits) <- liftIO $ DM.getMergedFlowMetadata dbManager
-                tables <- liftIO $ DM.mapMethodToTablesCached dbManager dbName db method
-                -- Skip separate inventory compute: contributions sum equals the score.
-                contributions <-
-                    ExceptT $
-                        crossDBProcessContributions
-                            unitCfg
-                            mUnits
-                            mFlows
-                            (DM.mkDepSolverLookup dbManager)
-                            db
-                            dbName
-                            (ldSharedSolver ld)
-                            (raPid ra)
-                            tables
-                let score = sum (M.elems contributions)
-                    sorted = L.sortOn (\(_, c) -> negate (abs c)) (M.toList contributions)
-                    top = take lim sorted
-                    hasNeg = any (\(_, c) -> c < 0) top
-                rows <- liftIO $ mapM (mkMcpCrossDBEntry dbManager dbName mBaseUrl (lrCollection req) (lrMethodIdText req) mFlows mUnits score) top
-                pure $
-                    toolSuccessJson rid $
-                        object
-                            [ "method" .= methodName method
-                            , "unit" .= methodUnit method
-                            , "total_score" .= score
-                            , "has_negative_contributions" .= hasNeg
-                            , "processes" .= rows
-                            ]
-            )
+    runTool rid $ do
+        req <- loadLcaRequest dbManager args
+        let ld = lrLoaded req
+            db = ldDatabase ld
+            method = lrMethod req
+            dbName = lrDbName req
+            ra = lrResolved req
+            lim = fromMaybe 10 (intArg "limit" args)
+        except $ ensureLinked dbName "computing contributions" db
+        unitCfg <- liftIO $ DM.getMergedUnitConfig dbManager
+        (mFlows, mUnits) <- liftIO $ DM.getMergedFlowMetadata dbManager
+        tables <- liftIO $ DM.mapMethodToTablesCached dbManager dbName db method
+        -- Skip separate inventory compute: contributions sum equals the score.
+        contributions <-
+            ExceptT $
+                crossDBProcessContributions
+                    unitCfg
+                    mUnits
+                    mFlows
+                    (DM.mkDepSolverLookup dbManager)
+                    db
+                    dbName
+                    (ldSharedSolver ld)
+                    (raPid ra)
+                    tables
+        let score = sum (M.elems contributions)
+            sorted = L.sortOn (\(_, c) -> negate (abs c)) (M.toList contributions)
+            top = take lim sorted
+            hasNeg = any (\(_, c) -> c < 0) top
+        rows <- liftIO $ mapM (mkMcpCrossDBEntry dbManager dbName mBaseUrl (lrCollection req) (lrMethodIdText req) mFlows mUnits score) top
+        pure $
+            toolSuccessJson rid $
+                object
+                    [ "method" .= methodName method
+                    , "unit" .= methodUnit method
+                    , "total_score" .= score
+                    , "has_negative_contributions" .= hasNeg
+                    , "processes" .= rows
+                    ]
 
 callListGeographies :: DatabaseManager -> Value -> KeyMap Value -> IO Value
 callListGeographies dbManager rid args =
@@ -1833,29 +1825,26 @@ ground at a fraction of the bytes. Replaces the @N@ round-trips of
 -}
 callScoreActivity :: DatabaseManager -> Maybe Text -> Value -> KeyMap Value -> IO Value
 callScoreActivity dbManager mBaseUrl rid args =
-    either (toolError rid) id
-        <$> runExceptT
-            ( do
-                dbName <- ExceptT $ pure (requireText "database" args)
-                pidText <- ExceptT $ pure (requireText "process_id" args)
-                coll <- ExceptT $ pure (requireText "collection" args)
-                subs <- ExceptT $ pure (parseArrayArg "substitutions" Nothing args :: Either Text [Substitution])
-                wantedSets <- ExceptT $ pure (parseArrayArg "scoring_sets" Nothing args :: Either Text [Text])
-                let mSub = if null subs then Nothing else Just SubstitutionRequest{srSubstitutions = subs}
-                res <- liftIO $ BI.runActivityLCIABatch dbManager dbName pidText coll mSub
-                case res of
-                    Left e -> ExceptT $ pure (Left (batchErrorMsg e))
-                    Right lbr -> do
-                        configured <- liftIO $ configuredScoringSetNames dbManager coll
-                        mActName <- liftIO $ lookupActivityName dbManager dbName pidText
-                        let mTopUrl = scoreActivityWebUrl mBaseUrl dbName pidText coll
-                            enriched =
-                                maybe id attachMarketHintByName mActName $
-                                    addWebUrlMaybe
-                                        mTopUrl
-                                        (slimLCIAPanel (toJSON lbr))
-                        ExceptT $ pure (toolSuccessJson rid <$> filterScoringSets configured wantedSets enriched)
-            )
+    runTool rid $ do
+        dbName <- except (requireText "database" args)
+        pidText <- except (requireText "process_id" args)
+        coll <- except (requireText "collection" args)
+        subs <- except (parseArrayArg "substitutions" Nothing args :: Either Text [Substitution])
+        wantedSets <- except (parseArrayArg "scoring_sets" Nothing args :: Either Text [Text])
+        let mSub = if null subs then Nothing else Just SubstitutionRequest{srSubstitutions = subs}
+        res <- liftIO $ BI.runActivityLCIABatch dbManager dbName pidText coll mSub
+        case res of
+            Left e -> throwE (batchErrorMsg e)
+            Right lbr -> do
+                configured <- liftIO $ configuredScoringSetNames dbManager coll
+                mActName <- liftIO $ lookupActivityName dbManager dbName pidText
+                let mTopUrl = scoreActivityWebUrl mBaseUrl dbName pidText coll
+                    enriched =
+                        maybe id attachMarketHintByName mActName $
+                            addWebUrlMaybe
+                                mTopUrl
+                                (slimLCIAPanel (toJSON lbr))
+                except (toolSuccessJson rid <$> filterScoringSets configured wantedSets enriched)
 
 {- | Resolve the activity name for a (db, processId) pair. 'Nothing' when
 the database is not loaded or the PID does not resolve — callers fold
@@ -1894,21 +1883,18 @@ unambiguous; see 'resolveSingleScoringSet' for the rules.
 -}
 callScoreActivities :: DatabaseManager -> Maybe Text -> Value -> KeyMap Value -> IO Value
 callScoreActivities dbManager mBaseUrl rid args =
-    either (toolError rid) id
-        <$> runExceptT
-            ( do
-                dbName <- ExceptT $ pure (requireText "database" args)
-                coll <- ExceptT $ pure (requireText "collection" args)
-                pids <- ExceptT $ pure (parseArrayArg "process_ids" (Just "'process_ids' required (array of strings)") args :: Either Text [Text])
-                wantedSets <- ExceptT $ pure (parseArrayArg "scoring_sets" Nothing args :: Either Text [Text])
-                let summaryOnly = fromMaybe False (boolArg "summary_only" args)
-                configured <- liftIO $ configuredScoringSets dbManager coll
-                chosen <- ExceptT $ pure (resolveSingleScoringSet wantedSets configured)
-                res <- liftIO $ BI.runBatchImpacts dbManager dbName coll Nothing pids
-                case res of
-                    Left e -> ExceptT $ pure (Left (batchErrorMsg e))
-                    Right bir -> pure (toolSuccessJson rid (toColumnarBatch summaryOnly mBaseUrl dbName coll chosen bir))
-            )
+    runTool rid $ do
+        dbName <- except (requireText "database" args)
+        coll <- except (requireText "collection" args)
+        pids <- except (parseArrayArg "process_ids" (Just "'process_ids' required (array of strings)") args :: Either Text [Text])
+        wantedSets <- except (parseArrayArg "scoring_sets" Nothing args :: Either Text [Text])
+        let summaryOnly = fromMaybe False (boolArg "summary_only" args)
+        configured <- liftIO $ configuredScoringSets dbManager coll
+        chosen <- except (resolveSingleScoringSet wantedSets configured)
+        res <- liftIO $ BI.runBatchImpacts dbManager dbName coll Nothing pids
+        case res of
+            Left e -> throwE (batchErrorMsg e)
+            Right bir -> pure (toolSuccessJson rid (toColumnarBatch summaryOnly mBaseUrl dbName coll chosen bir))
 
 {- | Handler for the 'list_scoring_sets' MCP tool.
 

From 04f98468536d1a29136fe9894afccf078be4a6c6 Mon Sep 17 00:00:00 2001
From: Christophe Combelles <ccomb@free.fr>
Date: Thu, 28 May 2026 15:52:54 +0200
Subject: [PATCH 36/43] refactor(mcp): flatten diagonal tool handlers onto the
 ExceptT idiom

The case-cascade handlers nested up to eight levels deep re-deriving DB
lookups, method resolution, and Either unwrapping by hand. Rewrite them
as flat `runTool rid $ do` blocks over requireDatabase / except /
liftShow: getSupplyChain, getFlowMapping, getCharacterization,
getActivity, listGeographies, plus the withDb helper and the
getPathTo / getConsumers handlers.

getFlowMapping now reuses resolveMethod instead of re-implementing the
UUID-parse-and-filter lookup (identical error messages).

Behaviour-preserving: same toolError text and the same short-circuit
order. buildUnmatchedDbFlows keeps its empty-list fallback (an empty
ranking is not an error), so it stays a plain IO helper.
---
 src/API/MCP.hs | 480 +++++++++++++++++++++----------------------------
 1 file changed, 206 insertions(+), 274 deletions(-)

diff --git a/src/API/MCP.hs b/src/API/MCP.hs
index 443c9fd5..e1d008dd 100644
--- a/src/API/MCP.hs
+++ b/src/API/MCP.hs
@@ -368,14 +368,10 @@ withDb ::
     KeyMap Value ->
     ((Database, SharedSolver) -> IO Value) ->
     IO Value
-withDb dbManager rid args action =
-    case textArg "database" args of
-        Nothing -> return $ toolError rid "Missing required parameter: database"
-        Just dbName -> do
-            mLoaded <- getDatabase dbManager dbName
-            case mLoaded of
-                Nothing -> return $ toolError rid ("Database not loaded: " <> dbName)
-                Just ld -> action (ldDatabase ld, ldSharedSolver ld)
+withDb dbManager rid args action = runTool rid $ do
+    dbName <- except (requireText "database" args)
+    ld <- requireDatabase dbManager dbName
+    liftIO $ action (ldDatabase ld, ldSharedSolver ld)
 
 -- ---------------------------------------------------------------------------
 -- ExceptT plumbing shared by every handler
@@ -595,36 +591,30 @@ callSearchFlows rid args (db, _) =
     a <|> _ = a
 
 callGetActivity :: Value -> KeyMap Value -> (Database, SharedSolver) -> IO Value
-callGetActivity rid args (db, _) =
-    case textArg "process_id" args of
-        Nothing -> return $ toolError rid "Missing required parameter: process_id"
-        Just pid -> case validatedExchangeType of
-            Left err -> return $ toolError rid err
-            Right _ ->
-                case Service.getActivityInfo defaultUnitConfig db pid of
-                    Left err -> return $ toolError rid (T.pack $ show err)
-                    Right val -> case fromJSON val of
-                        -- 'val' was built from an 'ActivityInfo' upstream, so a
-                        -- decode failure is genuinely defensive — pass it
-                        -- through unchanged, hint-less.
-                        Error _ -> return $ toolSuccessJson rid val
-                        Success ai ->
-                            -- Single resolve: take the activity name from the
-                            -- 'ActivityInfo' already in hand instead of asking
-                            -- the engine to resolve the PID again.
-                            let attach = attachMarketHintByName (pfaName (piActivity ai))
-                                payload
-                                    | noFilters = val
-                                    | otherwise =
-                                        toJSON
-                                            ai
-                                                { piActivity =
-                                                    (piActivity ai)
-                                                        { pfaExchanges =
-                                                            filter matchExchange (pfaExchanges (piActivity ai))
-                                                        }
-                                                }
-                             in return $ toolSuccessJson rid (attach payload)
+callGetActivity rid args (db, _) = runTool rid $ do
+    pid <- except (requireText "process_id" args)
+    _ <- except validatedExchangeType
+    val <- liftShow (Service.getActivityInfo defaultUnitConfig db pid)
+    pure $ case fromJSON val of
+        -- 'val' was built from an 'ActivityInfo' upstream, so a decode
+        -- failure is genuinely defensive — pass it through unchanged, hint-less.
+        Error _ -> toolSuccessJson rid val
+        Success ai ->
+            -- Single resolve: take the activity name from the 'ActivityInfo'
+            -- already in hand instead of asking the engine to resolve the PID again.
+            let attach = attachMarketHintByName (pfaName (piActivity ai))
+                payload
+                    | noFilters = val
+                    | otherwise =
+                        toJSON
+                            ai
+                                { piActivity =
+                                    (piActivity ai)
+                                        { pfaExchanges =
+                                            filter matchExchange (pfaExchanges (piActivity ai))
+                                        }
+                                }
+             in toolSuccessJson rid (attach payload)
   where
     exchangeType = textArg "exchange_type" args
     flowFilter = textArg "flow" args
@@ -657,73 +647,47 @@ callGetActivity rid args (db, _) =
         Just want -> exchangeIsInput (ewuExchange ewu) == want
 
 callGetSupplyChain :: DatabaseManager -> Value -> KeyMap Value -> IO Value
-callGetSupplyChain dbManager rid args =
-    case (,) <$> requireText "database" args <*> requireText "process_id" args of
-        Left err -> return $ toolError rid err
-        Right (dbName, pid) -> do
-            mLoaded <- getDatabase dbManager dbName
-            case mLoaded of
-                Nothing -> return $ toolError rid ("Database not loaded: " <> dbName)
-                Just ld -> do
-                    let db = ldDatabase ld
-                        solver = ldSharedSolver ld
-                        isExact = textArg "classification_match" args `elem` [Just "equals", Just "exact"]
-                        classFilters = case (textArg "classification" args, textArg "classification_value" args) of
-                            (Just sys, Just val) -> [(sys, val, isExact)]
-                            _ -> []
-                        scf =
-                            Service.SupplyChainFilter
-                                { Service.scfCore =
-                                    Service.ActivityFilterCore
-                                        { Service.afcName = textArg "name" args
-                                        , Service.afcLocation = textArg "location" args
-                                        , Service.afcProduct = Nothing
-                                        , Service.afcClassifications = classFilters
-                                        , Service.afcLimit = intArg "limit" args
-                                        , Service.afcOffset = Nothing
-                                        , Service.afcSort = Nothing
-                                        , Service.afcOrder = Nothing
-                                        }
-                                , Service.scfMaxDepth = intArg "max_depth" args
-                                , Service.scfMinQuantity = doubleArg "min_quantity" args
-                                }
-                    case parseArrayArg "substitutions" Nothing args :: Either Text [Substitution] of
-                        Left err -> return $ toolError rid err
-                        Right [] -> do
-                            unitCfg <- DM.getMergedUnitConfig dbManager
-                            result <- Service.getSupplyChain unitCfg (DM.mkDepSolverLookup dbManager) db dbName solver pid scf False
-                            case result of
-                                Left err -> return $ toolError rid (T.pack $ show err)
-                                Right val -> return $ toolSuccessJson rid (toJSON val)
-                        Right subs -> case Service.resolveActivityAndProcessId db pid of
-                            Left err -> return $ toolError rid (T.pack $ show err)
-                            Right (processId, _) -> do
-                                eScaling <-
-                                    Service.computeScalingVectorWithSubstitutionsCrossDB
-                                        (DM.mkDepSolverLookup dbManager)
-                                        db
-                                        dbName
-                                        solver
-                                        processId
-                                        subs
-                                case eScaling of
-                                    Left err -> return $ toolError rid (T.pack (show err))
-                                    Right (scalingVec, virtualLinks) -> do
-                                        unitCfg <- DM.getMergedUnitConfig dbManager
-                                        eResp <-
-                                            Service.buildSupplyChainFromScalingVectorCrossDB
-                                                unitCfg
-                                                (DM.mkDepSolverLookup dbManager)
-                                                db
-                                                dbName
-                                                processId
-                                                scalingVec
-                                                virtualLinks
-                                                scf
-                                                False
-                                        case eResp of
-                                            Left e -> return $ toolError rid (T.pack (show e))
-                                            Right v -> return $ toolSuccessJson rid (toJSON v)
+callGetSupplyChain dbManager rid args = runTool rid $ do
+    (dbName, pid) <- except $ (,) <$> requireText "database" args <*> requireText "process_id" args
+    ld <- requireDatabase dbManager dbName
+    let db = ldDatabase ld
+        solver = ldSharedSolver ld
+        depLookup = DM.mkDepSolverLookup dbManager
+        isExact = textArg "classification_match" args `elem` [Just "equals", Just "exact"]
+        classFilters = case (textArg "classification" args, textArg "classification_value" args) of
+            (Just sys, Just val) -> [(sys, val, isExact)]
+            _ -> []
+        scf =
+            Service.SupplyChainFilter
+                { Service.scfCore =
+                    Service.ActivityFilterCore
+                        { Service.afcName = textArg "name" args
+                        , Service.afcLocation = textArg "location" args
+                        , Service.afcProduct = Nothing
+                        , Service.afcClassifications = classFilters
+                        , Service.afcLimit = intArg "limit" args
+                        , Service.afcOffset = Nothing
+                        , Service.afcSort = Nothing
+                        , Service.afcOrder = Nothing
+                        }
+                , Service.scfMaxDepth = intArg "max_depth" args
+                , Service.scfMinQuantity = doubleArg "min_quantity" args
+                }
+    subs <- except (parseArrayArg "substitutions" Nothing args :: Either Text [Substitution])
+    unitCfg <- liftIO $ DM.getMergedUnitConfig dbManager
+    payload <-
+        if null subs
+            then -- Plain cross-DB supply chain.
+                toJSON <$> (liftIO (Service.getSupplyChain unitCfg depLookup db dbName solver pid scf False) >>= liftShow)
+            else do
+                -- Substitution-aware: re-solve the root scaling, then build from it.
+                (processId, _) <- liftShow (Service.resolveActivityAndProcessId db pid)
+                (scalingVec, virtualLinks) <-
+                    liftIO (Service.computeScalingVectorWithSubstitutionsCrossDB depLookup db dbName solver processId subs) >>= liftShow
+                resp <-
+                    liftIO (Service.buildSupplyChainFromScalingVectorCrossDB unitCfg depLookup db dbName processId scalingVec virtualLinks scf False) >>= liftShow
+                pure (toJSON resp)
+    pure $ toolSuccessJson rid payload
 
 {- | Generic SQL-group-by aggregation. One small primitive for "how much X is
 in Y" questions — replaces ad-hoc decomposition tools.
@@ -796,51 +760,44 @@ callAggregate dbManager rid args (db, solver) =
                      in Just (T.strip sys, T.strip val, isExact)
 
 callGetPathTo :: Value -> KeyMap Value -> (Database, SharedSolver) -> IO Value
-callGetPathTo rid args (db, solver) =
-    case (textArg "process_id" args, textArg "target" args) of
-        (Nothing, _) -> return $ toolError rid "Missing required parameter: process_id"
-        (_, Nothing) -> return $ toolError rid "Missing required parameter: target"
-        (Just pid, Just target) -> do
-            result <- Service.getPathTo db solver pid target
-            case result of
-                Left err -> return $ toolError rid (T.pack $ show err)
-                Right val -> return $ toolSuccessJson rid val
+callGetPathTo rid args (db, solver) = runTool rid $ do
+    pid <- except (requireText "process_id" args)
+    target <- except (requireText "target" args)
+    val <- liftIO (Service.getPathTo db solver pid target) >>= liftShow
+    pure (toolSuccessJson rid val)
 
 callGetConsumers :: [ClassificationPreset] -> Value -> KeyMap Value -> (Database, SharedSolver) -> IO Value
-callGetConsumers presets rid args (db, _) =
-    case textArg "process_id" args of
-        Nothing -> return $ toolError rid "Missing required parameter: process_id"
-        Just pid ->
-            let isExact = textArg "classification_match" args `elem` [Just "equals", Just "exact"]
-                dbName = fromMaybe "" (textArg "database" args) -- validated by withDb
-                presetFilters = case textArg "preset" args of
-                    Just pn -> case L.find (\p -> cpName p == pn) presets of
-                        Just p -> [(ceSystem e, ceValue e, ceMode e == "exact") | e <- cpFilters p]
-                        Nothing -> []
-                    Nothing -> []
-                explicitFilters = case (textArg "classification" args, textArg "classification_value" args) of
-                    (Just sys, Just val) -> [(sys, val, isExact)]
-                    _ -> []
-                classFilters = presetFilters ++ explicitFilters
-                cnf =
-                    Service.ConsumerFilter
-                        { Service.cnfCore =
-                            Service.ActivityFilterCore
-                                { Service.afcName = textArg "name" args
-                                , Service.afcLocation = textArg "location" args
-                                , Service.afcProduct = textArg "product" args
-                                , Service.afcClassifications = classFilters
-                                , Service.afcLimit = intArg "limit" args
-                                , Service.afcOffset = Nothing
-                                , Service.afcSort = Nothing
-                                , Service.afcOrder = Nothing
-                                }
-                        , Service.cnfMaxDepth = intArg "max_depth" args
-                        , Service.cnfIncludeEdges = fromMaybe False (boolArg "include_edges" args)
+callGetConsumers presets rid args (db, _) = runTool rid $ do
+    pid <- except (requireText "process_id" args)
+    let isExact = textArg "classification_match" args `elem` [Just "equals", Just "exact"]
+        dbName = fromMaybe "" (textArg "database" args) -- validated by withDb
+        presetFilters = case textArg "preset" args of
+            Just pn -> case L.find (\p -> cpName p == pn) presets of
+                Just p -> [(ceSystem e, ceValue e, ceMode e == "exact") | e <- cpFilters p]
+                Nothing -> []
+            Nothing -> []
+        explicitFilters = case (textArg "classification" args, textArg "classification_value" args) of
+            (Just sys, Just val) -> [(sys, val, isExact)]
+            _ -> []
+        classFilters = presetFilters ++ explicitFilters
+        cnf =
+            Service.ConsumerFilter
+                { Service.cnfCore =
+                    Service.ActivityFilterCore
+                        { Service.afcName = textArg "name" args
+                        , Service.afcLocation = textArg "location" args
+                        , Service.afcProduct = textArg "product" args
+                        , Service.afcClassifications = classFilters
+                        , Service.afcLimit = intArg "limit" args
+                        , Service.afcOffset = Nothing
+                        , Service.afcSort = Nothing
+                        , Service.afcOrder = Nothing
                         }
-             in case Service.getConsumers db dbName pid cnf of
-                    Left err -> return $ toolError rid (T.pack $ show err)
-                    Right results -> return $ toolSuccessJson rid (toJSON results)
+                , Service.cnfMaxDepth = intArg "max_depth" args
+                , Service.cnfIncludeEdges = fromMaybe False (boolArg "include_edges" args)
+                }
+    results <- liftShow (Service.getConsumers db dbName pid cnf)
+    pure (toolSuccessJson rid (toJSON results))
 
 {- | MCP get_inventory: route through the cross-DB back-substitution path
 so inventories from dep DBs are merged into the returned flows.
@@ -1313,64 +1270,52 @@ callListMethods dbManager rid = do
     return $ toolSuccessJson rid $ object ["methods" .= summaries]
 
 callGetFlowMapping :: DatabaseManager -> Value -> KeyMap Value -> IO Value
-callGetFlowMapping dbManager rid args =
-    case (,) <$> requireText "database" args <*> requireText "method_id" args of
-        Left err -> return $ toolError rid err
-        Right (dbName, methodIdText) -> do
-            mLoaded <- getDatabase dbManager dbName
-            case mLoaded of
-                Nothing -> return $ toolError rid ("Database not loaded: " <> dbName)
-                Just ld -> do
-                    let db = ldDatabase ld
-                    loadedMethods <- DM.getLoadedMethods dbManager
-                    let allMethods = map snd loadedMethods
-                    case UUID.fromText methodIdText of
-                        Nothing -> return $ toolError rid "Invalid method UUID format"
-                        Just uuid ->
-                            case filter (\m -> methodId m == uuid) allMethods of
-                                [] -> return $ toolError rid "Method not found"
-                                (method : _) -> do
-                                    mappings <- DM.mapMethodToFlowsCached dbManager dbName db method
-                                    let stats = computeMappingStats mappings
-                                        total = msTotal stats
-                                        matched = total - msUnmatched stats
-                                        coverage =
-                                            if total > 0
-                                                then fromIntegral matched / fromIntegral total * 100 :: Double
-                                                else 0
-                                        verbose = fromMaybe False (boolArg "verbose" args)
-                                        maxUnm = fromMaybe 50 (intArg "max_unmatched" args)
-                                    extra <-
-                                        if not verbose
-                                            then pure []
-                                            else do
-                                                let unmatchedCFs =
-                                                        take
-                                                            maxUnm
-                                                            [ object
-                                                                [ "name" .= mcfFlowName cf
-                                                                , "cas" .= mcfCAS cf
-                                                                , "compartment" .= mcfCompartment cf
-                                                                , "cf_value" .= mcfValue cf
-                                                                , "cf_unit" .= mcfUnit cf
-                                                                ]
-                                                            | (cf, Nothing) <- mappings
-                                                            ]
-                                                unmatchedFlows <- buildUnmatchedDbFlows dbManager dbName db method args maxUnm
-                                                pure
-                                                    [ "unmatched_cfs" .= unmatchedCFs
-                                                    , "unmatched_db_flows" .= unmatchedFlows
-                                                    ]
-                                    return $
-                                        toolSuccessJson rid $
-                                            object $
-                                                [ "method" .= methodName method
-                                                , "total" .= total
-                                                , "matched" .= matched
-                                                , "unmatched" .= msUnmatched stats
-                                                , "coverage" .= coverage
-                                                ]
-                                                    ++ extra
+callGetFlowMapping dbManager rid args = runTool rid $ do
+    (dbName, methodIdText) <- except $ (,) <$> requireText "database" args <*> requireText "method_id" args
+    ld <- requireDatabase dbManager dbName
+    (_, method) <- ExceptT (resolveMethod dbManager methodIdText)
+    let db = ldDatabase ld
+    mappings <- liftIO $ DM.mapMethodToFlowsCached dbManager dbName db method
+    let stats = computeMappingStats mappings
+        total = msTotal stats
+        matched = total - msUnmatched stats
+        coverage =
+            if total > 0
+                then fromIntegral matched / fromIntegral total * 100 :: Double
+                else 0
+        verbose = fromMaybe False (boolArg "verbose" args)
+        maxUnm = fromMaybe 50 (intArg "max_unmatched" args)
+    extra <-
+        if not verbose
+            then pure []
+            else do
+                let unmatchedCFs =
+                        take
+                            maxUnm
+                            [ object
+                                [ "name" .= mcfFlowName cf
+                                , "cas" .= mcfCAS cf
+                                , "compartment" .= mcfCompartment cf
+                                , "cf_value" .= mcfValue cf
+                                , "cf_unit" .= mcfUnit cf
+                                ]
+                            | (cf, Nothing) <- mappings
+                            ]
+                unmatchedFlows <- liftIO $ buildUnmatchedDbFlows dbManager dbName db method args maxUnm
+                pure
+                    [ "unmatched_cfs" .= unmatchedCFs
+                    , "unmatched_db_flows" .= unmatchedFlows
+                    ]
+    pure $
+        toolSuccessJson rid $
+            object $
+                [ "method" .= methodName method
+                , "total" .= total
+                , "matched" .= matched
+                , "unmatched" .= msUnmatched stats
+                , "coverage" .= coverage
+                ]
+                    ++ extra
 
 {- | Verbose-mode helper: rank unmatched DB flows for a method.
 
@@ -1434,52 +1379,44 @@ buildUnmatchedDbFlows dbManager dbName db method args maxN =
                                 pure (map encodeUncharacterized uncharacterized)
 
 callGetCharacterization :: DatabaseManager -> Value -> KeyMap Value -> IO Value
-callGetCharacterization dbManager rid args =
-    case (,) <$> requireText "database" args <*> requireText "method_id" args of
-        Left err -> return $ toolError rid err
-        Right (dbName, methodIdText) -> do
-            mLoaded <- getDatabase dbManager dbName
-            case mLoaded of
-                Nothing -> return $ toolError rid ("Database not loaded: " <> dbName)
-                Just ld -> do
-                    eMethod <- resolveMethod dbManager methodIdText
-                    case eMethod of
-                        Left err -> return $ toolError rid err
-                        Right (_, method) -> do
-                            let db = ldDatabase ld
-                                lim = fromMaybe 20 (intArg "limit" args)
-                                flowQ = textArg "flow" args
-                                queryLower = fmap T.toLower flowQ
-                            mappings <- DM.mapMethodToFlowsCached dbManager dbName db method
-                            let matched =
-                                    [ (cf, f, strat)
-                                    | (cf, Just (f, strat)) <- mappings
-                                    , matchQuery queryLower (mcfFlowName cf) (bfName f)
-                                    ]
-                                sorted = L.sortOn (\(cf, _, _) -> negate (abs (mcfValue cf))) matched
-                                top = take lim sorted
-                                mkEntry (cf, f, strat) =
-                                    object
-                                        [ "cf_flow_name" .= mcfFlowName cf
-                                        , "cf_value" .= mcfValue cf
-                                        , "cf_unit" .= mcfUnit cf
-                                        , "direction" .= (case mcfDirection cf of Input -> "Input" :: Text; Output -> "Output")
-                                        , "db_flow_name" .= bfName f
-                                        , "flow_id" .= UUID.toText (bfId f)
-                                        , "flow_unit" .= getUnitNameForBioFlow (dbUnits db) f
-                                        , "category" .= bfCompartmentName f
-                                        , "compartment" .= bfCompartmentSub f
-                                        , "match_strategy" .= show strat
-                                        ]
-                            return $
-                                toolSuccessJson rid $
-                                    object
-                                        [ "method" .= methodName method
-                                        , "unit" .= methodUnit method
-                                        , "matches" .= length matched
-                                        , "shown" .= length top
-                                        , "factors" .= map mkEntry top
-                                        ]
+callGetCharacterization dbManager rid args = runTool rid $ do
+    (dbName, methodIdText) <- except $ (,) <$> requireText "database" args <*> requireText "method_id" args
+    ld <- requireDatabase dbManager dbName
+    (_, method) <- ExceptT (resolveMethod dbManager methodIdText)
+    let db = ldDatabase ld
+        lim = fromMaybe 20 (intArg "limit" args)
+        flowQ = textArg "flow" args
+        queryLower = fmap T.toLower flowQ
+    mappings <- liftIO $ DM.mapMethodToFlowsCached dbManager dbName db method
+    let matched =
+            [ (cf, f, strat)
+            | (cf, Just (f, strat)) <- mappings
+            , matchQuery queryLower (mcfFlowName cf) (bfName f)
+            ]
+        sorted = L.sortOn (\(cf, _, _) -> negate (abs (mcfValue cf))) matched
+        top = take lim sorted
+        mkEntry (cf, f, strat) =
+            object
+                [ "cf_flow_name" .= mcfFlowName cf
+                , "cf_value" .= mcfValue cf
+                , "cf_unit" .= mcfUnit cf
+                , "direction" .= (case mcfDirection cf of Input -> "Input" :: Text; Output -> "Output")
+                , "db_flow_name" .= bfName f
+                , "flow_id" .= UUID.toText (bfId f)
+                , "flow_unit" .= getUnitNameForBioFlow (dbUnits db) f
+                , "category" .= bfCompartmentName f
+                , "compartment" .= bfCompartmentSub f
+                , "match_strategy" .= show strat
+                ]
+    pure $
+        toolSuccessJson rid $
+            object
+                [ "method" .= methodName method
+                , "unit" .= methodUnit method
+                , "matches" .= length matched
+                , "shown" .= length top
+                , "factors" .= map mkEntry top
+                ]
   where
     matchQuery Nothing _ _ = True
     matchQuery (Just q) cfName dbFlowName = T.isInfixOf q (T.toLower cfName) || T.isInfixOf q (T.toLower dbFlowName)
@@ -1750,29 +1687,24 @@ callGetContributingActivities dbManager mBaseUrl rid args =
                     ]
 
 callListGeographies :: DatabaseManager -> Value -> KeyMap Value -> IO Value
-callListGeographies dbManager rid args =
-    case textArg "database" args of
-        Nothing -> return $ toolError rid "Missing required parameter: database"
-        Just dbName -> do
-            mLoaded <- getDatabase dbManager dbName
-            case mLoaded of
-                Nothing -> return $ toolError rid ("Database not loaded: " <> dbName)
-                Just ld -> do
-                    let db = ldDatabase ld
-                        geoMap = dmGeographies dbManager
-                        codes = L.sort $ M.keys (idxByLocation (dbIndexes db))
-                        mkEntry code =
-                            let (displayName, parents) = M.findWithDefault (code, []) code geoMap
-                                parentStr = T.intercalate "|" parents
-                             in object
-                                    [ "geo" .= code
-                                    , "display_name" .= displayName
-                                    , "parent_regions" .= parentStr
-                                    ]
-                    return $
-                        toolSuccessJson rid $
-                            object
-                                ["geographies" .= map mkEntry codes]
+callListGeographies dbManager rid args = runTool rid $ do
+    dbName <- except (requireText "database" args)
+    ld <- requireDatabase dbManager dbName
+    let db = ldDatabase ld
+        geoMap = dmGeographies dbManager
+        codes = L.sort $ M.keys (idxByLocation (dbIndexes db))
+        mkEntry code =
+            let (displayName, parents) = M.findWithDefault (code, []) code geoMap
+                parentStr = T.intercalate "|" parents
+             in object
+                    [ "geo" .= code
+                    , "display_name" .= displayName
+                    , "parent_regions" .= parentStr
+                    ]
+    pure $
+        toolSuccessJson rid $
+            object
+                ["geographies" .= map mkEntry codes]
 
 -- ============================================================================
 -- score_activity / score_activities / list_scoring_sets

From 5797e91e31acca2b32be342c5809020267ec9473 Mon Sep 17 00:00:00 2001
From: Christophe Combelles <ccomb@free.fr>
Date: Thu, 28 May 2026 15:56:06 +0200
Subject: [PATCH 37/43] refactor(mcp): remove duplicated and redundant pure
 helper code
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Factor the classification-filter construction (preset lookup + explicit
  classification/value args) into explicitClassFilter and
  classificationFilters, shared by the search, consumers, and
  supply-chain handlers instead of three near-identical inline copies.
- Drop the two local `<|>` rebindings in the search handlers; they
  re-implemented the Alternative Maybe instance now imported directly.
- Collapse a `case … of Right s -> Right s; Left e -> Left e` (an
  open-coded identity on Either) in compute_sensitivity's scoreOf.

Behaviour-preserving: supply-chain keeps explicit-only filters (it has
no preset arg), and `<|>` on Maybe matches the deleted definitions.
---
 src/API/MCP.hs | 69 +++++++++++++++++++++-----------------------------
 1 file changed, 29 insertions(+), 40 deletions(-)

diff --git a/src/API/MCP.hs b/src/API/MCP.hs
index e1d008dd..7b07086b 100644
--- a/src/API/MCP.hs
+++ b/src/API/MCP.hs
@@ -507,6 +507,29 @@ callListPresets presets rid =
                 | p <- presets
                 ]
 
+{- | Explicit classification filter from the @classification@ +
+@classification_value@ args (honouring @classification_match@). Shared by the
+search, consumers, and supply-chain handlers.
+-}
+explicitClassFilter :: KeyMap Value -> [(Text, Text, Bool)]
+explicitClassFilter args = case (textArg "classification" args, textArg "classification_value" args) of
+    (Just sys, Just val) -> [(sys, val, isExact)]
+    _ -> []
+  where
+    isExact = textArg "classification_match" args `elem` [Just "equals", Just "exact"]
+
+{- | Preset filters (looked up by @preset@ name) followed by the explicit
+filter. Shared by the search and consumers handlers.
+-}
+classificationFilters :: [ClassificationPreset] -> KeyMap Value -> [(Text, Text, Bool)]
+classificationFilters presets args = presetFilters ++ explicitClassFilter args
+  where
+    presetFilters = case textArg "preset" args of
+        Just pn -> case L.find (\p -> cpName p == pn) presets of
+            Just p -> [(ceSystem e, ceValue e, ceMode e == "exact") | e <- cpFilters p]
+            Nothing -> []
+        Nothing -> []
+
 callSearchActivities :: [ClassificationPreset] -> Value -> KeyMap Value -> (Database, SharedSolver) -> IO Value
 callSearchActivities presets rid args (db, _) = do
     let name = textArg "name" args
@@ -514,24 +537,14 @@ callSearchActivities presets rid args (db, _) = do
         product' = textArg "product" args
         limit = intArg "limit" args
         exact = fromMaybe False (boolArg "exact" args)
-        isExact = textArg "classification_match" args `elem` [Just "equals", Just "exact"]
-        presetFilters = case textArg "preset" args of
-            Just pn -> case L.find (\p -> cpName p == pn) presets of
-                Just p -> [(ceSystem e, ceValue e, ceMode e == "exact") | e <- cpFilters p]
-                Nothing -> []
-            Nothing -> []
-        explicitFilters = case (textArg "classification" args, textArg "classification_value" args) of
-            (Just sys, Just val) -> [(sys, val, isExact)]
-            _ -> []
-        classFilters = presetFilters ++ explicitFilters
-    let sf =
+        sf =
             Service.SearchFilter
                 { Service.sfCore =
                     Service.ActivityFilterCore
                         { Service.afcName = name
                         , Service.afcLocation = geo
                         , Service.afcProduct = product'
-                        , Service.afcClassifications = classFilters
+                        , Service.afcClassifications = classificationFilters presets args
                         , Service.afcLimit = limit <|> Just 20
                         , Service.afcOffset = Nothing
                         , Service.afcSort = Nothing
@@ -543,9 +556,6 @@ callSearchActivities presets rid args (db, _) = do
     case result of
         Left err -> return $ toolError rid (T.pack $ show err)
         Right val -> return $ toolSuccessJson rid val
-  where
-    Nothing <|> b = b
-    a <|> _ = a
 
 callListClassifications :: Value -> KeyMap Value -> (Database, SharedSolver) -> IO Value
 callListClassifications rid args (db, _) =
@@ -586,9 +596,6 @@ callSearchFlows rid args (db, _) =
             case result of
                 Left err -> return $ toolError rid (T.pack $ show err)
                 Right val -> return $ toolSuccessJson rid val
-  where
-    Nothing <|> b = b
-    a <|> _ = a
 
 callGetActivity :: Value -> KeyMap Value -> (Database, SharedSolver) -> IO Value
 callGetActivity rid args (db, _) = runTool rid $ do
@@ -653,10 +660,6 @@ callGetSupplyChain dbManager rid args = runTool rid $ do
     let db = ldDatabase ld
         solver = ldSharedSolver ld
         depLookup = DM.mkDepSolverLookup dbManager
-        isExact = textArg "classification_match" args `elem` [Just "equals", Just "exact"]
-        classFilters = case (textArg "classification" args, textArg "classification_value" args) of
-            (Just sys, Just val) -> [(sys, val, isExact)]
-            _ -> []
         scf =
             Service.SupplyChainFilter
                 { Service.scfCore =
@@ -664,7 +667,7 @@ callGetSupplyChain dbManager rid args = runTool rid $ do
                         { Service.afcName = textArg "name" args
                         , Service.afcLocation = textArg "location" args
                         , Service.afcProduct = Nothing
-                        , Service.afcClassifications = classFilters
+                        , Service.afcClassifications = explicitClassFilter args
                         , Service.afcLimit = intArg "limit" args
                         , Service.afcOffset = Nothing
                         , Service.afcSort = Nothing
@@ -769,17 +772,7 @@ callGetPathTo rid args (db, solver) = runTool rid $ do
 callGetConsumers :: [ClassificationPreset] -> Value -> KeyMap Value -> (Database, SharedSolver) -> IO Value
 callGetConsumers presets rid args (db, _) = runTool rid $ do
     pid <- except (requireText "process_id" args)
-    let isExact = textArg "classification_match" args `elem` [Just "equals", Just "exact"]
-        dbName = fromMaybe "" (textArg "database" args) -- validated by withDb
-        presetFilters = case textArg "preset" args of
-            Just pn -> case L.find (\p -> cpName p == pn) presets of
-                Just p -> [(ceSystem e, ceValue e, ceMode e == "exact") | e <- cpFilters p]
-                Nothing -> []
-            Nothing -> []
-        explicitFilters = case (textArg "classification" args, textArg "classification_value" args) of
-            (Just sys, Just val) -> [(sys, val, isExact)]
-            _ -> []
-        classFilters = presetFilters ++ explicitFilters
+    let dbName = fromMaybe "" (textArg "database" args) -- validated by withDb
         cnf =
             Service.ConsumerFilter
                 { Service.cnfCore =
@@ -787,7 +780,7 @@ callGetConsumers presets rid args (db, _) = runTool rid $ do
                         { Service.afcName = textArg "name" args
                         , Service.afcLocation = textArg "location" args
                         , Service.afcProduct = textArg "product" args
-                        , Service.afcClassifications = classFilters
+                        , Service.afcClassifications = classificationFilters presets args
                         , Service.afcLimit = intArg "limit" args
                         , Service.afcOffset = Nothing
                         , Service.afcSort = Nothing
@@ -1086,11 +1079,7 @@ callComputeSensitivity dbManager mBaseUrl rid args =
             liftIO $
                 Service.computeSensitivities db (ldSharedSolver ld) (raPid ra) perts
         (baselineX, perResults) <- liftShow eRes
-        let scoreOf x =
-                let inv = applyBiosphereMatrix db x
-                 in case computeLCIAScoreAuto unitCfg mUnits mFlows db x inv hier tables of
-                        Right s -> Right s
-                        Left e -> Left e
+        let scoreOf x = computeLCIAScoreAuto unitCfg mUnits mFlows db x (applyBiosphereMatrix db x) hier tables
         baselineScore <- case scoreOf baselineX of
             Right s -> pure s
             Left e -> throwE ("baseline scoring failed: " <> e)

From c059d65422cdfbfb776c2e92400878e50544ef5e Mon Sep 17 00:00:00 2001
From: Christophe Combelles <ccomb@free.fr>
Date: Thu, 28 May 2026 19:20:37 +0200
Subject: [PATCH 38/43] fix(method): emit SimaPro categories that have no rows

A category header with zero CF rows (or a damage category with zero
impact rows) emits its Method/DamageCategory again, as before the
Stage-ADT refactor. A category can be declared before its factors are
added; dropping it silently would hide that from downstream.
---
 src/Method/ParserSimaPro.hs | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/src/Method/ParserSimaPro.hs b/src/Method/ParserSimaPro.hs
index 0cae0dc7..037c3fdc 100644
--- a/src/Method/ParserSimaPro.hs
+++ b/src/Method/ParserSimaPro.hs
@@ -180,17 +180,19 @@ step cfg st line = case psStage st of
   where
     stripped = BS8.strip line
 
--- | Append the completed in-progress block (if any) onto the right list.
+-- | Append the completed category. A header with zero CF rows still emits an
+-- empty 'Method': a category can be declared before its factors are added, and
+-- silently dropping it would hide that from downstream.
 -- Shared by 'step' (mid-stream, on blanks/markers/End) and 'finalize' (at EOF).
 finishCat :: CatAccum -> ParseState -> ParseState
-finishCat (CatAccum name unit factors) st
-    | null factors = st
-    | otherwise = st{psMethods = buildMethod (psMethodology st) name unit (reverse factors) : psMethods st}
+finishCat (CatAccum name unit factors) st =
+    st{psMethods = buildMethod (psMethodology st) name unit (reverse factors) : psMethods st}
 
+-- | Append the completed damage category, including one with zero impact rows
+-- (same rationale as 'finishCat').
 finishDamage :: DamageAccum -> ParseState -> ParseState
-finishDamage (DamageAccum name unit impacts) st
-    | null impacts = st
-    | otherwise = st{psDamageCats = DamageCategory name unit (reverse impacts) : psDamageCats st}
+finishDamage (DamageAccum name unit impacts) st =
+    st{psDamageCats = DamageCategory name unit (reverse impacts) : psDamageCats st}
 
 finishNW :: NWAccum -> ParseState -> ParseState
 finishNW (NWAccum name norm weight) st

From 1702b503ad1fdb85eba7ca78c5d3600ada4c283b Mon Sep 17 00:00:00 2001
From: Christophe Combelles <ccomb@free.fr>
Date: Thu, 28 May 2026 19:20:42 +0200
Subject: [PATCH 39/43] fix(service): keep the root graph node at index 0 below
 cutoff

When the root activity falls below the cutoff threshold it is prepended
(becoming node 0) rather than left at its natural index, restoring the
node ordering and id mapping from before the cutoff refactor. Its value
is read from the supply list via lookup, so no partial indexing.
---
 src/Service.hs | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/src/Service.hs b/src/Service.hs
index fa2e8496..af2ac98a 100644
--- a/src/Service.hs
+++ b/src/Service.hs
@@ -673,19 +673,22 @@ filterTreeExport pat export =
         meta = (teTree export){tmTotalNodes = M.size filteredNodes}
      in export{teTree = meta, teNodes = filteredNodes, teEdges = filteredEdges}
 
-{- | Activities whose absolute cumulative value clears the threshold, plus the
-root activity (which we always surface, even if it falls below). Out-of-bounds
-roots become a zero-valued entry rather than crashing.
+{- | Activities whose absolute cumulative value clears the threshold. The root
+activity is always surfaced: above threshold it keeps its natural position;
+otherwise it is prepended (becoming node 0) with its actual supply value, or
+0 when out of bounds.
 -}
 selectSignificantActivities :: Double -> ProcessId -> [Double] -> [(ProcessId, Double)]
 selectSignificantActivities threshold rootPid supplyList =
-    let kept =
+    let aboveThreshold =
             [ (fromIntegral idx :: ProcessId, val)
             | (idx, val) <- zip [(0 :: Int) ..] supplyList
-            , abs val > threshold || idx == fromIntegral rootPid
+            , abs val > threshold
             ]
-        rootInBounds = fromIntegral rootPid < length supplyList
-     in if rootInBounds then kept else (rootPid, 0.0) : kept
+        rootValue = fromMaybe 0.0 (lookup (fromIntegral rootPid :: Int) (zip [0 ..] supplyList))
+     in if any ((== rootPid) . fst) aboveThreshold
+            then aboveThreshold
+            else (rootPid, rootValue) : aboveThreshold
 
 {- | True iff the exchange is a technosphere @Input@ whose link points at the
 given target activity. Waste exchanges aren't traversed by the graph builder

From c979d4b810515963cb9cb250c58269714c55514e Mon Sep 17 00:00:00 2001
From: Christophe Combelles <ccomb@free.fr>
Date: Thu, 28 May 2026 19:20:47 +0200
Subject: [PATCH 40/43] refactor(app): construct AppEnv with record syntax

Drop the mkAppEnv wrapper: five positional arguments with two adjacent
Maybes are easy to transpose. Call sites now name every field.
---
 app/Main.hs             | 11 +++++++++--
 src/API/BatchImpacts.hs | 20 +++++++++++++++++---
 src/App/Env.hs          | 10 ----------
 3 files changed, 26 insertions(+), 15 deletions(-)

diff --git a/app/Main.hs b/app/Main.hs
index be95a1d0..9a9272ae 100644
--- a/app/Main.hs
+++ b/app/Main.hs
@@ -39,7 +39,7 @@ import Progress
 import API.Licenses (licensesResponse)
 import API.MCP (mcpApp, toolDefinitions)
 import API.Routes (lcaAPI, lcaServer, volcaOpenApi)
-import App.Env (mkAppEnv)
+import App.Env (AppEnv (..))
 import Data.Aeson (encode)
 import qualified Data.ByteString as BS
 import qualified Data.ByteString.Char8 as C8
@@ -337,7 +337,14 @@ createServerApp dbManager maxTreeDepth staticDir desktopMode password hostingCon
     unless (desktopMode || hasFrontend) $
         reportProgress Info "Frontend not bundled — MCP responses will omit 'web_url'"
     mcp <- mcpApp dbManager filterPresets hasFrontend
-    let env = mkAppEnv dbManager maxTreeDepth password hostingConfig filterPresets
+    let env =
+            AppEnv
+                { aeDbManager = dbManager
+                , aeMaxTreeDepth = maxTreeDepth
+                , aePassword = password
+                , aeHostingConfig = hostingConfig
+                , aeClassificationPresets = filterPresets
+                }
         apiApp = serve lcaAPI (lcaServer env)
     pure $ \req respond -> do
         unless desktopMode (logRequest req)
diff --git a/src/API/BatchImpacts.hs b/src/API/BatchImpacts.hs
index 1624ff49..d3d189cd 100644
--- a/src/API/BatchImpacts.hs
+++ b/src/API/BatchImpacts.hs
@@ -21,7 +21,7 @@ module API.BatchImpacts (
 
 import API.Routes (activityLCIABatchH, batchImpactsH, collectionNotLoadedPrefix, databaseNotLoadedPrefix)
 import API.Types (BatchImpactsRequest (..), BatchImpactsResponse, LCIABatchResult, SubstitutionRequest)
-import App.Env (mkAppEnv, runApp)
+import App.Env (AppEnv (..), runApp)
 import Control.Concurrent.STM (readTVarIO)
 import qualified Data.ByteString.Lazy as BSL
 import qualified Data.Map as M
@@ -78,7 +78,14 @@ runActivityLCIABatch ::
     Maybe SubstitutionRequest ->
     IO (Either BatchError LCIABatchResult)
 runActivityLCIABatch dbm dbName pid coll mSub = do
-    let env = mkAppEnv dbm 0 Nothing Nothing []
+    let env =
+            AppEnv
+                { aeDbManager = dbm
+                , aeMaxTreeDepth = 0
+                , aePassword = Nothing
+                , aeHostingConfig = Nothing
+                , aeClassificationPresets = []
+                }
     res <- Servant.runHandler (runApp env (activityLCIABatchH dbName pid coll mSub))
     case res of
         Right lbr -> pure (Right lbr)
@@ -101,7 +108,14 @@ runBatchImpacts ::
     [Text] ->
     IO (Either BatchError BatchImpactsResponse)
 runBatchImpacts dbm dbName coll topFlows pids = do
-    let env = mkAppEnv dbm 0 Nothing Nothing []
+    let env =
+            AppEnv
+                { aeDbManager = dbm
+                , aeMaxTreeDepth = 0
+                , aePassword = Nothing
+                , aeHostingConfig = Nothing
+                , aeClassificationPresets = []
+                }
     res <-
         Servant.runHandler $
             runApp env $
diff --git a/src/App/Env.hs b/src/App/Env.hs
index e02d6ba6..6aeabd79 100644
--- a/src/App/Env.hs
+++ b/src/App/Env.hs
@@ -5,7 +5,6 @@
 -- mapping passed to Servant's 'hoistServer'.
 module App.Env (
     AppEnv (..),
-    mkAppEnv,
     AppM (..),
     runApp,
 ) where
@@ -26,15 +25,6 @@ data AppEnv = AppEnv
     , aeClassificationPresets :: ![Config.ClassificationPreset]
     }
 
-mkAppEnv
-    :: DatabaseManager
-    -> Int
-    -> Maybe String
-    -> Maybe Config.HostingConfig
-    -> [Config.ClassificationPreset]
-    -> AppEnv
-mkAppEnv = AppEnv
-
 newtype AppM a = AppM {unAppM :: ReaderT AppEnv Handler a}
     deriving newtype (Functor, Applicative, Monad, MonadIO, MonadReader AppEnv, MonadError ServerError)
 

From 0cec4107f242a42ee48b6fe34ea612a01c379551 Mon Sep 17 00:00:00 2001
From: Christophe Combelles <ccomb@free.fr>
Date: Thu, 28 May 2026 19:20:56 +0200
Subject: [PATCH 41/43] fix(json): keep the field name when the prefix strip
 empties it

An all-lowercase field has no Type-prefix boundary, so dropWhile isLower
would produce an empty JSON key. Fall back to the original label.
---
 src/API/JsonOptions.hs | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/src/API/JsonOptions.hs b/src/API/JsonOptions.hs
index 15caea6e..e55094b5 100644
--- a/src/API/JsonOptions.hs
+++ b/src/API/JsonOptions.hs
@@ -35,11 +35,14 @@ import GHC.Generics (Generic, Rep)
 stripLowerPrefix :: Options
 stripLowerPrefix =
     defaultOptions
-        { fieldLabelModifier = lowerFirst . dropWhile isLower
+        { fieldLabelModifier = stripPrefix
         }
   where
-    lowerFirst "" = ""
-    lowerFirst (c : cs) = toLower c : cs
+    -- Drop the lowercase Type-prefix (@fooBar -> bar@). An all-lowercase field
+    -- has no prefix boundary, so keep it verbatim rather than emit an empty key.
+    stripPrefix label = case dropWhile isLower label of
+        "" -> label
+        (c : cs) -> toLower c : cs
 
 strippedToJSON :: (Generic a, GToJSON' Value Zero (Rep a)) => a -> Value
 strippedToJSON = genericToJSON stripLowerPrefix

From 9c5e1a6c240a86f28791eb586622930f69530709 Mon Sep 17 00:00:00 2001
From: Christophe Combelles <ccomb@free.fr>
Date: Thu, 28 May 2026 19:21:24 +0200
Subject: [PATCH 42/43] refactor(solver): dedup factorization caching and
 cross-DB dep helpers (#98)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

## Summary

A readability pass over `SharedSolver.hs` that removes duplicated logic
without changing behaviour.

- **Factorization caching deduped.** The lazy MUMPS factorization was
computed-and-cached in two near-identical blocks
(`solveWithSharedSolver`, `ensureFactorization`). Extracted
`computeAndStoreFactorization` so the precompute + store lives once; the
dense-solver fallback stays scoped to the first-solve (cache-miss) path
exactly as before.
- **Shared dep-demand helpers.** The per-root dep-demand → demand-vector
conversion was copied byte-for-byte in three resolvers (`resolveDep`,
`resolveDepContribs`, and `Service.resolveDepWithSubs`). Extracted
`prepareDepDemandVecs`, plus `depDbsOf` for the "DBs referenced at this
level" set.
- **`Semigroup CrossDBSolution`.** `mergeSolutions` is now `foldl' (<>)`
— same summed inventory and base-first / BFS-ordered scalings as the
hand-written version.
- **Named map types.** Introduced `SupplierDemands` / `DepDemands`
aliases in `Matrix` (where the type is produced) and used them across
the affected signatures, replacing the verbose nested-`Map` spelling.

We deliberately stopped short of unifying the two dependency-graph walks
(inventory vs. contributions) behind a generic interpreter: it barely
reduces line count, would rely on a K=1 invariant the types don't
enforce, and can't absorb the third walk in `Service.hs`.

## Test plan

- [x] `./gen-version.sh && cabal build` — clean (only two pre-existing,
unrelated `API/MCP.hs` warnings).
- [x] `cabal run lca-tests` — 1107 examples, 0 failures, 1 pending.
- [x] Rebased onto current `advanced_haskell`; rebuilt and re-ran the
full suite green on the rebased tree.
---
 src/Matrix.hs       |  15 ++++-
 src/Service.hs      |  16 +++--
 src/SharedSolver.hs | 141 ++++++++++++++++++++++----------------------
 3 files changed, 90 insertions(+), 82 deletions(-)

diff --git a/src/Matrix.hs b/src/Matrix.hs
index 2588b59e..cee30f0c 100644
--- a/src/Matrix.hs
+++ b/src/Matrix.hs
@@ -33,6 +33,8 @@ Performance characteristics:
 module Matrix (
     Vector,
     Inventory,
+    SupplierDemands,
+    DepDemands,
     computeScalingVector,
     applyBiosphereMatrix,
     computeInventoryMatrix,
@@ -95,6 +97,13 @@ type Vector = U.Vector Double
 -- | Final inventory vector mapping biosphere flow UUIDs to quantities.
 type Inventory = M.Map UUID Double
 
+-- | Demand a consumer DB places on one supplier @(activityUUID, productUUID)@:
+-- the amount plus the consumer's exchange unit (for cross-DB unit conversion).
+type SupplierDemands = M.Map (UUID, UUID) (Double, Text)
+
+-- | Per-dependency-DB supplier demands, keyed by source database name.
+type DepDemands = M.Map Text SupplierDemands
+
 {- | Per-database coalescing solver. A single worker thread owns the MUMPS
 handle and drains a queue of solve requests, batching whatever has piled up
 into one 'mumpsSolveMulti' call per round. Concurrent requests on the same
@@ -727,7 +736,7 @@ before accumulating.
 accumulateDepDemands ::
     Database ->
     Vector ->
-    M.Map Text (M.Map (UUID, UUID) (Double, Text))
+    DepDemands
 accumulateDepDemands db = accumulateDepDemandsWith db []
 
 {- | Same as 'accumulateDepDemands' but folds over an additional list of
@@ -743,7 +752,7 @@ accumulateDepDemandsWith ::
     Database ->
     [CrossDBLink] ->
     Vector ->
-    M.Map Text (M.Map (UUID, UUID) (Double, Text))
+    DepDemands
 accumulateDepDemandsWith db extraLinks scalingVec =
     foldr step M.empty (dbCrossDBLinks db ++ extraLinks)
   where
@@ -818,7 +827,7 @@ depDemandsToVector ::
     -- | dep DB name, for error messages
     Text ->
     Database ->
-    M.Map (UUID, UUID) (Double, Text) ->
+    SupplierDemands ->
     Either Text Vector
 depDemandsToVector unitConfig depDbName depDb demands = do
     converted <- traverse convertEntry (M.toList demands)
diff --git a/src/Service.hs b/src/Service.hs
index af2ac98a..847e422b 100644
--- a/src/Service.hs
+++ b/src/Service.hs
@@ -30,7 +30,7 @@ import qualified Data.UUID as UUID
 import qualified Data.Vector as V
 import qualified Data.Vector.Unboxed as U
 import Database (applyStructuredFilters, findActivitiesByFields, findFlowsBySynonym)
-import Matrix (Inventory, accumulateDepDemandsWith, activityNormalizationFactor, applyBiosphereMatrix, applySparseMatrix, buildDemandVectorFromIndex, computeInventoryMatrix, depDemandsToVector, perturbA, perturbABatch, toList)
+import Matrix (DepDemands, Inventory, accumulateDepDemandsWith, activityNormalizationFactor, applyBiosphereMatrix, applySparseMatrix, buildDemandVectorFromIndex, computeInventoryMatrix, depDemandsToVector, perturbA, perturbABatch, toList)
 import qualified Matrix.Export as MatrixExport
 import Plugin.Types (Severity (..), ValidateContext (..), ValidateHandle (..), ValidationIssue (..), ValidationPhase (..))
 import qualified Progress
@@ -2280,7 +2280,7 @@ resolveDepWithSubs ::
     SharedSolver.DepSolverLookup ->
     -- | ROOT DB's name (default for bare consumer/from/to refs)
     RootDb ->
-    [M.Map Text (M.Map (UUID, UUID) (Double, Text))] ->
+    [DepDemands] ->
     [Substitution] ->
     Int ->
     Int ->
@@ -2294,13 +2294,11 @@ resolveDepWithSubs unitCfg depLookup rootDb perRootDepDemands allSubs depth k de
             -- contributes 'Nothing' at every root; dropped before merge.
             pure (Right (replicate k Nothing))
         Just (depDb, depSolver) ->
-            let demandsPerRoot = map (M.findWithDefault M.empty depDbName) perRootDepDemands
-                depVecsE = traverse (depDemandsToVector unitCfg depDbName depDb) demandsPerRoot
-             in case depVecsE of
-                    Left err -> pure (Left (MatrixError err))
-                    Right depDemandVecs -> do
-                        sols <- goWithSubsAndDeps unitCfg depLookup depDb (ThisDb depDbName) rootDb depSolver depDemandVecs allSubs (depth + 1)
-                        pure $ fmap (map Just) sols
+            case SharedSolver.prepareDepDemandVecs unitCfg depDbName depDb perRootDepDemands of
+                Left err -> pure (Left (MatrixError err))
+                Right depDemandVecs -> do
+                    sols <- goWithSubsAndDeps unitCfg depLookup depDb (ThisDb depDbName) rootDb depSolver depDemandVecs allSubs (depth + 1)
+                    pure $ fmap (map Just) sols
 
 {- | Cross-DB substitution resolver (root-only path, used by supply-chain).
 
diff --git a/src/SharedSolver.hs b/src/SharedSolver.hs
index 358d2823..b0798eea 100644
--- a/src/SharedSolver.hs
+++ b/src/SharedSolver.hs
@@ -1,3 +1,4 @@
+{-# LANGUAGE LambdaCase #-}
 {-# LANGUAGE OverloadedStrings #-}
 
 {- |
@@ -33,12 +34,13 @@ module SharedSolver (
     computeInventoryMatrixBatchWithDepsCached,
     goWithDepsFromScalings,
     mergeSolutions,
+    prepareDepDemandVecs,
     crossDBProcessContributions,
 ) where
 
 import Control.Concurrent.Async (mapConcurrently)
-import Control.Concurrent.MVar (MVar, modifyMVar, modifyMVar_, newMVar, readMVar, withMVar)
-import Control.Exception (SomeException, catch)
+import Control.Concurrent.MVar (MVar, modifyMVar_, newMVar, readMVar, withMVar)
+import Control.Exception (SomeException, try)
 import Data.List (transpose)
 import Data.List.NonEmpty (NonEmpty (..))
 import qualified Data.List.NonEmpty as NE
@@ -47,6 +49,7 @@ import Data.Maybe (catMaybes)
 import qualified Data.Set as S
 import Data.Text (Text)
 import Matrix (
+    DepDemands,
     Inventory,
     Vector,
     accumulateDepDemands,
@@ -89,40 +92,36 @@ createSharedSolver dbName techTriples activityCount = do
     factVar <- newMVar Nothing
     return $ SharedSolver lock factVar techTriples activityCount dbName
 
+{- | Compute the factorization and cache it. Assumes 'solverLock' is held and
+the cache is empty (i.e. only called on a miss). Shared by the first-solve
+path and 'ensureFactorization'.
+-}
+computeAndStoreFactorization :: SharedSolver -> IO MatrixFactorization
+computeAndStoreFactorization solver = do
+    reportProgress Info $ "Factorizing '" ++ show (solverDbName solver) ++ "' on first use"
+    fact <-
+        precomputeMatrixFactorization
+            (solverDbName solver)
+            (solverTechTriples solver)
+            (solverActivityCount solver)
+    modifyMVar_ (solverFactorizationVar solver) (const (pure (Just fact)))
+    pure fact
+
 -- | Solve using shared solver. On first call, triggers lazy factorization.
 solveWithSharedSolver :: SharedSolver -> Vector -> IO Vector
-solveWithSharedSolver solver demandVector = do
-    withMVar (solverLock solver) $ \_ -> do
-        maybeFact <- readMVar (solverFactorizationVar solver)
-        case maybeFact of
-            Just factorization -> do
+solveWithSharedSolver solver demandVector =
+    withMVar (solverLock solver) $ \_ ->
+        readMVar (solverFactorizationVar solver) >>= \case
+            Just fact -> do
                 reportProgress Solver "Using cached factorization for sub-second solve"
-                solveSparseLinearSystemWithFactorization factorization demandVector
-            Nothing -> do
-                -- First solve: factorize, cache, then solve
-                reportProgress Info $ "First solve for '" ++ show (solverDbName solver) ++ "' — computing factorization..."
-                factResult <-
-                    catch
-                        ( do
-                            factorization <-
-                                precomputeMatrixFactorization
-                                    (solverDbName solver)
-                                    (solverTechTriples solver)
-                                    (solverActivityCount solver)
-                            -- Store for subsequent solves (replace the MVar contents)
-                            _ <- modifyMVar (solverFactorizationVar solver) $ \_ -> return (Just factorization, ())
-                            reportProgress Info "Factorization complete — solving"
-                            result <- solveSparseLinearSystemWithFactorization factorization demandVector
-                            return (Just result)
-                        )
-                        ( \e -> do
-                            reportProgress Solver $ "Factorization failed: " ++ show (e :: SomeException) ++ " — using fallback solver"
-                            return Nothing
-                        )
-                case factResult of
-                    Just result -> return result
-                    Nothing ->
-                        solveSparseLinearSystem (solverTechTriples solver) (solverActivityCount solver) demandVector
+                solveSparseLinearSystemWithFactorization fact demandVector
+            Nothing ->
+                try (computeAndStoreFactorization solver >>= flip solveSparseLinearSystemWithFactorization demandVector)
+                    >>= either fallback pure
+  where
+    fallback e = do
+        reportProgress Solver $ "Factorization failed: " ++ show (e :: SomeException) ++ " — using fallback solver"
+        solveSparseLinearSystem (solverTechTriples solver) (solverActivityCount solver) demandVector
 
 -- | Read the cached factorization without solving. Returns Nothing until the first solve.
 getFactorization :: SharedSolver -> IO (Maybe MatrixFactorization)
@@ -132,19 +131,10 @@ getFactorization solver = readMVar (solverFactorizationVar solver)
 Safe to call from multiple threads: the solverLock serializes first-time factorization.
 -}
 ensureFactorization :: SharedSolver -> IO MatrixFactorization
-ensureFactorization solver = withMVar (solverLock solver) $ \_ -> do
-    maybeFact <- readMVar (solverFactorizationVar solver)
-    case maybeFact of
+ensureFactorization solver = withMVar (solverLock solver) $ \_ ->
+    readMVar (solverFactorizationVar solver) >>= \case
         Just fact -> pure fact
-        Nothing -> do
-            reportProgress Info $ "Factorizing '" ++ show (solverDbName solver) ++ "' on first use"
-            fact <-
-                precomputeMatrixFactorization
-                    (solverDbName solver)
-                    (solverTechTriples solver)
-                    (solverActivityCount solver)
-            modifyMVar_ (solverFactorizationVar solver) $ \_ -> pure (Just fact)
-            pure fact
+        Nothing -> computeAndStoreFactorization solver
 
 {- | Solve with multiple RHS vectors in one MUMPS call, using the cached factorization.
 Forces factorization on first call. Subsequent calls reuse the cached LU.
@@ -204,6 +194,16 @@ data CrossDBSolution = CrossDBSolution
     , csScalings :: !(NonEmpty (Text, Database, Vector))
     }
 
+{- | Combine two cross-DB solutions: sum their inventories and concatenate
+their visited-DB scalings. Associative; folding from a base preserves the
+base-first, deps-in-order BFS layout 'csScalings' documents.
+-}
+instance Semigroup CrossDBSolution where
+    a <> b =
+        CrossDBSolution
+            (M.unionWith (+) (csInventory a) (csInventory b))
+            (csScalings a <> csScalings b)
+
 {- |
 Batch inventory with cross-DB back-substitution. Multi-RHS is preserved at
 every level of the dependency DAG:
@@ -320,7 +320,7 @@ goWithDepsFromScalings unitConfig depLookup db dbName extraLinks scalings depth
         then pure (Right baseSolutions)
         else do
             let perRootDepDemands = map (accumulateDepDemandsWith db extraLinks) scalings
-                allDepDbs = S.toList $ S.unions $ map M.keysSet perRootDepDemands
+                allDepDbs = depDbsOf perRootDepDemands
             if null allDepDbs
                 then pure (Right baseSolutions)
                 else do
@@ -352,20 +352,25 @@ Exported so the substitution-aware solver ('Service.goWithSubsAndDeps')
 reuses the same merge shape as the plain cross-DB solver.
 -}
 mergeSolutions :: CrossDBSolution -> [CrossDBSolution] -> CrossDBSolution
-mergeSolutions base depSols =
-    CrossDBSolution
-        { csInventory =
-            foldr (M.unionWith (+)) (csInventory base) (map csInventory depSols)
-        , csScalings =
-            NE.appendList
-                (csScalings base)
-                (concatMap (NE.toList . csScalings) depSols)
-        }
+mergeSolutions = foldl' (<>)
+
+-- | Every dependency DB referenced across a level's per-root demand maps.
+depDbsOf :: [DepDemands] -> [Text]
+depDbsOf = S.toList . S.unions . map M.keysSet
+
+{- | Turn a level's per-root demand maps into the dep DB's per-root demand
+vectors, performing unit conversion. Picks out @depDbName@'s share of each
+root's demands. Shared by every dep resolver (inventory, contributions, and
+the substitution-aware path in 'Service').
+-}
+prepareDepDemandVecs :: UnitConfig -> Text -> Database -> [DepDemands] -> Either Text [Vector]
+prepareDepDemandVecs unitConfig depDbName depDb =
+    traverse (depDemandsToVector unitConfig depDbName depDb . M.findWithDefault M.empty depDbName)
 
 resolveDep ::
     UnitConfig ->
     DepSolverLookup ->
-    [M.Map Text (M.Map (UUID, UUID) (Double, Text))] ->
+    [DepDemands] ->
     -- | current depth (for recursion)
     Int ->
     -- | K (so absent-dep returns the right number of 'Nothing' padding)
@@ -383,13 +388,11 @@ resolveDep unitConfig depLookup perRootDepDemands depth k depDbName = do
             -- materialising one (which the NonEmpty type forbids).
             pure (Right (replicate k Nothing))
         Just (depDb, depSolver) ->
-            let demandsPerRoot = map (M.findWithDefault M.empty depDbName) perRootDepDemands
-                depVecsE = traverse (depDemandsToVector unitConfig depDbName depDb) demandsPerRoot
-             in case depVecsE of
-                    Left err -> pure (Left err)
-                    Right depDemandVecs -> do
-                        sols <- goWithDeps unitConfig depLookup depDb depDbName depSolver depDemandVecs (depth + 1)
-                        pure $ fmap (map Just) sols
+            case prepareDepDemandVecs unitConfig depDbName depDb perRootDepDemands of
+                Left err -> pure (Left err)
+                Right depDemandVecs -> do
+                    sols <- goWithDeps unitConfig depLookup depDb depDbName depSolver depDemandVecs (depth + 1)
+                    pure $ fmap (map Just) sols
 
 {- | Cross-DB per-activity LCIA contributions. Walks the same dep graph as
 'goWithDeps' but attributes contributions per @(dbName, localPid)@ instead
@@ -442,7 +445,7 @@ crossDBProcessContributions unitConfig unitDB flowDB depLookup rootDb rootName r
             then pure (Right localTagged)
             else do
                 let perRootDepDemands = map (accumulateDepDemands db) scalings
-                    allDepDbs = S.toList $ S.unions $ map M.keysSet perRootDepDemands
+                    allDepDbs = depDbsOf perRootDepDemands
                 if null allDepDbs
                     then pure (Right localTagged)
                     else do
@@ -453,7 +456,7 @@ crossDBProcessContributions unitConfig unitDB flowDB depLookup rootDb rootName r
                                 Right $ foldr (M.unionWith (+)) localTagged depMaps
 
     resolveDepContribs ::
-        [M.Map Text (M.Map (UUID, UUID) (Double, Text))] ->
+        [DepDemands] ->
         Int ->
         Text ->
         IO (Either Text (M.Map (Text, ProcessId) Double))
@@ -462,8 +465,6 @@ crossDBProcessContributions unitConfig unitDB flowDB depLookup rootDb rootName r
         case depM of
             Nothing -> pure (Right M.empty) -- dep DB not loaded; root-level gate should have caught this
             Just (depDb, depSolver) ->
-                let demandsPerRoot = map (M.findWithDefault M.empty depDbName) perRootDepDemands
-                    depVecsE = traverse (depDemandsToVector unitConfig depDbName depDb) demandsPerRoot
-                 in case depVecsE of
-                        Left err -> pure (Left err)
-                        Right depDemandVecs -> go depDb depDbName depSolver depDemandVecs (depth + 1)
+                case prepareDepDemandVecs unitConfig depDbName depDb perRootDepDemands of
+                    Left err -> pure (Left err)
+                    Right depDemandVecs -> go depDb depDbName depSolver depDemandVecs (depth + 1)

From b76fbacbb526a7bf2e67871b70fb4158e9ddd659 Mon Sep 17 00:00:00 2001
From: Christophe Combelles <ccomb@free.fr>
Date: Thu, 28 May 2026 21:11:36 +0200
Subject: [PATCH 43/43] refactor(ecospold): extract shared cut-off strategy
 into EcoSpold.Cutoff (#99)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

## What

The cut-off post-processing — reducing a multi-output dataset to a
single reference product so the engine sees a single-output process —
was duplicated **verbatim** between the EcoSpold1 and EcoSpold2 parsers
(~70 lines, the same eight-function chain in each). This moves that
block into one shared `EcoSpold.Cutoff` module that both parsers import.

This is the last remaining piece of the EcoSpold refactor series (after
the Parser2 split #97 and the Parser1 fold dedup #94), which left the
cut-off block still copy-pasted in both files.

## Changes

- **New `src/EcoSpold/Cutoff.hs`** — holds `applyCutoffStrategy`,
`hasReferenceProduct`, `removeZeroAmountCoproducts`,
`assignSingleProductAsReference`, `isProductionExchange` (exported) plus
the internal `updateReferenceProduct` / `markAsReference` /
`unmarkAsReference`.
- **`Parser1.hs` / `Parser2.hs`** — delete the duplicated block, `import
EcoSpold.Cutoff (applyCutoffStrategy)`. Parser1 drops its "exported for
testing" cut-off helpers.
- **`EcoSpold1Spec.hs`** — imports the cut-off helpers from
`EcoSpold.Cutoff` instead of the old `EcoSpold.Parser1` re-export.
- **`volca.cabal`** — registers the new library module.

Pure post-fold logic, runs once per activity (not in the SAX loop), so
no behaviour or throughput change. Net: ~154 duplicated lines removed
across the two parsers, replaced by one ~95-line module.

## Verification

- `cabal build lib:volca` + `exe:volca`: clean (only pre-existing
unrelated MCP warnings).
- `cabal test lca-tests`: **1107 examples, 0 failures, 1 pending** —
including the EcoSpold1 cut-off-helper tests now resolving against the
new module.
---
 src/EcoSpold/Cutoff.hs  | 94 +++++++++++++++++++++++++++++++++++++++++
 src/EcoSpold/Parser1.hs | 77 +--------------------------------
 src/EcoSpold/Parser2.hs | 79 +---------------------------------
 test/EcoSpold1Spec.hs   |  1 +
 volca.cabal             |  1 +
 5 files changed, 98 insertions(+), 154 deletions(-)
 create mode 100644 src/EcoSpold/Cutoff.hs

diff --git a/src/EcoSpold/Cutoff.hs b/src/EcoSpold/Cutoff.hs
new file mode 100644
index 00000000..04d204e8
--- /dev/null
+++ b/src/EcoSpold/Cutoff.hs
@@ -0,0 +1,94 @@
+{- | Cut-off allocation strategy shared by the EcoSpold1 and EcoSpold2 parsers.
+
+Post-processing applied once per parsed activity (outside the SAX fold):
+reduce a multi-output dataset to a single reference product so the downstream
+engine sees a single-output process.
+-}
+module EcoSpold.Cutoff (
+    applyCutoffStrategy,
+    hasReferenceProduct,
+    removeZeroAmountCoproducts,
+    assignSingleProductAsReference,
+    isProductionExchange,
+) where
+
+import qualified Data.Text as T
+import Types
+
+{- | Apply cut-off strategy
+1. Remove zero-amount production exchanges (co-products)
+2. Assign single non-zero product as reference product
+3. Ensure single-output process structure
+4. VALIDATION: Fail if no reference product can be established
+-}
+applyCutoffStrategy :: Activity -> Either String Activity
+applyCutoffStrategy activity =
+    let filteredExchanges = removeZeroAmountCoproducts (exchanges activity)
+        updatedActivity = activity{exchanges = filteredExchanges}
+        finalActivity =
+            if hasReferenceProduct updatedActivity
+                then updatedActivity
+                else assignSingleProductAsReference updatedActivity
+     in if hasReferenceProduct finalActivity
+            then Right finalActivity
+            else Left $ "Activity has no reference product: " ++ T.unpack (activityName activity)
+
+-- | Check if activity has any reference product
+hasReferenceProduct :: Activity -> Bool
+hasReferenceProduct activity = any exchangeIsReference (exchanges activity)
+
+-- | Remove production exchanges with zero amounts
+removeZeroAmountCoproducts :: [Exchange] -> [Exchange]
+removeZeroAmountCoproducts = filter keepExchange
+  where
+    keepExchange TechnosphereExchange{techRole = ReferenceProduct} = True
+    keepExchange TechnosphereExchange{techRole = ReferenceInput} = True
+    keepExchange TechnosphereExchange{techRole = Input} = True
+    keepExchange TechnosphereExchange{techRole = Coproduct, techAmount = amount} = amount /= 0.0
+    keepExchange BiosphereExchange{} = True
+    keepExchange WasteExchange{} = True
+
+-- | Assign single product as reference product
+assignSingleProductAsReference :: Activity -> Activity
+assignSingleProductAsReference activity =
+    let productionExchanges = [ex | ex <- exchanges activity, isProductionExchange ex]
+        nonZeroProduction = [ex | ex <- productionExchanges, exchangeAmount ex /= 0.0]
+     in case nonZeroProduction of
+            [singleProduct] ->
+                -- Update the single product to be reference product
+                let updatedExchanges = map (updateReferenceProduct singleProduct) (exchanges activity)
+                 in activity{exchanges = updatedExchanges}
+            [] -> activity -- No production exchanges, leave as-is
+            _ -> activity -- Multiple production exchanges, leave as-is (shouldn't happen after cutoff)
+
+-- | Check if exchange is production exchange (technosphere output)
+isProductionExchange :: Exchange -> Bool
+isProductionExchange TechnosphereExchange{techRole = ReferenceProduct} = True
+isProductionExchange TechnosphereExchange{techRole = Coproduct} = True
+isProductionExchange TechnosphereExchange{techRole = Input} = False
+isProductionExchange TechnosphereExchange{techRole = ReferenceInput} = False
+isProductionExchange BiosphereExchange{} = False
+isProductionExchange WasteExchange{} = False -- waste outputs aren't "production" in the SimaPro sense
+
+-- | Update reference product flag for the specified exchange
+updateReferenceProduct :: Exchange -> Exchange -> Exchange
+updateReferenceProduct target current
+    | exchangeFlowId target == exchangeFlowId current = markAsReference current
+    | otherwise = unmarkAsReference current
+
+-- | Promote a production exchange to reference product
+markAsReference :: Exchange -> Exchange
+markAsReference ex@TechnosphereExchange{} = ex{techRole = ReferenceProduct}
+markAsReference ex@BiosphereExchange{} = ex
+markAsReference ex@WasteExchange{} = ex -- waste flows can't be promoted to reference product
+
+-- | Demote a reference role back to non-reference (preserving input/output direction)
+unmarkAsReference :: Exchange -> Exchange
+unmarkAsReference ex@TechnosphereExchange{techRole = role} = ex{techRole = demote role}
+  where
+    demote ReferenceProduct = Coproduct
+    demote ReferenceInput = Input
+    demote Coproduct = Coproduct
+    demote Input = Input
+unmarkAsReference ex@BiosphereExchange{} = ex
+unmarkAsReference ex@WasteExchange{} = ex
diff --git a/src/EcoSpold/Parser1.hs b/src/EcoSpold/Parser1.hs
index 0c18de02..73c4f0ab 100644
--- a/src/EcoSpold/Parser1.hs
+++ b/src/EcoSpold/Parser1.hs
@@ -17,11 +17,6 @@ module EcoSpold.Parser1 (
     -- * Pure helpers (exported for testing)
     generateFlowUUID,
     generateUnitUUID,
-    applyCutoffStrategy,
-    hasReferenceProduct,
-    removeZeroAmountCoproducts,
-    assignSingleProductAsReference,
-    isProductionExchange,
 ) where
 
 import Control.Monad (forM_)
@@ -36,6 +31,7 @@ import qualified Data.Text.Encoding as TE
 import qualified Data.UUID as UUID
 import qualified Data.UUID.V5 as UUID5
 import EcoSpold.Common (bsToDouble, bsToInt, bsToText, isElement, nonEmptyText)
+import EcoSpold.Cutoff (applyCutoffStrategy)
 import Progress (ProgressLevel (..), reportProgress)
 import Types
 import qualified Xeno.SAX as X
@@ -482,77 +478,6 @@ streamParseActivityAndFlowsFromFile1 path = do
     !xmlContent <- BS.readFile path
     return (parseWithXeno xmlContent)
 
--- | Apply cut-off strategy (same logic as EcoSpold2)
-applyCutoffStrategy :: Activity -> Either String Activity
-applyCutoffStrategy activity =
-    let filteredExchanges = removeZeroAmountCoproducts (exchanges activity)
-        updatedActivity = activity{exchanges = filteredExchanges}
-        finalActivity =
-            if hasReferenceProduct updatedActivity
-                then updatedActivity
-                else assignSingleProductAsReference updatedActivity
-     in if hasReferenceProduct finalActivity
-            then Right finalActivity
-            else Left $ "Activity has no reference product: " ++ T.unpack (activityName activity)
-
--- | Check if activity has any reference product
-hasReferenceProduct :: Activity -> Bool
-hasReferenceProduct act = any exchangeIsReference (exchanges act)
-
--- | Remove production exchanges with zero amounts
-removeZeroAmountCoproducts :: [Exchange] -> [Exchange]
-removeZeroAmountCoproducts = filter keepExchange
-  where
-    keepExchange TechnosphereExchange{techRole = ReferenceProduct} = True
-    keepExchange TechnosphereExchange{techRole = ReferenceInput} = True
-    keepExchange TechnosphereExchange{techRole = Input} = True
-    keepExchange TechnosphereExchange{techRole = Coproduct, techAmount = amount} = amount /= 0.0
-    keepExchange BiosphereExchange{} = True
-    keepExchange WasteExchange{} = True
-
--- | Assign single product as reference product
-assignSingleProductAsReference :: Activity -> Activity
-assignSingleProductAsReference act =
-    let productionExchanges = [ex | ex <- exchanges act, isProductionExchange ex]
-        nonZeroProduction = [ex | ex <- productionExchanges, exchangeAmount ex /= 0.0]
-     in case nonZeroProduction of
-            [singleProduct] ->
-                let updatedExchanges = map (updateReferenceProduct singleProduct) (exchanges act)
-                 in act{exchanges = updatedExchanges}
-            _ -> act
-
--- | Check if exchange is production exchange (technosphere output)
-isProductionExchange :: Exchange -> Bool
-isProductionExchange TechnosphereExchange{techRole = ReferenceProduct} = True
-isProductionExchange TechnosphereExchange{techRole = Coproduct} = True
-isProductionExchange TechnosphereExchange{techRole = Input} = False
-isProductionExchange TechnosphereExchange{techRole = ReferenceInput} = False
-isProductionExchange BiosphereExchange{} = False
-isProductionExchange WasteExchange{} = False -- waste outputs aren't "production" in the SimaPro sense
-
--- | Update reference product flag
-updateReferenceProduct :: Exchange -> Exchange -> Exchange
-updateReferenceProduct target current
-    | exchangeFlowId target == exchangeFlowId current = markAsReference current
-    | otherwise = unmarkAsReference current
-
--- | Promote a production exchange to reference product
-markAsReference :: Exchange -> Exchange
-markAsReference ex@TechnosphereExchange{} = ex{techRole = ReferenceProduct}
-markAsReference ex@BiosphereExchange{} = ex
-markAsReference ex@WasteExchange{} = ex -- waste flows can't be promoted to reference product
-
--- | Demote a reference role back to non-reference (preserving input/output direction)
-unmarkAsReference :: Exchange -> Exchange
-unmarkAsReference ex@TechnosphereExchange{techRole = role} = ex{techRole = demote role}
-  where
-    demote ReferenceProduct = Coproduct
-    demote ReferenceInput = Input
-    demote Coproduct = Coproduct
-    demote Input = Input
-unmarkAsReference ex@BiosphereExchange{} = ex
-unmarkAsReference ex@WasteExchange{} = ex
-
 -- ============================================================================
 -- Multi-dataset file support
 -- ============================================================================
diff --git a/src/EcoSpold/Parser2.hs b/src/EcoSpold/Parser2.hs
index d82fe7d3..0cd910f2 100644
--- a/src/EcoSpold/Parser2.hs
+++ b/src/EcoSpold/Parser2.hs
@@ -15,6 +15,7 @@ import qualified Data.Text.Encoding as TE
 import qualified Data.UUID as UUID
 import qualified Data.UUID.V5 as UUID5
 import EcoSpold.Common (bsToDouble, bsToInt, bsToIntMaybe, bsToText, isElement)
+import EcoSpold.Cutoff (applyCutoffStrategy)
 import Progress (ProgressLevel (..), reportProgress)
 import System.FilePath (takeBaseName)
 import Types
@@ -675,81 +676,3 @@ streamParseActivityAndFlowsFromFile path = do
             Right (result, warnings) -> do
                 mapM_ (reportProgress Warning) warnings
                 return $ Right result
-
-{- | Apply cut-off strategy
-1. Remove zero-amount production exchanges (co-products)
-2. Assign single non-zero product as reference product
-3. Ensure single-output process structure
-4. VALIDATION: Fail if no reference product can be established
--}
-applyCutoffStrategy :: Activity -> Either String Activity
-applyCutoffStrategy activity =
-    let filteredExchanges = removeZeroAmountCoproducts (exchanges activity)
-        updatedActivity = activity{exchanges = filteredExchanges}
-        finalActivity =
-            if hasReferenceProduct updatedActivity
-                then updatedActivity
-                else assignSingleProductAsReference updatedActivity
-     in if hasReferenceProduct finalActivity
-            then Right finalActivity
-            else Left $ "Activity has no reference product: " ++ T.unpack (activityName activity)
-
--- | Check if activity has any reference product
-hasReferenceProduct :: Activity -> Bool
-hasReferenceProduct activity = any exchangeIsReference (exchanges activity)
-
--- | Remove production exchanges with zero amounts
-removeZeroAmountCoproducts :: [Exchange] -> [Exchange]
-removeZeroAmountCoproducts = filter keepExchange
-  where
-    keepExchange TechnosphereExchange{techRole = ReferenceProduct} = True
-    keepExchange TechnosphereExchange{techRole = ReferenceInput} = True
-    keepExchange TechnosphereExchange{techRole = Input} = True
-    keepExchange TechnosphereExchange{techRole = Coproduct, techAmount = amount} = amount /= 0.0
-    keepExchange BiosphereExchange{} = True
-    keepExchange WasteExchange{} = True
-
--- | Assign single product as reference product
-assignSingleProductAsReference :: Activity -> Activity
-assignSingleProductAsReference activity =
-    let productionExchanges = [ex | ex <- exchanges activity, isProductionExchange ex]
-        nonZeroProduction = [ex | ex <- productionExchanges, exchangeAmount ex /= 0.0]
-     in case nonZeroProduction of
-            [singleProduct] ->
-                -- Update the single product to be reference product
-                let updatedExchanges = map (updateReferenceProduct singleProduct) (exchanges activity)
-                 in activity{exchanges = updatedExchanges}
-            [] -> activity -- No production exchanges, leave as-is
-            _ -> activity -- Multiple production exchanges, leave as-is (shouldn't happen after cutoff)
-
--- | Check if exchange is production exchange (technosphere output)
-isProductionExchange :: Exchange -> Bool
-isProductionExchange TechnosphereExchange{techRole = ReferenceProduct} = True
-isProductionExchange TechnosphereExchange{techRole = Coproduct} = True
-isProductionExchange TechnosphereExchange{techRole = Input} = False
-isProductionExchange TechnosphereExchange{techRole = ReferenceInput} = False
-isProductionExchange BiosphereExchange{} = False
-isProductionExchange WasteExchange{} = False
-
--- | Update reference product flag for the specified exchange
-updateReferenceProduct :: Exchange -> Exchange -> Exchange
-updateReferenceProduct target current
-    | exchangeFlowId target == exchangeFlowId current = markAsReference current
-    | otherwise = unmarkAsReference current
-
--- | Promote a production exchange to reference product
-markAsReference :: Exchange -> Exchange
-markAsReference ex@TechnosphereExchange{} = ex{techRole = ReferenceProduct}
-markAsReference ex@BiosphereExchange{} = ex
-markAsReference ex@WasteExchange{} = ex
-
--- | Demote a reference role back to non-reference (preserving input/output direction)
-unmarkAsReference :: Exchange -> Exchange
-unmarkAsReference ex@TechnosphereExchange{techRole = role} = ex{techRole = demote role}
-  where
-    demote ReferenceProduct = Coproduct
-    demote ReferenceInput = Input
-    demote Coproduct = Coproduct
-    demote Input = Input
-unmarkAsReference ex@BiosphereExchange{} = ex
-unmarkAsReference ex@WasteExchange{} = ex
diff --git a/test/EcoSpold1Spec.hs b/test/EcoSpold1Spec.hs
index a1e9c46e..db693f2e 100644
--- a/test/EcoSpold1Spec.hs
+++ b/test/EcoSpold1Spec.hs
@@ -7,6 +7,7 @@ import qualified Data.Map.Strict as M
 import Data.UUID (nil)
 import Test.Hspec
 
+import EcoSpold.Cutoff
 import EcoSpold.Parser1
 import Types
 
diff --git a/volca.cabal b/volca.cabal
index cf215f66..509127ba 100644
--- a/volca.cabal
+++ b/volca.cabal
@@ -73,6 +73,7 @@ library
                      , CLI.Client
                      , CLI.Repl
                      , EcoSpold.Common
+                     , EcoSpold.Cutoff
                      , EcoSpold.Parser2
                      , EcoSpold.Parser1
                      , SimaPro.Parser