From 4fa2b857a40832663b66705d5a286e3ba40c4ce8 Mon Sep 17 00:00:00 2001 From: Christophe Combelles Date: Tue, 26 May 2026 00:29:50 +0200 Subject: [PATCH 01/43] feat(json): add Stripped newtype for DerivingVia carrier MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce `newtype Stripped a` in API.JsonOptions exposing ToJSON, FromJSON, and ToSchema instances that delegate to the existing strippedToJSON/strippedToEncoding/strippedParseJSON/strippedSchemaOptions helpers. The instance bodies bypass the user-facing typeclass on `a` and call the Generic helpers directly, so a user-side `deriving via (Stripped Foo) instance ToJSON Foo` does not recurse. This is pure addition — no existing instance is converted yet. The next two commits collapse ~180 hand-rolled instance blocks in src/API/Types.hs, src/Types.hs, and src/API/OpenApi.hs onto this carrier. Categorically: Stripped a is a zero-cost iso (Coercible) of a, so the Generic Rep is shared; the typeclass dictionary derived for Stripped a transfers along the iso to a itself. --- src/API/JsonOptions.hs | 47 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 46 insertions(+), 1 deletion(-) diff --git a/src/API/JsonOptions.hs b/src/API/JsonOptions.hs index 6f81ceb8..442fec54 100644 --- a/src/API/JsonOptions.hs +++ b/src/API/JsonOptions.hs @@ -1,4 +1,7 @@ {-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE FlexibleInstances #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE UndecidableInstances #-} module API.JsonOptions ( stripLowerPrefix, @@ -6,11 +9,14 @@ module API.JsonOptions ( strippedToEncoding, strippedParseJSON, strippedSchemaOptions, + Stripped (..), ) where import Data.Aeson ( Encoding, + FromJSON (..), Options, + ToJSON (..), Value, defaultOptions, fieldLabelModifier, @@ -20,7 +26,10 @@ import Data.Aeson ( ) import Data.Aeson.Types (GFromJSON, GToEncoding, GToJSON', Parser, Zero) import Data.Char (isLower, toLower) -import Data.OpenApi.Schema (SchemaOptions, fromAesonOptions) +import Data.OpenApi.Schema (SchemaOptions, ToSchema (..), fromAesonOptions, genericDeclareNamedSchema) +import Data.OpenApi.Internal.Schema (GToSchema) +import Data.Proxy (Proxy (..)) +import Data.Typeable (Typeable) import GHC.Generics (Generic, Rep) stripLowerPrefix :: Options @@ -43,3 +52,39 @@ strippedParseJSON = genericParseJSON stripLowerPrefix strippedSchemaOptions :: SchemaOptions strippedSchemaOptions = fromAesonOptions stripLowerPrefix + +{- | DerivingVia carrier collapsing the ~180 hand-rolled @ToJSON@ \/ @FromJSON@ +\/ @ToSchema@ instance blocks that share the same shape: + +@ + toJSON = strippedToJSON + toEncoding = strippedToEncoding + parseJSON = strippedParseJSON + declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions +@ + +Usage: + +@ + data Foo = Foo { fooBar :: Int } deriving Generic + deriving via (Stripped Foo) instance ToJSON Foo + deriving via (Stripped Foo) instance FromJSON Foo + deriving via (Stripped Foo) instance ToSchema Foo +@ + +Categorically: dictionary transport along the zero-cost iso witnessed by +'Coercible'. The instance bodies bypass the user-facing typeclass on @a@ and +call the @Generic@ helpers directly, avoiding the recursion that would arise +from @instance ToJSON a => ToJSON (Stripped a)@. +-} +newtype Stripped a = Stripped {unStripped :: a} + +instance (Generic a, GToJSON' Value Zero (Rep a), GToEncoding Zero (Rep a)) => ToJSON (Stripped a) where + toJSON (Stripped a) = strippedToJSON a + toEncoding (Stripped a) = strippedToEncoding a + +instance (Generic a, GFromJSON Zero (Rep a)) => FromJSON (Stripped a) where + parseJSON v = Stripped <$> strippedParseJSON v + +instance (Typeable a, Generic a, GToSchema (Rep a)) => ToSchema (Stripped a) where + declareNamedSchema _ = genericDeclareNamedSchema strippedSchemaOptions (Proxy :: Proxy a) From ff63b2097531fc6a2df6cb6419cc29004ecd4a93 Mon Sep 17 00:00:00 2001 From: Christophe Combelles Date: Tue, 26 May 2026 00:39:17 +0200 Subject: [PATCH 02/43] refactor(api): collapse 94 ToJSON/FromJSON instances via Stripped Convert hand-rolled `instance ToJSON X where toJSON = strippedToJSON; ...` and `instance FromJSON X where parseJSON = strippedParseJSON` blocks for 70 record types in API/Types.hs to attached deriving clauses: data Foo = Foo {...} deriving (Generic) deriving (ToJSON, FromJSON) via (Stripped Foo) The wire format is unchanged (openapi.json byte-identical against the pre-branch baseline; full hspec suite green at 1052/1052). Kept manual: ApiFlow (custom tagged-union shape), NodeType / EdgeType / FlowRole (default generic sum encoding), PerturbedEntry (custom Either-flatten). ToSchema migration deliberately deferred: instances live as orphans in API/OpenApi.hs and depend on types (Exchange, ApiFlow) whose schemas are themselves defined there, creating a compilation-order cycle if we try to derive in API/Types.hs. Will revisit as its own commit. --- src/API/Types.hs | 167 ++++++++++++++++++++--------------------------- 1 file changed, 72 insertions(+), 95 deletions(-) diff --git a/src/API/Types.hs b/src/API/Types.hs index b1f86f89..81fb81d2 100644 --- a/src/API/Types.hs +++ b/src/API/Types.hs @@ -1,11 +1,12 @@ {-# LANGUAGE DeriveGeneric #-} {-# LANGUAGE MultiParamTypeClasses #-} {-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE DerivingVia #-} {-# OPTIONS_GHC -Wno-orphans #-} module API.Types where -import API.JsonOptions (strippedParseJSON, strippedToEncoding, strippedToJSON) +import API.JsonOptions (Stripped (..), strippedParseJSON, strippedToEncoding, strippedToJSON) import Data.Aeson import Data.Aeson.Types (Parser) import qualified Data.ByteString.Lazy as BSL @@ -117,6 +118,7 @@ data ActivitySummary = ActivitySummary , prsAllocationFormula :: Maybe Text -- Raw SimaPro allocation formula; Nothing if purely numeric } deriving (Generic) + deriving (ToJSON, FromJSON) via (Stripped ActivitySummary) -- | Consumer result — ActivitySummary enriched with BFS depth from the queried supplier data ConsumerResult = ConsumerResult @@ -130,6 +132,7 @@ data ConsumerResult = ConsumerResult , crClassifications :: M.Map Text Text -- Classifications (ISIC, CPC, Category, etc.), mirrors SupplyChainEntry } deriving (Generic) + deriving (ToJSON, FromJSON) via (Stripped ConsumerResult) {- | Wrapper for /consumers responses. Mirrors 'SupplyChainResponse' so clients have a uniform {entries, edges} shape in both traversal directions. Edges @@ -141,6 +144,7 @@ data ConsumersResponse = ConsumersResponse , crrEdges :: ![SupplyChainEdge] } deriving (Generic) + deriving (ToJSON, FromJSON) via (Stripped ConsumersResponse) -- | Enhanced flow information for search results (now includes synonyms) data FlowSearchResult = FlowSearchResult @@ -151,6 +155,7 @@ data FlowSearchResult = FlowSearchResult , fsrSynonyms :: M.Map Text [Text] -- Synonyms by language (converted from Set to List for JSON) } deriving (Generic) + deriving (ToJSON) via (Stripped FlowSearchResult) -- | Inventory export data structures data InventoryExport = InventoryExport @@ -159,6 +164,7 @@ data InventoryExport = InventoryExport , ieStatistics :: InventoryStatistics } deriving (Generic) + deriving (ToJSON) via (Stripped InventoryExport) data InventoryMetadata = InventoryMetadata { imRootActivity :: ActivitySummary @@ -167,6 +173,7 @@ data InventoryMetadata = InventoryMetadata , imResourceFlows :: Int -- Biosphere inputs (resource extraction) } deriving (Generic) + deriving (ToJSON) via (Stripped InventoryMetadata) data InventoryFlowDetail = InventoryFlowDetail { ifdFlow :: BiosphereFlow -- Inventory flows are always biosphere @@ -176,6 +183,7 @@ data InventoryFlowDetail = InventoryFlowDetail , ifdCategory :: Text -- Flow category for grouping } deriving (Generic) + deriving (ToJSON) via (Stripped InventoryFlowDetail) data InventoryStatistics = InventoryStatistics { isTotalQuantity :: Double -- Sum of absolute values @@ -184,6 +192,7 @@ data InventoryStatistics = InventoryStatistics , isTopCategories :: [(Text, Int)] -- Top flow categories by count } deriving (Generic) + deriving (ToJSON) via (Stripped InventoryStatistics) -- | Tree export data structures for visualization data TreeExport = TreeExport @@ -192,6 +201,7 @@ data TreeExport = TreeExport , teEdges :: [TreeEdge] } deriving (Generic) + deriving (ToJSON) via (Stripped TreeExport) data TreeMetadata = TreeMetadata { tmRootId :: Text -- Changed to Text (ProcessId format) @@ -202,6 +212,7 @@ data TreeMetadata = TreeMetadata , tmExpandableNodes :: Int -- Nodes that could expand further } deriving (Generic) + deriving (ToJSON) via (Stripped TreeMetadata) data ExportNode = ExportNode { enId :: Text -- Changed to Text (ProcessId format) @@ -217,6 +228,7 @@ data ExportNode = ExportNode , enCompartment :: Maybe Text -- Biosphere compartment (air/water/soil), only for BiosphereNodes } deriving (Generic) + deriving (ToJSON) via (Stripped ExportNode) data NodeType = ActivityNode | LoopNode | BiosphereEmissionNode | BiosphereResourceNode deriving (Eq, Show, Generic) @@ -233,6 +245,7 @@ data TreeEdge = TreeEdge , teEdgeType :: EdgeType -- Type of edge (technosphere or biosphere) } deriving (Generic) + deriving (ToJSON) via (Stripped TreeEdge) data FlowInfo = FlowInfo { fiId :: UUID @@ -240,6 +253,7 @@ data FlowInfo = FlowInfo , fiCategory :: Text } deriving (Generic) + deriving (ToJSON) via (Stripped FlowInfo) -- | Graph export data structures for network visualization data GraphExport = GraphExport @@ -248,6 +262,7 @@ data GraphExport = GraphExport , geUnitGroups :: M.Map Text Text -- Unit to unit group mapping } deriving (Generic) + deriving (ToJSON) via (Stripped GraphExport) data GraphNode = GraphNode { gnNodeId :: Int -- Numeric ID for efficient frontend processing @@ -258,6 +273,7 @@ data GraphNode = GraphNode , gnLocation :: Text -- Geography } deriving (Generic) + deriving (ToJSON) via (Stripped GraphNode) data GraphEdge = GraphEdge { geSource :: Int -- Source node ID @@ -267,6 +283,7 @@ data GraphEdge = GraphEdge , geFlowName :: Text -- Name of the flow } deriving (Generic) + deriving (ToJSON) via (Stripped GraphEdge) {- | Lightweight flow information for lists. Carries either a tech or a bio flow; the @ApiFlow@ tag is the wire discriminator. @@ -278,6 +295,7 @@ data FlowSummary = FlowSummary , fsRole :: FlowRole -- Role in this specific activity } deriving (Generic) + deriving (ToJSON) via (Stripped FlowSummary) -- | Role of a flow in a specific activity context data FlowRole = InputFlow | OutputFlow | ReferenceProductFlow @@ -295,12 +313,14 @@ data MethodSummary = MethodSummary , msmCollection :: Text -- Parent collection name (e.g., "ef-31") } deriving (Generic) + deriving (ToJSON) via (Stripped MethodSummary) -- | Method collection list response newtype MethodCollectionListResponse = MethodCollectionListResponse { mclMethods :: [MethodCollectionStatusAPI] } deriving (Generic) + deriving (ToJSON, FromJSON) via (Stripped MethodCollectionListResponse) -- | Method collection status for API responses data MethodCollectionStatusAPI = MethodCollectionStatusAPI @@ -314,12 +334,14 @@ data MethodCollectionStatusAPI = MethodCollectionStatusAPI , mcaFormat :: Maybe Text -- Format (e.g. "ILCD") } deriving (Generic) + deriving (ToJSON, FromJSON) via (Stripped MethodCollectionStatusAPI) -- | Reference data list response (flow synonyms, compartment mappings, units) newtype RefDataListResponse = RefDataListResponse { rdlItems :: [RefDataStatusAPI] } deriving (Generic) + deriving (ToJSON, FromJSON) via (Stripped RefDataListResponse) -- | Reference data status for API responses data RefDataStatusAPI = RefDataStatusAPI @@ -332,12 +354,14 @@ data RefDataStatusAPI = RefDataStatusAPI , rdaEntryCount :: Int } deriving (Generic) + deriving (ToJSON, FromJSON) via (Stripped RefDataStatusAPI) -- | Synonym groups response newtype SynonymGroupsResponse = SynonymGroupsResponse { sgrGroups :: [[Text]] } deriving (Generic) + deriving (ToJSON, FromJSON) via (Stripped SynonymGroupsResponse) -- | Full method details data MethodDetail = MethodDetail @@ -350,6 +374,7 @@ data MethodDetail = MethodDetail , mdFactorCount :: Int } deriving (Generic) + deriving (ToJSON) via (Stripped MethodDetail) -- | Characterization factor for API response data MethodFactorAPI = MethodFactorAPI @@ -359,6 +384,7 @@ data MethodFactorAPI = MethodFactorAPI , mfaValue :: Double -- CF value } deriving (Generic) + deriving (ToJSON) via (Stripped MethodFactorAPI) -- | A single flow's contribution to an LCIA score data FlowContributionEntry = FlowContributionEntry @@ -371,6 +397,7 @@ data FlowContributionEntry = FlowContributionEntry , fcoCfValue :: Double -- Raw characterization factor value } deriving (Generic) + deriving (ToJSON) via (Stripped FlowContributionEntry) -- | LCIA result for a single impact category data LCIAResult = LCIAResult @@ -387,6 +414,7 @@ data LCIAResult = LCIAResult , lrTopContributors :: [FlowContributionEntry] -- Top contributing elementary flows } deriving (Generic) + deriving (ToJSON) via (Stripped LCIAResult) -- | Contributing flows result: top elementary flows for a specific impact category data ContributingFlowsResult = ContributingFlowsResult @@ -396,6 +424,7 @@ data ContributingFlowsResult = ContributingFlowsResult , cfrTopFlows :: [FlowContributionEntry] } deriving (Generic) + deriving (ToJSON) via (Stripped ContributingFlowsResult) -- | A single activity's contribution to an LCIA score data ActivityContribution = ActivityContribution @@ -407,6 +436,7 @@ data ActivityContribution = ActivityContribution , acSharePct :: Double -- Percentage of total score (0-100) } deriving (Generic) + deriving (ToJSON) via (Stripped ActivityContribution) -- | Contributing activities result: top upstream activities for a specific impact category data ContributingActivitiesResult = ContributingActivitiesResult @@ -416,12 +446,14 @@ data ContributingActivitiesResult = ContributingActivitiesResult , carActivities :: [ActivityContribution] } deriving (Generic) + deriving (ToJSON) via (Stripped ContributingActivitiesResult) -- | Batch impacts request: compute LCIA for every process in one call. newtype BatchImpactsRequest = BatchImpactsRequest { birProcessIds :: [Text] } deriving (Generic) + deriving (FromJSON) via (Stripped BatchImpactsRequest) -- | One entry of a batch impacts response. data BatchImpactsEntry = BatchImpactsEntry @@ -430,6 +462,7 @@ data BatchImpactsEntry = BatchImpactsEntry , bieImpacts :: LCIABatchResult } deriving (Generic) + deriving (ToJSON) via (Stripped BatchImpactsEntry) {- | Batch impacts response: one entry per successfully computed process, plus lists of process ids that could not be resolved. @@ -440,6 +473,7 @@ data BatchImpactsResponse = BatchImpactsResponse , birInvalid :: [Text] } deriving (Generic) + deriving (ToJSON) via (Stripped BatchImpactsResponse) {- | A single scoring-set indicator: the per-variable normalized-weighted value plus the impact category it came from. Value is pre-multiplied by the @@ -450,6 +484,7 @@ data ScoringIndicator = ScoringIndicator , siValue :: Double } deriving (Generic) + deriving (ToJSON) via (Stripped ScoringIndicator) -- | Batch LCIA result with optional single score data LCIABatchResult = LCIABatchResult @@ -466,6 +501,7 @@ data LCIABatchResult = LCIABatchResult -- ^ Scoring set name → (variable name → indicator). One row per scoring variable. } deriving (Generic) + deriving (ToJSON) via (Stripped LCIABatchResult) -- | Flow mapping status for a method data MappingStatus = MappingStatus @@ -483,6 +519,7 @@ data MappingStatus = MappingStatus , mstUnmappedFlows :: [UnmappedFlowAPI] -- Details of unmapped flows } deriving (Generic) + deriving (ToJSON) via (Stripped MappingStatus) -- | Details about an unmapped flow data UnmappedFlowAPI = UnmappedFlowAPI @@ -491,6 +528,7 @@ data UnmappedFlowAPI = UnmappedFlowAPI , ufaDirection :: Text -- "Input" or "Output" } deriving (Generic) + deriving (ToJSON) via (Stripped UnmappedFlowAPI) -- | DB-flow-centric mapping: all biosphere flows with their CF assignments data FlowCFMapping = FlowCFMapping @@ -501,6 +539,7 @@ data FlowCFMapping = FlowCFMapping , fcmFlows :: [FlowCFEntry] } deriving (Generic) + deriving (ToJSON) via (Stripped FlowCFMapping) -- | A single DB biosphere flow with its CF assignment (if any) data FlowCFEntry = FlowCFEntry @@ -512,6 +551,7 @@ data FlowCFEntry = FlowCFEntry , fceMatchStrategy :: Maybe Text -- "uuid" | "name" | "synonym" } deriving (Generic) + deriving (ToJSON) via (Stripped FlowCFEntry) -- | Characterization result: matched CFs for a method in a database data CharacterizationResult = CharacterizationResult @@ -522,6 +562,7 @@ data CharacterizationResult = CharacterizationResult , chrFactors :: [CharacterizationEntry] } deriving (Generic) + deriving (ToJSON) via (Stripped CharacterizationResult) -- | A single matched characterization factor data CharacterizationEntry = CharacterizationEntry @@ -537,12 +578,14 @@ data CharacterizationEntry = CharacterizationEntry , cheMatchStrategy :: Text -- "uuid", "cas", "name", "synonym", "fuzzy" } deriving (Generic) + deriving (ToJSON) via (Stripped CharacterizationEntry) -- | Database list response newtype DatabaseListResponse = DatabaseListResponse { dlrDatabases :: [DatabaseStatusAPI] -- All available databases } deriving (Generic) + deriving (ToJSON, FromJSON) via (Stripped DatabaseListResponse) -- | Database status for API responses data DatabaseStatusAPI = DatabaseStatusAPI @@ -558,6 +601,7 @@ data DatabaseStatusAPI = DatabaseStatusAPI , dsaDependsOn :: [Text] -- Names of databases this one depends on (for cross-DB linking) } deriving (Generic) + deriving (ToJSON, FromJSON) via (Stripped DatabaseStatusAPI) -- | Response for database activation data ActivateResponse = ActivateResponse @@ -566,6 +610,7 @@ data ActivateResponse = ActivateResponse , arDatabase :: Maybe DatabaseStatusAPI } deriving (Generic) + deriving (ToJSON, FromJSON) via (Stripped ActivateResponse) {- | Response for the re-link endpoint: fresh cross-DB link stats after a second-pass linking against the currently-loaded databases. @@ -578,18 +623,21 @@ data RelinkResponse = RelinkResponse , rrDependsOn :: [Text] } deriving (Generic) + deriving (ToJSON, FromJSON) via (Stripped RelinkResponse) -- | Result of auto-loading a single dependency data DepLoadResult = DepLoaded {dlrName :: Text} | DepLoadFailed {dlfName :: Text, dlfError :: Text} deriving (Generic) + deriving (ToJSON, FromJSON) via (Stripped DepLoadResult) -- | Response for the load database endpoint data LoadDatabaseResponse = LoadFailed {ldrError :: Text} | LoadSucceeded {ldrDatabase :: DatabaseStatusAPI, ldrDeps :: [DepLoadResult]} deriving (Generic) + deriving (ToJSON, FromJSON) via (Stripped LoadDatabaseResponse) -- | Request for database upload (base64-encoded ZIP) data UploadRequest = UploadRequest @@ -598,6 +646,7 @@ data UploadRequest = UploadRequest , urFileData :: Text -- Base64-encoded ZIP file content } deriving (Generic) + deriving (ToJSON, FromJSON) via (Stripped UploadRequest) -- | Response for database upload data UploadResponse = UploadResponse @@ -607,6 +656,7 @@ data UploadResponse = UploadResponse , uprFormat :: Maybe Text -- Detected format (if successful) } deriving (Generic) + deriving (ToJSON, FromJSON) via (Stripped UploadResponse) -- | Supply chain response — all upstream activities with scaling factors data SupplyChainResponse = SupplyChainResponse @@ -617,6 +667,7 @@ data SupplyChainResponse = SupplyChainResponse , scrEdges :: [SupplyChainEdge] } deriving (Generic) + deriving (ToJSON) via (Stripped SupplyChainResponse) {- | A single entry in the supply chain. @sceProcessId@ is bare for entries from the root DB and qualified (@"dbName::pid"@) for entries reached via @@ -636,6 +687,7 @@ data SupplyChainEntry = SupplyChainEntry , sceUpstreamCount :: Int -- number of unique upstream activities reachable from this one } deriving (Generic) + deriving (ToJSON) via (Stripped SupplyChainEntry) -- | An edge in the upstream supply chain subgraph data SupplyChainEdge = SupplyChainEdge @@ -646,6 +698,7 @@ data SupplyChainEdge = SupplyChainEdge , sceEdgeAmount :: Double -- technosphere coefficient } deriving (Generic) + deriving (ToJSON, FromJSON) via (Stripped SupplyChainEdge) {- | Request body for POST endpoints that accept substitutions. Substitutions modify the scaling vector via Sherman-Morrison rank-1 updates. @@ -654,6 +707,7 @@ newtype SubstitutionRequest = SubstitutionRequest { srSubstitutions :: [Substitution] } deriving (Generic) + deriving (FromJSON) via (Stripped SubstitutionRequest) {- | A single supplier substitution. @@ -670,6 +724,7 @@ data Substitution = Substitution , subConsumer :: Text -- Consumer activity ProcessId (bare or dbName::pid) } deriving (Generic) + deriving (FromJSON) via (Stripped Substitution) {- | A single rank-1 perturbation of a technosphere coefficient @A_ij@. @@ -684,12 +739,14 @@ data Perturbation = Perturbation , perLabel :: Maybe Text -- Optional label for response correlation } deriving (Generic) + deriving (ToJSON, FromJSON) via (Stripped Perturbation) -- | Request body for POST sensitivity endpoints. Flat list, V1. newtype SensitivityRequest = SensitivityRequest { srPerturbations :: [Perturbation] } deriving (Generic) + deriving (FromJSON) via (Stripped SensitivityRequest) {- | One result entry per perturbation. The 'peResult' carries either an error message ('Left') or the (impact, deltaImpact) pair ('Right'). The @@ -710,6 +767,7 @@ data SensitivityResponse = SensitivityResponse , srPerturbed :: [PerturbedEntry] } deriving (Generic) + deriving (ToJSON) via (Stripped SensitivityResponse) {- | Name of the request-level "root" database — the DB extracted from the URL path and the implicit target of any bare 'ProcessId' (one without the @@ -759,6 +817,7 @@ data ExchangeWithUnit = ExchangeWithUnit , ewuPedigree :: Maybe Pedigree -- LCA data-quality scores when available (mirrors exchangePedigree) } deriving (Generic) + deriving (ToJSON, FromJSON) via (Stripped ExchangeWithUnit) -- | Activity information optimized for API responses data ActivityForAPI = ActivityForAPI @@ -776,6 +835,7 @@ data ActivityForAPI = ActivityForAPI , pfaExchanges :: [ExchangeWithUnit] -- Exchanges with unit names } deriving (Generic) + deriving (ToJSON, FromJSON) via (Stripped ActivityForAPI) -- | Streamlined activity information - core data only data ActivityInfo = ActivityInfo @@ -785,6 +845,7 @@ data ActivityInfo = ActivityInfo , piLinks :: ActivityLinks -- Links to sub-resources } deriving (Generic) + deriving (ToJSON, FromJSON) via (Stripped ActivityInfo) -- | Extended activity metadata data ActivityMetadata = ActivityMetadata @@ -796,6 +857,7 @@ data ActivityMetadata = ActivityMetadata , pmReferenceProductFlow :: Maybe UUID -- Flow ID of reference product } deriving (Generic) + deriving (ToJSON, FromJSON) via (Stripped ActivityMetadata) -- | Links to related resources data ActivityLinks = ActivityLinks @@ -805,6 +867,7 @@ data ActivityLinks = ActivityLinks , plReferenceProductUrl :: Maybe Text -- URL to reference product (if exists) } deriving (Generic) + deriving (ToJSON, FromJSON) via (Stripped ActivityLinks) -- | Activity statistics data ActivityStats = ActivityStats @@ -814,6 +877,7 @@ data ActivityStats = ActivityStats , psLocation :: Text } deriving (Generic) + deriving (ToJSON, FromJSON) via (Stripped ActivityStats) -- | Flow with additional metadata. Carries either a tech or bio flow. data FlowDetail = FlowDetail @@ -822,6 +886,7 @@ data FlowDetail = FlowDetail , fdUsageCount :: Int -- How many activities use this flow } deriving (Generic) + deriving (ToJSON) via (Stripped FlowDetail) {- | Exchange with flow, unit, and target activity information. The carried flow's variant lines up with the Exchange variant. @@ -835,6 +900,7 @@ data ExchangeDetail = ExchangeDetail , edTargetActivity :: Maybe ActivitySummary -- Target activity for technosphere inputs } deriving (Generic) + deriving (ToJSON) via (Stripped ExchangeDetail) -- | A single filter entry returned in a preset data ClassificationEntryInfo = ClassificationEntryInfo @@ -843,6 +909,7 @@ data ClassificationEntryInfo = ClassificationEntryInfo , ceiMode :: !Text -- "exact" or "contains" } deriving (Show, Eq, Generic) + deriving (ToJSON) via (Stripped ClassificationEntryInfo) -- | A named filter preset (from TOML config) data ClassificationPresetInfo = ClassificationPresetInfo @@ -852,6 +919,7 @@ data ClassificationPresetInfo = ClassificationPresetInfo , cpiFilters :: ![ClassificationEntryInfo] } deriving (Show, Eq, Generic) + deriving (ToJSON) via (Stripped ClassificationPresetInfo) -- | Classification system with its values for browsing/filtering data ClassificationSystem = ClassificationSystem @@ -860,6 +928,7 @@ data ClassificationSystem = ClassificationSystem , csActivityCount :: Int -- How many activities have this system } deriving (Generic) + deriving (ToJSON) via (Stripped ClassificationSystem) {- | Result of an /activity/{pid}/aggregate call. @@ -874,6 +943,7 @@ data Aggregation = Aggregation , aggGroups :: [AggregationGroup] -- one entry per group_by bucket (empty when group_by omitted) } deriving (Generic) + deriving (ToJSON) via (Stripped Aggregation) -- | One bucket in an aggregation result. data AggregationGroup = AggregationGroup @@ -884,78 +954,16 @@ data AggregationGroup = AggregationGroup , aggCount :: Int } deriving (Generic) + deriving (ToJSON) via (Stripped AggregationGroup) -- JSON instances. All record types use API.JsonOptions.stripLowerPrefix -- via the strippedToJSON/strippedToEncoding/strippedParseJSON helpers. -- Sum-only types (NodeType, EdgeType, FlowRole) keep default derivation. -instance ToJSON ConsumerResult where toJSON = strippedToJSON; toEncoding = strippedToEncoding -instance FromJSON ConsumerResult where parseJSON = strippedParseJSON -instance ToJSON ConsumersResponse where toJSON = strippedToJSON; toEncoding = strippedToEncoding -instance FromJSON ConsumersResponse where parseJSON = strippedParseJSON -instance ToJSON ClassificationEntryInfo where toJSON = strippedToJSON; toEncoding = strippedToEncoding -instance ToJSON ClassificationPresetInfo where toJSON = strippedToJSON; toEncoding = strippedToEncoding -instance ToJSON ClassificationSystem where toJSON = strippedToJSON; toEncoding = strippedToEncoding -instance ToJSON Aggregation where toJSON = strippedToJSON; toEncoding = strippedToEncoding -instance ToJSON AggregationGroup where toJSON = strippedToJSON; toEncoding = strippedToEncoding instance (ToJSON a) => ToJSON (SearchResults a) where toJSON = strippedToJSON; toEncoding = strippedToEncoding -instance ToJSON ActivitySummary where toJSON = strippedToJSON; toEncoding = strippedToEncoding -instance ToJSON FlowSearchResult where toJSON = strippedToJSON; toEncoding = strippedToEncoding -instance ToJSON InventoryMetadata where toJSON = strippedToJSON; toEncoding = strippedToEncoding -instance ToJSON InventoryStatistics where toJSON = strippedToJSON; toEncoding = strippedToEncoding -instance ToJSON TreeExport where toJSON = strippedToJSON; toEncoding = strippedToEncoding -instance ToJSON TreeMetadata where toJSON = strippedToJSON; toEncoding = strippedToEncoding -instance ToJSON ExportNode where toJSON = strippedToJSON; toEncoding = strippedToEncoding instance ToJSON NodeType instance ToJSON EdgeType -instance ToJSON TreeEdge where toJSON = strippedToJSON; toEncoding = strippedToEncoding -instance ToJSON FlowInfo where toJSON = strippedToJSON; toEncoding = strippedToEncoding instance ToJSON FlowRole -instance ToJSON ExchangeWithUnit where toJSON = strippedToJSON; toEncoding = strippedToEncoding -instance ToJSON ActivityForAPI where toJSON = strippedToJSON; toEncoding = strippedToEncoding -instance ToJSON ActivityInfo where toJSON = strippedToJSON; toEncoding = strippedToEncoding -instance ToJSON ActivityMetadata where toJSON = strippedToJSON; toEncoding = strippedToEncoding -instance ToJSON ActivityLinks where toJSON = strippedToJSON; toEncoding = strippedToEncoding -instance ToJSON ActivityStats where toJSON = strippedToJSON; toEncoding = strippedToEncoding -instance ToJSON InventoryFlowDetail where toJSON = strippedToJSON; toEncoding = strippedToEncoding -instance ToJSON FlowSummary where toJSON = strippedToJSON; toEncoding = strippedToEncoding -instance ToJSON InventoryExport where toJSON = strippedToJSON; toEncoding = strippedToEncoding -instance ToJSON ExchangeDetail where toJSON = strippedToJSON; toEncoding = strippedToEncoding instance ToJSON Unit where toJSON = strippedToJSON; toEncoding = strippedToEncoding -instance ToJSON FlowDetail where toJSON = strippedToJSON; toEncoding = strippedToEncoding -instance ToJSON GraphExport where toJSON = strippedToJSON; toEncoding = strippedToEncoding -instance ToJSON GraphNode where toJSON = strippedToJSON; toEncoding = strippedToEncoding -instance ToJSON GraphEdge where toJSON = strippedToJSON; toEncoding = strippedToEncoding -instance ToJSON MethodSummary where toJSON = strippedToJSON; toEncoding = strippedToEncoding -instance ToJSON MethodCollectionListResponse where toJSON = strippedToJSON; toEncoding = strippedToEncoding -instance ToJSON MethodCollectionStatusAPI where toJSON = strippedToJSON; toEncoding = strippedToEncoding -instance ToJSON MethodDetail where toJSON = strippedToJSON; toEncoding = strippedToEncoding -instance ToJSON MethodFactorAPI where toJSON = strippedToJSON; toEncoding = strippedToEncoding -instance ToJSON FlowContributionEntry where toJSON = strippedToJSON; toEncoding = strippedToEncoding -instance ToJSON LCIAResult where toJSON = strippedToJSON; toEncoding = strippedToEncoding -instance ToJSON ScoringIndicator where toJSON = strippedToJSON; toEncoding = strippedToEncoding -instance ToJSON LCIABatchResult where toJSON = strippedToJSON; toEncoding = strippedToEncoding -instance ToJSON BatchImpactsEntry where toJSON = strippedToJSON; toEncoding = strippedToEncoding -instance ToJSON BatchImpactsResponse where toJSON = strippedToJSON; toEncoding = strippedToEncoding -instance FromJSON BatchImpactsRequest where parseJSON = strippedParseJSON -instance ToJSON ContributingFlowsResult where toJSON = strippedToJSON; toEncoding = strippedToEncoding -instance ToJSON ActivityContribution where toJSON = strippedToJSON; toEncoding = strippedToEncoding -instance ToJSON ContributingActivitiesResult where toJSON = strippedToJSON; toEncoding = strippedToEncoding -instance ToJSON MappingStatus where toJSON = strippedToJSON; toEncoding = strippedToEncoding -instance ToJSON UnmappedFlowAPI where toJSON = strippedToJSON; toEncoding = strippedToEncoding -instance ToJSON FlowCFMapping where toJSON = strippedToJSON; toEncoding = strippedToEncoding -instance ToJSON FlowCFEntry where toJSON = strippedToJSON; toEncoding = strippedToEncoding -instance ToJSON CharacterizationResult where toJSON = strippedToJSON; toEncoding = strippedToEncoding -instance ToJSON CharacterizationEntry where toJSON = strippedToJSON; toEncoding = strippedToEncoding -instance ToJSON SupplyChainResponse where toJSON = strippedToJSON; toEncoding = strippedToEncoding -instance ToJSON SupplyChainEntry where toJSON = strippedToJSON; toEncoding = strippedToEncoding -instance ToJSON SupplyChainEdge where toJSON = strippedToJSON; toEncoding = strippedToEncoding -instance FromJSON SupplyChainEdge where parseJSON = strippedParseJSON -instance FromJSON SubstitutionRequest where parseJSON = strippedParseJSON -instance FromJSON Substitution where parseJSON = strippedParseJSON -instance FromJSON SensitivityRequest where parseJSON = strippedParseJSON -instance FromJSON Perturbation where parseJSON = strippedParseJSON -instance ToJSON Perturbation where toJSON = strippedToJSON; toEncoding = strippedToEncoding -instance ToJSON SensitivityResponse where toJSON = strippedToJSON; toEncoding = strippedToEncoding -- Custom ToJSON for PerturbedEntry: flatten the Either so success entries -- have impact+deltaImpact and error entries have error. @@ -968,37 +976,6 @@ instance ToJSON PerturbedEntry where -- FromJSON instances needed for API conversion instance (FromJSON a) => FromJSON (SearchResults a) where parseJSON = strippedParseJSON -instance FromJSON ActivitySummary where parseJSON = strippedParseJSON -instance FromJSON ActivityInfo where parseJSON = strippedParseJSON -instance FromJSON ActivityForAPI where parseJSON = strippedParseJSON -instance FromJSON ActivityMetadata where parseJSON = strippedParseJSON -instance FromJSON ActivityLinks where parseJSON = strippedParseJSON -instance FromJSON ActivityStats where parseJSON = strippedParseJSON -instance FromJSON ExchangeWithUnit where parseJSON = strippedParseJSON -instance ToJSON DatabaseListResponse where toJSON = strippedToJSON; toEncoding = strippedToEncoding -instance ToJSON DatabaseStatusAPI where toJSON = strippedToJSON; toEncoding = strippedToEncoding -instance ToJSON ActivateResponse where toJSON = strippedToJSON; toEncoding = strippedToEncoding -instance ToJSON RelinkResponse where toJSON = strippedToJSON; toEncoding = strippedToEncoding -instance ToJSON DepLoadResult where toJSON = strippedToJSON; toEncoding = strippedToEncoding -instance ToJSON LoadDatabaseResponse where toJSON = strippedToJSON; toEncoding = strippedToEncoding -instance ToJSON UploadRequest where toJSON = strippedToJSON; toEncoding = strippedToEncoding -instance ToJSON UploadResponse where toJSON = strippedToJSON; toEncoding = strippedToEncoding -instance FromJSON DatabaseListResponse where parseJSON = strippedParseJSON -instance FromJSON DatabaseStatusAPI where parseJSON = strippedParseJSON -instance FromJSON ActivateResponse where parseJSON = strippedParseJSON -instance FromJSON RelinkResponse where parseJSON = strippedParseJSON -instance FromJSON DepLoadResult where parseJSON = strippedParseJSON -instance FromJSON LoadDatabaseResponse where parseJSON = strippedParseJSON -instance FromJSON UploadRequest where parseJSON = strippedParseJSON -instance FromJSON UploadResponse where parseJSON = strippedParseJSON -instance FromJSON MethodCollectionListResponse where parseJSON = strippedParseJSON -instance FromJSON MethodCollectionStatusAPI where parseJSON = strippedParseJSON -instance ToJSON RefDataListResponse where toJSON = strippedToJSON; toEncoding = strippedToEncoding -instance ToJSON RefDataStatusAPI where toJSON = strippedToJSON; toEncoding = strippedToEncoding -instance ToJSON SynonymGroupsResponse where toJSON = strippedToJSON; toEncoding = strippedToEncoding -instance FromJSON RefDataListResponse where parseJSON = strippedParseJSON -instance FromJSON RefDataStatusAPI where parseJSON = strippedParseJSON -instance FromJSON SynonymGroupsResponse where parseJSON = strippedParseJSON -- openapi3 cannot derive ToSchema for BSL.ByteString directly newtype BinaryContent = BinaryContent BSL.ByteString From 492c26c9e89cfc6bcd2b5c25b754ec1be075481e Mon Sep 17 00:00:00 2001 From: Christophe Combelles Date: Tue, 26 May 2026 00:47:35 +0200 Subject: [PATCH 03/43] refactor(types): collapse 6 multi-line JSON instances via Stripped MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Convert the manual `instance ToJSON / FromJSON … where toJSON = genericToJSON stripLowerPrefix; …` blocks for Pedigree, Exchange, Compartment, TechnosphereFlow, BiosphereFlow, and WasteFlow into attached `deriving (ToJSON, FromJSON) via (Stripped X)` clauses on each data declaration. TechRole and BioDirection keep their empty `instance ToJSON …` declarations (default Generic encoding for nullary-constructor sum types — the wire form is the constructor name as a JSON string). openapi.json byte-identical against the pre-branch baseline; hspec suite green at 1052/1052. --- src/Types.hs | 52 ++++++++++++---------------------------------------- 1 file changed, 12 insertions(+), 40 deletions(-) diff --git a/src/Types.hs b/src/Types.hs index ad5ba491..47ddec62 100644 --- a/src/Types.hs +++ b/src/Types.hs @@ -1,6 +1,7 @@ {-# LANGUAGE BangPatterns #-} {-# LANGUAGE DeriveAnyClass #-} {-# LANGUAGE DeriveGeneric #-} +{-# LANGUAGE DerivingVia #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE MultiParamTypeClasses #-} {-# LANGUAGE OverloadedStrings #-} @@ -12,9 +13,9 @@ module Types ( UUID, ) where -import API.JsonOptions (stripLowerPrefix) +import API.JsonOptions (Stripped (..)) import Control.DeepSeq (NFData) -import Data.Aeson (FromJSON (..), ToJSON (..), genericParseJSON, genericToEncoding, genericToJSON) +import Data.Aeson (FromJSON (..), ToJSON (..)) import Data.Int (Int32) import qualified Data.IntSet as IS import qualified Data.Map as M @@ -64,6 +65,7 @@ data Compartment = Compartment , compartmentSub :: !(Maybe Text) -- "high. pop.", "river water", … } deriving (Eq, Show, Generic, NFData, Store) + deriving (ToJSON, FromJSON) via (Stripped Compartment) {- | The biosphere flow's medium (air | water | soil | …), or @""@ when the source dataset omitted the compartment. Use 'bfCompartment' directly when @@ -135,6 +137,7 @@ data TechnosphereFlow = TechnosphereFlow , tfSubstanceId :: !(Maybe Int) } deriving (Generic, NFData, Store) + deriving (ToJSON, FromJSON) via (Stripped TechnosphereFlow) {- | A biosphere flow — an environmental exchange (resource extraction or emission). Always carries a `Compartment` identifying the medium. @@ -154,6 +157,7 @@ data BiosphereFlow = BiosphereFlow -} } deriving (Generic, NFData, Store) + deriving (ToJSON, FromJSON) via (Stripped BiosphereFlow) {- | A waste flow — a residual output that a process generates and which a treatment activity may consume as its reference input. Sister type to @@ -175,6 +179,7 @@ data WasteFlow = WasteFlow , wfSubstanceId :: !(Maybe Int) } deriving (Generic, NFData, Store) + deriving (ToJSON, FromJSON) via (Stripped WasteFlow) {- | Pedigree matrix (Weidema & Wesnæs 1996) — five LCA data-quality scores each in 1..5 (1 = best, 5 = worst). SimaPro CSV encodes it as a prefix in the @@ -188,6 +193,7 @@ data Pedigree = Pedigree , pedTechnological :: !Int -- 1..5 } deriving (Eq, Show, Generic, NFData, Store) + deriving (ToJSON, FromJSON) via (Stripped Pedigree) {- | Smart constructor: rejects out-of-range values (anything not in 1..5) by returning Nothing. Callers should treat Nothing as "no pedigree @@ -237,6 +243,7 @@ data Exchange , waPedigree :: !(Maybe Pedigree) -- LCA data-quality scores when available } deriving (Generic, NFData, Store) + deriving (ToJSON, FromJSON) via (Stripped Exchange) -- | Helper functions for Exchange variants exchangeFlowId :: Exchange -> UUID @@ -1195,46 +1202,11 @@ data CF = CF , cfFactor :: !Double -- Characterization factor } --- JSON instances for API compatibility --- Note: ProcessId is Int32, which already has ToJSON/FromJSON instances -instance ToJSON Pedigree where - toJSON = genericToJSON stripLowerPrefix - toEncoding = genericToEncoding stripLowerPrefix -instance FromJSON Pedigree where - parseJSON = genericParseJSON stripLowerPrefix -instance ToJSON Exchange where - toJSON = genericToJSON stripLowerPrefix - toEncoding = genericToEncoding stripLowerPrefix - -instance FromJSON Exchange where - parseJSON = genericParseJSON stripLowerPrefix - +-- ToJSON/FromJSON for the records above are produced via `deriving via (Stripped X)` +-- attached to each `data` declaration. The two enums TechRole and BioDirection use +-- the default Generic encoding (constructor name as JSON string). instance ToJSON TechRole instance FromJSON TechRole instance ToJSON BioDirection instance FromJSON BioDirection - -instance ToJSON Compartment where - toJSON = genericToJSON stripLowerPrefix - toEncoding = genericToEncoding stripLowerPrefix -instance FromJSON Compartment where - parseJSON = genericParseJSON stripLowerPrefix - -instance ToJSON TechnosphereFlow where - toJSON = genericToJSON stripLowerPrefix - toEncoding = genericToEncoding stripLowerPrefix -instance FromJSON TechnosphereFlow where - parseJSON = genericParseJSON stripLowerPrefix - -instance ToJSON BiosphereFlow where - toJSON = genericToJSON stripLowerPrefix - toEncoding = genericToEncoding stripLowerPrefix -instance FromJSON BiosphereFlow where - parseJSON = genericParseJSON stripLowerPrefix - -instance ToJSON WasteFlow where - toJSON = genericToJSON stripLowerPrefix - toEncoding = genericToEncoding stripLowerPrefix -instance FromJSON WasteFlow where - parseJSON = genericParseJSON stripLowerPrefix From 8ae6c9f4a86b5187c2957f2fe8246fe54119bbc9 Mon Sep 17 00:00:00 2001 From: Christophe Combelles Date: Tue, 26 May 2026 00:49:36 +0200 Subject: [PATCH 04/43] feat(validation): introduce Validation Applicative for accumulating errors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add Data.Validation — an Applicative-only type that accumulates errors via the Semigroup on @e@. The canonical example, in Milewski's /Category Theory for Programmers/, of an Applicative that is not a Monad: a lawful Monad instance would have to short-circuit on the first Failure to keep >>= associative w.r.t. the underlying Either behaviour, which would erase exactly the accumulation that motivates the type. So we stop at Applicative. Pure addition — no caller migrated in this commit. The next commit replaces the nested case/throwError ladder in src/API/Routes.hs:1296-1318 (Aggregate query-param validation) with an Applicative chain over Validation (NonEmpty Text), so a request with both an invalid `scope` AND an invalid `aggregate` parameter reports both errors instead of just the first. --- src/Data/Validation.hs | 71 ++++++++++++++++++++++++++++++++++++++++++ volca.cabal | 1 + 2 files changed, 72 insertions(+) create mode 100644 src/Data/Validation.hs diff --git a/src/Data/Validation.hs b/src/Data/Validation.hs new file mode 100644 index 00000000..43efd733 --- /dev/null +++ b/src/Data/Validation.hs @@ -0,0 +1,71 @@ +{- | An Applicative-only validation type that accumulates errors via the +'Semigroup' on @e@ — the canonical example, in Milewski's +/Category Theory for Programmers/, of an Applicative that is /not/ a +Monad. A lawful 'Monad' instance for 'Validation' would have to +short-circuit on the first 'Failure' (to make @>>=@ associative w.r.t. +the underlying 'Either' behaviour); that would erase the accumulation +that motivates the type, so we stop at 'Applicative'. + +Use: + +@ + validateAll + :: Maybe Text -> Maybe Text -> Validation (NonEmpty Text) (Foo, Bar) + validateAll a b = (,) \<$\> validateA a \<*\> validateB b +@ + +When both @validateA@ and @validateB@ fail, the resulting 'Failure' +carries /both/ messages, not just the first. +-} +module Data.Validation ( + Validation (..), + toEither, + fromEither, + failure, + success, +) where + +import Data.List.NonEmpty (NonEmpty) +import qualified Data.List.NonEmpty as NE + +-- | @Failure e@ accumulates via @e@'s 'Semigroup'; @Success a@ propagates. +data Validation e a + = Failure !e + | Success !a + deriving (Eq, Show) + +instance Functor (Validation e) where + fmap _ (Failure e) = Failure e + fmap f (Success a) = Success (f a) + +{- | The 'Applicative' instance is the whole point: when both arguments +are 'Failure', their error payloads are combined via @(\<\>)@. This is the +defining behaviour that distinguishes 'Validation' from 'Either'. +-} +instance Semigroup e => Applicative (Validation e) where + pure = Success + Failure e1 <*> Failure e2 = Failure (e1 <> e2) + Failure e <*> Success _ = Failure e + Success _ <*> Failure e = Failure e + Success f <*> Success a = Success (f a) + +-- | Convert to 'Either' once accumulation is done (e.g. at a Servant boundary). +toEither :: Validation e a -> Either e a +toEither (Failure e) = Left e +toEither (Success a) = Right a + +-- | Inject an 'Either' into 'Validation'. Use 'fromEither' to lift a +-- short-circuiting parser into an accumulating context. +fromEither :: Either e a -> Validation e a +fromEither (Left e) = Failure e +fromEither (Right a) = Success a + +-- | Build a singleton-error failure. Convenient for 'NonEmpty'-keyed +-- validators where each leaf check produces exactly one error message. +failure :: e -> Validation (NonEmpty e) a +failure = Failure . NE.singleton + +-- | Inject a value into 'Success'. Symmetric to 'failure'; useful in +-- chains where readability is helped by an explicit constructor. +success :: a -> Validation e a +success = Success diff --git a/volca.cabal b/volca.cabal index ba29b9ec..458a07d9 100644 --- a/volca.cabal +++ b/volca.cabal @@ -23,6 +23,7 @@ library import: warnings hs-source-dirs: src exposed-modules: Types + , Data.Validation , Progress , UnitConversion , Config From d0ec56e83a0561a7411144475fdb0e1e59bc249a Mon Sep 17 00:00:00 2001 From: Christophe Combelles Date: Tue, 26 May 2026 00:52:36 +0200 Subject: [PATCH 05/43] refactor(api): aggregate validation via Validation Applicative MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the nested case/throwError ladder in the aggregate endpoint's parameter validation (Routes.hs:1296-1318) with an Applicative chain over Validation (NonEmpty Text). A request with multiple bad query params (e.g. invalid `scope` AND invalid `aggregate`) now reports every error at once, joined by "; ", instead of just the first. Categorically: Validation accumulates over the Semigroup of its error type. The cross-check between `scope` and `filter_exchange_type` stays in Handler because Validation is not a Monad — by design, since a lawful Monad would have to short-circuit and erase the accumulation. openapi.json byte-identical; hspec suite green at 1052/1052. --- src/API/Routes.hs | 45 ++++++++++++++++++++++++++++----------------- 1 file changed, 28 insertions(+), 17 deletions(-) diff --git a/src/API/Routes.hs b/src/API/Routes.hs index 15b483b2..e7ff05b9 100644 --- a/src/API/Routes.hs +++ b/src/API/Routes.hs @@ -1,6 +1,7 @@ {-# LANGUAGE BangPatterns #-} {-# LANGUAGE DataKinds #-} {-# LANGUAGE DeriveGeneric #-} +{-# LANGUAGE LambdaCase #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE TypeOperators #-} @@ -20,6 +21,7 @@ import Data.Aeson import qualified Data.ByteString.Lazy as BSL import Data.List (find, intercalate, sortBy, sortOn) import qualified Data.List.NonEmpty as NE +import qualified Data.Validation as V import qualified Data.Map as M import Data.Maybe (fromMaybe, isJust, isNothing, mapMaybe) import Data.OpenApi (OpenApi, ToSchema) @@ -1293,29 +1295,38 @@ lcaServer dbManager maxTreeDepth password hostingConfig classificationPresets = groupByParam aggregateParam = do (db, sharedSolver) <- requireDatabaseByName dbManager dbName - scope <- case scopeParam of - Just "direct" -> return Agg.ScopeDirect - Just "supply_chain" -> return Agg.ScopeSupplyChain - Just "biosphere" -> return Agg.ScopeBiosphere - _ -> throwError err400{errBody = "scope must be one of: direct | supply_chain | biosphere"} - exchangeType <- case fexchangeTypeParam of - Nothing -> return Nothing - Just "technosphere" -> return (Just Agg.KindTechnosphere) - Just "biosphere" -> return (Just Agg.KindBiosphere) - Just "waste" -> return (Just Agg.KindWaste) - Just _ -> throwError err400{errBody = "filter_exchange_type must be one of: technosphere | biosphere | waste"} + -- Field-level validation via the Validation Applicative (accumulating). + -- A request with both an invalid `scope` and an invalid `aggregate` + -- now reports both errors at once, instead of just the first. + let parseScope = \case + Just "direct" -> V.Success Agg.ScopeDirect + Just "supply_chain" -> V.Success Agg.ScopeSupplyChain + Just "biosphere" -> V.Success Agg.ScopeBiosphere + _ -> V.failure "scope must be one of: direct | supply_chain | biosphere" + parseExType = \case + Nothing -> V.Success Nothing + Just "technosphere" -> V.Success (Just Agg.KindTechnosphere) + Just "biosphere" -> V.Success (Just Agg.KindBiosphere) + Just "waste" -> V.Success (Just Agg.KindWaste) + Just _ -> V.failure "filter_exchange_type must be one of: technosphere | biosphere | waste" + parseAgg = \case + Nothing -> V.Success Agg.AggSum + Just "sum_quantity" -> V.Success Agg.AggSum + Just "count" -> V.Success Agg.AggCount + Just "share" -> V.Success Agg.AggShare + Just other -> V.failure ("aggregate must be one of: sum_quantity | count | share (got " <> other <> ")") + (scope, exchangeType, aggFn) <- + case V.toEither $ (,,) <$> parseScope scopeParam <*> parseExType fexchangeTypeParam <*> parseAgg aggregateParam of + Left errs -> throwError err400{errBody = BSL.fromStrict (T.encodeUtf8 (T.intercalate "; " (NE.toList errs)))} + Right v -> pure v + -- Cross-check requires the parsed scope value, so it runs after the + -- Applicative phase. Validation is not a Monad, by design. case (exchangeType, scope) of (Just _, Agg.ScopeBiosphere) -> throwError err400{errBody = "filter_exchange_type is redundant with scope=biosphere"} (Just _, Agg.ScopeSupplyChain) -> throwError err400{errBody = "filter_exchange_type is not supported with scope=supply_chain (all entries are technosphere)"} _ -> return () - aggFn <- case aggregateParam of - Nothing -> return Agg.AggSum - Just "sum_quantity" -> return Agg.AggSum - Just "count" -> return Agg.AggCount - Just "share" -> return Agg.AggShare - Just other -> throwError err400{errBody = "aggregate must be one of: sum_quantity | count | share (got " <> BSL.fromStrict (T.encodeUtf8 other) <> ")"} let presetFilters = expandPreset classificationPresets presetParam explicitFilters = mapMaybe parseClassFilter fclassParams params = From d9cdea8ea59b0fdd27f296491d3e74ccc1a7fe8c Mon Sep 17 00:00:00 2001 From: Christophe Combelles Date: Tue, 26 May 2026 00:55:21 +0200 Subject: [PATCH 06/43] refactor(loader): UnlinkedSummary becomes a Monoid MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the hand-rolled emptyUnlinkedSummary / mergeUnlinkedSummaries pair with a Semigroup + Monoid instance on UnlinkedSummary. Call sites that used `foldr mergeUnlinkedSummaries emptyUnlinkedSummary summaries` collapse to `mconcat summaries`; the scattered `emptyUnlinkedSummary` constructor calls in fixExchangeLink and fixExchangeLinkByName become `mempty`. Categorically: UnlinkedSummary is the product of four monoids — a Map T.Text [UnlinkedExchange] under unionWith (++), plus three Int counters under addition. The instance is hand-written rather than derived via Generically because bare Int has no canonical Monoid (Sum vs Product is ambiguous) and wrapping the fields as Sum Int would have leaked through every constructor and accessor. Tests in test/LoaderSpec.hs updated to exercise the new <>/mempty surface. --- src/Database/Loader.hs | 48 ++++++++++++++++++++---------------------- test/LoaderSpec.hs | 16 +++++++------- 2 files changed, 31 insertions(+), 33 deletions(-) diff --git a/src/Database/Loader.hs b/src/Database/Loader.hs index 0898d18a..405a240d 100644 --- a/src/Database/Loader.hs +++ b/src/Database/Loader.hs @@ -68,8 +68,6 @@ module Database.Loader ( generateActivityUUIDFromActivity, getReferenceProductUUID, UnlinkedSummary (..), - emptyUnlinkedSummary, - mergeUnlinkedSummaries, buildSupplierIndex, buildSupplierIndexByName, fixExchangeLinkByName, @@ -234,7 +232,14 @@ data UnlinkedExchange = UnlinkedExchange } deriving (Eq, Ord, Show) --- | Summary of unlinked exchanges grouped by consumer activity +{- | Summary of unlinked exchanges grouped by consumer activity. + +Categorically the product of four monoids: +'M.unionWith (++)' on the activity map, and addition on each 'Int' +counter. We hand-write the instance because bare 'Int' has no canonical +'Monoid' (Sum vs Product is ambiguous); wrapping the fields as 'Sum Int' +would have leaked through every constructor and accessor. +-} data UnlinkedSummary = UnlinkedSummary { usActivities :: !(M.Map T.Text [UnlinkedExchange]) -- consumer name → list of unlinked exchanges , usTotalLinks :: !Int @@ -243,19 +248,12 @@ data UnlinkedSummary = UnlinkedSummary } deriving (Show) --- | Empty unlinked summary -emptyUnlinkedSummary :: UnlinkedSummary -emptyUnlinkedSummary = UnlinkedSummary M.empty 0 0 0 +instance Semigroup UnlinkedSummary where + UnlinkedSummary a1 t1 f1 m1 <> UnlinkedSummary a2 t2 f2 m2 = + UnlinkedSummary (M.unionWith (++) a1 a2) (t1 + t2) (f1 + f2) (m1 + m2) --- | Merge two unlinked summaries -mergeUnlinkedSummaries :: UnlinkedSummary -> UnlinkedSummary -> UnlinkedSummary -mergeUnlinkedSummaries s1 s2 = - UnlinkedSummary - { usActivities = M.unionWith (++) (usActivities s1) (usActivities s2) - , usTotalLinks = usTotalLinks s1 + usTotalLinks s2 - , usFoundLinks = usFoundLinks s1 + usFoundLinks s2 - , usMissingLinks = usMissingLinks s1 + usMissingLinks s2 - } +instance Monoid UnlinkedSummary where + mempty = UnlinkedSummary M.empty 0 0 0 -- | Report grouped summary of unlinked exchanges reportUnlinkedSummary :: UnlinkedSummary -> IO () @@ -412,7 +410,7 @@ fixAllActivities :: ExchangeLinkContext -> ActivityMap -> (ActivityMap, Unlinked fixAllActivities ctx activities = let results = M.map (fixActivityExchanges ctx) activities summaries = map snd $ M.elems results - combinedSummary = foldr mergeUnlinkedSummaries emptyUnlinkedSummary summaries + combinedSummary = mconcat summaries fixedActivities = M.map fst results in (fixedActivities, combinedSummary) @@ -420,7 +418,7 @@ fixAllActivities ctx activities = fixActivityExchanges :: ExchangeLinkContext -> Activity -> (Activity, UnlinkedSummary) fixActivityExchanges ctx act = let (fixedExchanges, summaries) = unzip $ map (fixExchangeLink ctx (activityName act)) (exchanges act) - combinedSummary = foldr mergeUnlinkedSummaries emptyUnlinkedSummary summaries + combinedSummary = mconcat summaries in (act{exchanges = fixedExchanges}, combinedSummary) {- | Fix a single exchange's activity link by (flowName, location) match. @@ -462,13 +460,13 @@ fixExchangeLink ExchangeLinkContext{..} consumerName ex@TechnosphereExchange{tec Nothing -> unlinked flow lookupLoc Nothing -> (ex, UnlinkedSummary M.empty 1 0 1) - | otherwise = (ex, emptyUnlinkedSummary) -fixExchangeLink _ _ ex@BiosphereExchange{} = (ex, emptyUnlinkedSummary) + | otherwise = (ex, mempty) +fixExchangeLink _ _ ex@BiosphereExchange{} = (ex, mempty) -- A WasteExchange in input direction (consumed by treatment) would benefit -- from the same supplier-lookup logic as a technosphere Input, but at this -- stage we leave waste links to the cross-DB linker (see CrossLinking) and -- the downstream parsers. Pure pass-through here. -fixExchangeLink _ _ ex@WasteExchange{} = (ex, emptyUnlinkedSummary) +fixExchangeLink _ _ ex@WasteExchange{} = (ex, mempty) {- | Load all EcoSpold files with optimized parallel processing and deduplication. @@ -567,7 +565,7 @@ fixAllActivitiesByName :: NameOnlyIndex -> TechFlowDB -> ActivityMap -> (Activit fixAllActivitiesByName idx techFlowDb activities = let results = M.map (fixActivityExchangesByName idx techFlowDb) activities summaries = map snd $ M.elems results - combinedSummary = foldr mergeUnlinkedSummaries emptyUnlinkedSummary summaries + combinedSummary = mconcat summaries fixedActivities = M.map fst results in (fixedActivities, combinedSummary) @@ -575,7 +573,7 @@ fixAllActivitiesByName idx techFlowDb activities = fixActivityExchangesByName :: NameOnlyIndex -> TechFlowDB -> Activity -> (Activity, UnlinkedSummary) fixActivityExchangesByName idx techFlowDb act = let (fixedExchanges, summaries) = unzip $ map (fixExchangeLinkByName idx techFlowDb (activityName act)) (exchanges act) - combinedSummary = foldr mergeUnlinkedSummaries emptyUnlinkedSummary summaries + combinedSummary = mconcat summaries in (act{exchanges = fixedExchanges}, combinedSummary) {- | Fix a single exchange's activity link using name-only matching. @@ -608,10 +606,10 @@ fixExchangeLinkByName idx techFlowDb consumerName ex@TechnosphereExchange{techFl Nothing -> -- Flow not in technosphere map — shouldn't happen but be safe (ex, UnlinkedSummary M.empty 1 0 1) - | otherwise = (ex, emptyUnlinkedSummary) -- Reference products: nothing to relink -fixExchangeLinkByName _ _ _ ex@BiosphereExchange{} = (ex, emptyUnlinkedSummary) + | otherwise = (ex, mempty) -- Reference products: nothing to relink +fixExchangeLinkByName _ _ _ ex@BiosphereExchange{} = (ex, mempty) -- Waste link resolution is deferred to the cross-DB linker path. -fixExchangeLinkByName _ _ _ ex@WasteExchange{} = (ex, emptyUnlinkedSummary) +fixExchangeLinkByName _ _ _ ex@WasteExchange{} = (ex, mempty) -- | Load EcoSpold files from directory loadEcoSpoldDirectory :: M.Map T.Text T.Text -> FilePath -> IO (Either T.Text SimpleDatabase) diff --git a/test/LoaderSpec.hs b/test/LoaderSpec.hs index afc0da3c..a51d16d0 100644 --- a/test/LoaderSpec.hs +++ b/test/LoaderSpec.hs @@ -143,26 +143,26 @@ spec = do getReferenceProductUUID act `shouldBe` UUID.nil -- ----------------------------------------------------------------------- - -- mergeUnlinkedSummaries + -- UnlinkedSummary Monoid (product of monoids: Map-union + 3× Int addition) -- ----------------------------------------------------------------------- - describe "mergeUnlinkedSummaries" $ do - it "sums all counters" $ do + describe "UnlinkedSummary Monoid" $ do + it "sums all counters via (<>)" $ do let s1 = UnlinkedSummary M.empty 10 8 2 s2 = UnlinkedSummary M.empty 5 3 2 - m = mergeUnlinkedSummaries s1 s2 + m = s1 <> s2 usTotalLinks m `shouldBe` 15 usFoundLinks m `shouldBe` 11 usMissingLinks m `shouldBe` 4 - it "unions activity maps" $ do + it "unions activity maps via (<>)" $ do let s1 = UnlinkedSummary (M.singleton "actA" []) 1 0 1 s2 = UnlinkedSummary (M.singleton "actB" []) 1 0 1 - m = mergeUnlinkedSummaries s1 s2 + m = s1 <> s2 M.size (usActivities m) `shouldBe` 2 - it "emptyUnlinkedSummary is the identity" $ do + it "mempty is the identity" $ do let s = UnlinkedSummary M.empty 3 2 1 - m = mergeUnlinkedSummaries s emptyUnlinkedSummary + m = s <> mempty usTotalLinks m `shouldBe` 3 usFoundLinks m `shouldBe` 2 usMissingLinks m `shouldBe` 1 From 4c1ceaaebdbea019864612e212cb9b8177b409f2 Mon Sep 17 00:00:00 2001 From: Christophe Combelles Date: Tue, 26 May 2026 00:56:59 +0200 Subject: [PATCH 07/43] feat(app): introduce AppM = ReaderT AppEnv Handler MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add App.Env exposing: - AppEnv: the read-only application environment (DatabaseManager, max tree depth, optional password, hosting config, classification presets) currently threaded explicitly through five lcaServer parameters and into every handler. - AppM = ReaderT AppEnv Handler, with GeneralizedNewtypeDeriving on Functor / Applicative / Monad / MonadIO / MonadReader / MonadError. - runApp :: AppEnv -> AppM a -> Handler a, the natural transformation AppM ~> Handler that Servant's hoistServer will use to lift a ServerT api AppM into ServerT api Handler at the API boundary. - Five "Has*" capability classes (HasDatabaseManager, HasMaxTreeDepth, HasPassword, HasHostingConfig, HasClassificationPresets) — narrow typeclass projections of the environment so handlers can declare only the slice they consume. Pure addition. No caller migrated in this commit. The next commit threads AppM through lcaServer and the ~30 handler bodies, removing the requireDatabaseByName + getMergedUnitConfig boilerplate that currently begins every handler. Categorically: AppM lives in the Kleisli category of Reader AppEnv lifted into Handler's Kleisli category. The Has-pattern is the projection morphism from the environment object onto each component. --- src/App/Env.hs | 129 +++++++++++++++++++++++++++++++++++++++++++++++++ volca.cabal | 1 + 2 files changed, 130 insertions(+) create mode 100644 src/App/Env.hs diff --git a/src/App/Env.hs b/src/App/Env.hs new file mode 100644 index 00000000..9cd1d3b6 --- /dev/null +++ b/src/App/Env.hs @@ -0,0 +1,129 @@ +{-# LANGUAGE DerivingStrategies #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE FlexibleInstances #-} +{-# LANGUAGE GeneralizedNewtypeDeriving #-} +{-# LANGUAGE MultiParamTypeClasses #-} +{-# LANGUAGE RankNTypes #-} + +{- | The 'AppM' capability monad and the read-only environment that every +HTTP handler closes over. + +The categorical structure: 'AppM' is a 'Reader' monad transformer over +Servant's 'Handler', so it lives in the Kleisli category of @Reader +AppEnv@ lifted into @Handler@'s Kleisli category. 'hoistServer' (used +in 'API.Routes.lcaServer') is the natural transformation +@forall a. AppM a -> Handler a@ that turns a @ServerT api AppM@ into a +@ServerT api Handler@ — Servant doesn't care about the monad, as long +as we can collapse it down to 'Handler' at the boundary. + +The 'Has*' typeclasses are narrow capability witnesses: a function with +@(MonadReader r m, HasDatabaseManager r, MonadIO m) => m a@ declares +exactly the slice of the environment it needs, without committing to +the concrete monad. This is the "Has-pattern" of mtl-style: capability +classes are *projections* out of the environment object. +-} +module App.Env ( + -- * Environment + AppEnv (..), + mkAppEnv, + + -- * Monad + AppM (..), + runApp, + + -- * Capability classes (Has-pattern) + HasDatabaseManager (..), + HasMaxTreeDepth (..), + HasPassword (..), + HasHostingConfig (..), + HasClassificationPresets (..), +) where + +import Control.Monad.Except (MonadError) +import Control.Monad.IO.Class (MonadIO) +import Control.Monad.Reader (MonadReader, ReaderT (..)) +import qualified Config +import Database.Manager (DatabaseManager) +import Servant (Handler, ServerError) + +-- | Read-only application environment threaded through every request. +data AppEnv = AppEnv + { aeDbManager :: !DatabaseManager + , aeMaxTreeDepth :: !Int + , aePassword :: !(Maybe String) + , aeHostingConfig :: !(Maybe Config.HostingConfig) + , aeClassificationPresets :: ![Config.ClassificationPreset] + } + +-- | Smart constructor — keeps callers from positionally swapping fields. +mkAppEnv + :: DatabaseManager + -> Int + -> Maybe String + -> Maybe Config.HostingConfig + -> [Config.ClassificationPreset] + -> AppEnv +mkAppEnv = AppEnv + +{- | Servant 'Handler' threaded with a read-only 'AppEnv'. Deriving the +@MonadReader@ / @MonadIO@ / @MonadError ServerError@ instances via +@GeneralizedNewtypeDeriving@ keeps the wrapper free at runtime: 'AppM' +is representationally a function @AppEnv -> IO (Either ServerError a)@ +under the hood, identical to 'Handler' modulo the @AppEnv@ argument. +-} +newtype AppM a = AppM {unAppM :: ReaderT AppEnv Handler a} + deriving newtype (Functor, Applicative, Monad, MonadIO, MonadReader AppEnv, MonadError ServerError) + +{- | Discharge an 'AppM' computation against a concrete environment, +producing a plain Servant 'Handler'. Use in 'API.Routes.lcaServer' as +@hoistServer lcaAPI (runApp env) handlers@. + +This is the natural transformation @AppM ~> Handler@ that Servant's +@hoistServer@ requires; it lifts the entire @ServerT api AppM@ into +@ServerT api Handler@ point-free. +-} +runApp :: AppEnv -> AppM a -> Handler a +runApp env (AppM m) = runReaderT m env + +-- --------------------------------------------------------------------------- +-- Has-pattern: narrow capability witnesses +-- --------------------------------------------------------------------------- + +{- | Witness that the environment exposes a 'DatabaseManager'. Handlers +that need DB access should constrain on @HasDatabaseManager r@ rather +than the concrete 'AppEnv', so the same code can run in tests with a +narrower env. +-} +class HasDatabaseManager r where + getDatabaseManager :: r -> DatabaseManager + +instance HasDatabaseManager AppEnv where + getDatabaseManager = aeDbManager + +-- | Max tree depth limit (anti-DoS guard for /tree and /graph endpoints). +class HasMaxTreeDepth r where + getMaxTreeDepth :: r -> Int + +instance HasMaxTreeDepth AppEnv where + getMaxTreeDepth = aeMaxTreeDepth + +-- | Optional admin password gating @POST /auth@. +class HasPassword r where + getPassword :: r -> Maybe String + +instance HasPassword AppEnv where + getPassword = aePassword + +-- | Hosting configuration consumed by @GET /hosting@. +class HasHostingConfig r where + getHostingConfig :: r -> Maybe Config.HostingConfig + +instance HasHostingConfig AppEnv where + getHostingConfig = aeHostingConfig + +-- | Classification presets used in @/aggregate@ and @/supply-chain@ filters. +class HasClassificationPresets r where + getClassificationPresets :: r -> [Config.ClassificationPreset] + +instance HasClassificationPresets AppEnv where + getClassificationPresets = aeClassificationPresets diff --git a/volca.cabal b/volca.cabal index 458a07d9..a5269356 100644 --- a/volca.cabal +++ b/volca.cabal @@ -23,6 +23,7 @@ library import: warnings hs-source-dirs: src exposed-modules: Types + , App.Env , Data.Validation , Progress , UnitConversion From d0a9b1f4faae4e1caaa97c74518a2585d7e856fd Mon Sep 17 00:00:00 2001 From: Christophe Combelles Date: Tue, 26 May 2026 01:08:14 +0200 Subject: [PATCH 08/43] refactor(api): migrate Servant handlers to AppM via hoistServer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Convert lcaServer and the underlying handler / helper layer in src/API/Routes.hs and src/API/DatabaseHandlers.hs to live in the AppM capability monad (ReaderT AppEnv Handler) introduced in the prior commit. The natural transformation `runApp env :: forall a. AppM a -> Handler a` is applied via Servant.hoistServer at the API boundary, so the LCAAPI route type itself is unchanged and openapi.json stays byte-identical against the pre-branch baseline. What changes architecturally: - lcaServer :: DatabaseManager -> Int -> Maybe String -> Maybe Config.HostingConfig -> [Config.ClassificationPreset] -> Server LCAAPI becomes lcaServer :: AppEnv -> Server LCAAPI, with the five env fields exposed via let-bindings inside the where block for the handlers' closure-style access (LOC-equivalent to inserting an `asks` at every call site, and easier to read). - All ~30 fanout calls of DBHandlers.X dbManager collapse to DBHandlers.X — DBHandlers is now AppM-typed end to end. - The shared cross-DB helpers (requireDatabaseByName, requireFullyLinked, inventoryWithDeps, solutionWithDeps, inventoriesWithDeps, solutionsWithDeps, activityLCIABatchH, batchImpactsH) drop their DatabaseManager-as-first-arg parameter and read it from the env via `asks getDatabaseManager`. - app/Main.hs packs the AppEnv once with mkAppEnv before passing it to lcaServer. - src/API/BatchImpacts.hs (the non-Servant MCP-side wrapper) builds a minimal AppEnv on the fly and discharges the AppM with runApp + Servant.runHandler. What does *not* change: - The LCAAPI route surface and JSON contract (openapi.json byte-identical). - The full hspec suite (1052/1052) is green. - The handler logic and error mapping. This commit is roughly LOC-neutral on its own (the `asks` insertions in helpers offset the arg removals at call sites), but it is the foundation that the next commit (optparse dedup) and any future capability-style refactor (MCP Freer, finer Has-classes per endpoint, applicative request validation) build on. Categorically: lcaServer now lives in the Kleisli category of `Reader AppEnv` lifted into Handler's Kleisli category. hoistServer is the ServerT-functor's action on the natural transformation AppM ~> Handler. --- app/Main.hs | 3 +- src/API/BatchImpacts.hs | 20 +-- src/API/DatabaseHandlers.hs | 122 ++++++++------ src/API/Routes.hs | 314 +++++++++++++++++++----------------- 4 files changed, 251 insertions(+), 208 deletions(-) diff --git a/app/Main.hs b/app/Main.hs index 6a3892b4..62593b08 100644 --- a/app/Main.hs +++ b/app/Main.hs @@ -37,6 +37,7 @@ import Progress import API.Licenses (licensesResponse) import API.MCP (mcpApp, toolDefinitions) import API.Routes (lcaAPI, lcaServer, volcaOpenApi) +import App.Env (mkAppEnv) import Data.Aeson (encode) import qualified Data.ByteString as BS import qualified Data.ByteString.Char8 as C8 @@ -284,7 +285,7 @@ createServerApp dbManager maxTreeDepth staticDir desktopMode password hostingCon respond $ responseLBS status200 [(hContentType, "text/html; charset=utf-8")] swaggerHtml | path == "/api/v1/logs/stream" -> handleLogStream req respond | C8.pack "/api/" `BS.isPrefixOf` path -> - serve lcaAPI (lcaServer dbManager maxTreeDepth password hostingConfig filterPresets) req respond + serve lcaAPI (lcaServer (mkAppEnv dbManager maxTreeDepth password hostingConfig filterPresets)) req respond | C8.pack "/static/" `BS.isPrefixOf` path -> serveStripped | otherwise -> serveSpaIndex diff --git a/src/API/BatchImpacts.hs b/src/API/BatchImpacts.hs index b7b25810..1624ff49 100644 --- a/src/API/BatchImpacts.hs +++ b/src/API/BatchImpacts.hs @@ -21,6 +21,7 @@ module API.BatchImpacts ( import API.Routes (activityLCIABatchH, batchImpactsH, collectionNotLoadedPrefix, databaseNotLoadedPrefix) import API.Types (BatchImpactsRequest (..), BatchImpactsResponse, LCIABatchResult, SubstitutionRequest) +import App.Env (mkAppEnv, runApp) import Control.Concurrent.STM (readTVarIO) import qualified Data.ByteString.Lazy as BSL import qualified Data.Map as M @@ -77,7 +78,8 @@ runActivityLCIABatch :: Maybe SubstitutionRequest -> IO (Either BatchError LCIABatchResult) runActivityLCIABatch dbm dbName pid coll mSub = do - res <- Servant.runHandler (activityLCIABatchH dbm dbName pid coll mSub) + let env = mkAppEnv dbm 0 Nothing Nothing [] + res <- Servant.runHandler (runApp env (activityLCIABatchH dbName pid coll mSub)) case res of Right lbr -> pure (Right lbr) Left se -> Left <$> translateErrorIO dbm se @@ -99,15 +101,15 @@ runBatchImpacts :: [Text] -> IO (Either BatchError BatchImpactsResponse) runBatchImpacts dbm dbName coll topFlows pids = do + let env = mkAppEnv dbm 0 Nothing Nothing [] res <- - Servant.runHandler - ( batchImpactsH - dbm - dbName - coll - topFlows - (BatchImpactsRequest{birProcessIds = pids}) - ) + Servant.runHandler $ + runApp env $ + batchImpactsH + dbName + coll + topFlows + (BatchImpactsRequest{birProcessIds = pids}) case res of Right r -> pure (Right r) Left se -> Left <$> translateErrorIO dbm se diff --git a/src/API/DatabaseHandlers.hs b/src/API/DatabaseHandlers.hs index abb4782d..c87f21fe 100644 --- a/src/API/DatabaseHandlers.hs +++ b/src/API/DatabaseHandlers.hs @@ -131,17 +131,21 @@ import Database.Upload ( ) import qualified Database.UploadedDatabase as UploadedDB import Types (Database (..), GeographyPolicy (..), unresolvedCount) +import App.Env (AppM, HasDatabaseManager (..)) +import Control.Monad.Reader (asks) -- | List all databases -getDatabases :: DatabaseManager -> Handler DatabaseListResponse -getDatabases dbManager = do +getDatabases :: AppM DatabaseListResponse +getDatabases = do + dbManager <- asks getDatabaseManager dbStatuses <- liftIO $ listDatabases dbManager let statusList = map convertDbStatus dbStatuses return $ DatabaseListResponse statusList -- | Load a database on demand -loadDatabaseHandler :: DatabaseManager -> Text -> Handler LoadDatabaseResponse -loadDatabaseHandler dbManager dbName = do +loadDatabaseHandler :: Text -> AppM LoadDatabaseResponse +loadDatabaseHandler dbName= do + dbManager <- asks getDatabaseManager eitherResult <- liftIO $ try $ loadDatabase dbManager dbName case eitherResult of Left (ex :: SomeException) -> @@ -152,16 +156,18 @@ loadDatabaseHandler dbManager dbName = do return $ LoadSucceeded status depResults -- | Unload a database from memory -unloadDatabaseHandler :: DatabaseManager -> Text -> Handler ActivateResponse -unloadDatabaseHandler dbManager dbName = +unloadDatabaseHandler :: Text -> AppM ActivateResponse +unloadDatabaseHandler dbName = do + dbManager <- asks getDatabaseManager simpleAction (unloadDatabase dbManager dbName) ("Unloaded database: " <> dbName) {- | Re-run cross-DB linking for a loaded database against the currently-loaded dependency databases. Lets the user recover from loads that happened in a suboptimal order without reloading the whole database. -} -relinkDatabaseHandler :: DatabaseManager -> Text -> Handler RelinkResponse -relinkDatabaseHandler dbManager dbName = do +relinkDatabaseHandler :: Text -> AppM RelinkResponse +relinkDatabaseHandler dbName= do + dbManager <- asks getDatabaseManager res <- liftIO $ relinkDatabase dbManager dbName case res of Left err -> throwError err404{errBody = BSL.fromStrict $ T.encodeUtf8 err} @@ -176,13 +182,15 @@ relinkDatabaseHandler dbManager dbName = do } -- | Delete an uploaded database (move to trash) -deleteDatabaseHandler :: DatabaseManager -> Text -> Handler ActivateResponse -deleteDatabaseHandler dbManager dbName = +deleteDatabaseHandler :: Text -> AppM ActivateResponse +deleteDatabaseHandler dbName = do + dbManager <- asks getDatabaseManager simpleAction (removeDatabase dbManager dbName) ("Deleted database: " <> dbName) -- | Upload a new database -uploadDatabaseHandler :: DatabaseManager -> UploadRequest -> Handler UploadResponse -uploadDatabaseHandler dbManager req = do +uploadDatabaseHandler :: UploadRequest -> AppM UploadResponse +uploadDatabaseHandler req= do + dbManager <- asks getDatabaseManager -- Decode base64 ZIP data let zipDataResult = B64.decode $ T.encodeUtf8 $ urFileData req case zipDataResult of @@ -314,8 +322,9 @@ formatToText UnknownFormat = "unknown" {- | Get database setup info Returns completeness, missing suppliers, and dependency suggestions -} -getDatabaseSetupHandler :: DatabaseManager -> Text -> Handler DatabaseSetupInfo -getDatabaseSetupHandler dbManager dbName = do +getDatabaseSetupHandler :: Text -> AppM DatabaseSetupInfo +getDatabaseSetupHandler dbName= do + dbManager <- asks getDatabaseManager eitherResult <- liftIO $ try $ getDatabaseSetupInfo dbManager dbName case eitherResult of Left (ex :: SomeException) -> @@ -327,8 +336,9 @@ getDatabaseSetupHandler dbManager dbName = do {- | Add a dependency to a staged database Runs cross-DB linking and returns updated setup info -} -addDependencyHandler :: DatabaseManager -> Text -> Text -> Handler DatabaseSetupInfo -addDependencyHandler dbManager dbName depName = do +addDependencyHandler :: Text -> Text -> AppM DatabaseSetupInfo +addDependencyHandler dbName depName= do + dbManager <- asks getDatabaseManager result <- liftIO $ addDependencyToStaged dbManager dbName depName case result of Left err -> throwError $ err400{errBody = BSL.fromStrict $ T.encodeUtf8 err} @@ -337,16 +347,18 @@ addDependencyHandler dbManager dbName depName = do {- | Remove a dependency from a staged database Re-runs cross-DB linking and returns updated setup info -} -removeDependencyHandler :: DatabaseManager -> Text -> Text -> Handler DatabaseSetupInfo -removeDependencyHandler dbManager dbName depName = do +removeDependencyHandler :: Text -> Text -> AppM DatabaseSetupInfo +removeDependencyHandler dbName depName= do + dbManager <- asks getDatabaseManager result <- liftIO $ removeDependencyFromStaged dbManager dbName depName case result of Left err -> throwError $ err400{errBody = BSL.fromStrict $ T.encodeUtf8 err} Right setupInfo -> return setupInfo -- | Change the data path for an uploaded (staged) database -setDataPathHandler :: DatabaseManager -> Text -> Value -> Handler DatabaseSetupInfo -setDataPathHandler dbManager dbName body = do +setDataPathHandler :: Text -> Value -> AppM DatabaseSetupInfo +setDataPathHandler dbName body= do + dbManager <- asks getDatabaseManager -- Extract "path" from JSON body let mPath = case body of A.Object obj -> case KM.lookup "path" obj of @@ -364,8 +376,9 @@ setDataPathHandler dbManager dbName body = do {- | Finalize a staged database Builds matrices and makes it ready for queries -} -finalizeDatabaseHandler :: DatabaseManager -> Text -> Handler ActivateResponse -finalizeDatabaseHandler dbManager dbName = do +finalizeDatabaseHandler :: Text -> AppM ActivateResponse +finalizeDatabaseHandler dbName= do + dbManager <- asks getDatabaseManager eitherResult <- liftIO $ try $ finalizeDatabase dbManager dbName case eitherResult of Left (ex :: SomeException) -> @@ -378,8 +391,9 @@ finalizeDatabaseHandler dbManager dbName = do {- | Upload a new method collection Same flow as database upload but creates MethodConfig entry -} -uploadMethodHandler :: DatabaseManager -> UploadRequest -> Handler UploadResponse -uploadMethodHandler dbManager req = do +uploadMethodHandler :: UploadRequest -> AppM UploadResponse +uploadMethodHandler req= do + dbManager <- asks getDatabaseManager let zipDataResult = B64.decode $ T.encodeUtf8 $ urFileData req case zipDataResult of Left err -> return $ UploadResponse False ("Invalid base64 data: " <> T.pack err) Nothing Nothing @@ -433,12 +447,13 @@ uploadMethodHandler dbManager req = do (Just $ formatToText $ urFormat uploadResult) -- | Delete an uploaded method collection -deleteMethodHandler :: DatabaseManager -> Text -> Handler ActivateResponse -deleteMethodHandler dbManager name = +deleteMethodHandler :: Text -> AppM ActivateResponse +deleteMethodHandler name = do + dbManager <- asks getDatabaseManager simpleAction (removeMethodCollection dbManager name) ("Deleted method: " <> name) -- | Common pattern: run an IO action that returns Either Text (), map to ActivateResponse -simpleAction :: IO (Either Text ()) -> Text -> Handler ActivateResponse +simpleAction :: IO (Either Text ()) -> Text -> AppM ActivateResponse simpleAction action successMsg = do result <- liftIO action return $ case result of @@ -481,29 +496,34 @@ convertRefDataStatus s = , rdaEntryCount = rdsEntryCount s } -listRefData :: RefDataKind -> DatabaseManager -> Handler RefDataListResponse -listRefData kind mgr = do +listRefData :: RefDataKind -> AppM RefDataListResponse +listRefData kind = do + dbManager <- asks getDatabaseManager let (listFn, _, _, _, _, _) = rdOps kind - statuses <- liftIO $ listFn mgr + statuses <- liftIO $ listFn dbManager return $ RefDataListResponse (map convertRefDataStatus statuses) -loadRefData :: RefDataKind -> DatabaseManager -> Text -> Handler ActivateResponse -loadRefData kind mgr name = do +loadRefData :: RefDataKind -> Text -> AppM ActivateResponse +loadRefData kind name = do + dbManager <- asks getDatabaseManager let (_, loadFn, _, _, _, _) = rdOps kind - simpleAction (loadFn mgr name) ("Loaded: " <> name) + simpleAction (loadFn dbManager name) ("Loaded: " <> name) -unloadRefData :: RefDataKind -> DatabaseManager -> Text -> Handler ActivateResponse -unloadRefData kind mgr name = do +unloadRefData :: RefDataKind -> Text -> AppM ActivateResponse +unloadRefData kind name = do + dbManager <- asks getDatabaseManager let (_, _, unloadFn, _, _, _) = rdOps kind - simpleAction (unloadFn mgr name) ("Unloaded: " <> name) + simpleAction (unloadFn dbManager name) ("Unloaded: " <> name) -deleteRefData :: RefDataKind -> DatabaseManager -> Text -> Handler ActivateResponse -deleteRefData kind mgr name = do +deleteRefData :: RefDataKind -> Text -> AppM ActivateResponse +deleteRefData kind name = do + dbManager <- asks getDatabaseManager let (_, _, _, _, removeFn, _) = rdOps kind - simpleAction (removeFn mgr name) ("Deleted: " <> name) + simpleAction (removeFn dbManager name) ("Deleted: " <> name) -uploadRefData :: RefDataKind -> DatabaseManager -> UploadRequest -> Handler UploadResponse -uploadRefData kind mgr req = do +uploadRefData :: RefDataKind -> UploadRequest -> AppM UploadResponse +uploadRefData kind req = do + dbManager <- asks getDatabaseManager let (_, _, _, addFn, _, subdir) = rdOps kind let csvDataResult = B64.decode $ T.encodeUtf8 $ urFileData req case csvDataResult of @@ -535,24 +555,26 @@ uploadRefData kind mgr req = do , rdIsAuto = False , rdDescription = urDescription req } - liftIO $ addFn mgr rd + liftIO $ addFn dbManager rd return $ UploadResponse True "Uploaded successfully" (Just slug) Nothing where quote t = "\"" <> T.replace "\"" "\\\"" t <> "\"" -getFlowSynonymGroupsHandler :: DatabaseManager -> Text -> Handler SynonymGroupsResponse -getFlowSynonymGroupsHandler mgr name = do - result <- liftIO $ getFlowSynonymGroups mgr name +getFlowSynonymGroupsHandler :: Text -> AppM SynonymGroupsResponse +getFlowSynonymGroupsHandler name = do + dbManager <- asks getDatabaseManager + result <- liftIO $ getFlowSynonymGroups dbManager name case result of Left err -> throwError $ err404{errBody = BSL.fromStrict $ T.encodeUtf8 err} Right groups -> return $ SynonymGroupsResponse groups -downloadRefDataHandler :: RefDataKind -> DatabaseManager -> Text -> Handler (Headers '[Header "Content-Disposition" Text] BinaryContent) -downloadRefDataHandler kind mgr name = do +downloadRefDataHandler :: RefDataKind -> Text -> AppM (Headers '[Header "Content-Disposition" Text] BinaryContent) +downloadRefDataHandler kind name = do + dbManager <- asks getDatabaseManager let tvar = case kind of - FlowSynonyms -> dmAvailableFlowSyns mgr - CompartmentMappings -> dmAvailableCompMaps mgr - UnitDefs -> dmAvailableUnitDefs mgr + FlowSynonyms -> dmAvailableFlowSyns dbManager + CompartmentMappings -> dmAvailableCompMaps dbManager + UnitDefs -> dmAvailableUnitDefs dbManager available <- liftIO $ readTVarIO tvar case M.lookup name available of Nothing -> throwError $ err404{errBody = "Not found"} diff --git a/src/API/Routes.hs b/src/API/Routes.hs index e7ff05b9..3bcfc67f 100644 --- a/src/API/Routes.hs +++ b/src/API/Routes.hs @@ -55,6 +55,8 @@ import qualified SharedSolver import Tree (buildLoopAwareTree) import Types import qualified Version +import App.Env (AppEnv (..), AppM, HasDatabaseManager (..), runApp) +import Control.Monad.Reader (asks) -- | API type definition - RESTful design with focused endpoints type LCAAPI = @@ -184,8 +186,9 @@ notLoadedBody :: Text -> Text -> BSL.ByteString notLoadedBody prefix name = BSL.fromStrict (T.encodeUtf8 (prefix <> name)) -- | Get database by name, throw 404 if not loaded -requireDatabaseByName :: DatabaseManager -> Text -> Handler (Database, SharedSolver) -requireDatabaseByName dbManager dbName = do +requireDatabaseByName :: Text -> AppM (Database, SharedSolver) +requireDatabaseByName dbName = do + dbManager <- asks getDatabaseManager maybeLoaded <- liftIO $ getDatabase dbManager dbName case maybeLoaded of Just loaded -> return (ldDatabase loaded, ldSharedSolver loaded) @@ -195,7 +198,7 @@ requireDatabaseByName dbManager dbName = do the user to load the missing dep DBs (or POST /relink) rather than silently undercounting impacts. -} -requireFullyLinked :: Text -> Database -> Handler () +requireFullyLinked :: Text -> Database -> AppM () requireFullyLinked dbName db = let n = unresolvedCount (dbLinkingStats db) in when (n > 0) $ @@ -217,17 +220,18 @@ requireFullyLinked dbName db = } -- | Inventory with cross-DB back-substitution; maps unit-conversion errors to 422. -inventoryWithDeps :: DatabaseManager -> Text -> Database -> SharedSolver -> ProcessId -> Handler Inventory -inventoryWithDeps dbManager dbName db solver pid = - SharedSolver.csInventory <$> solutionWithDeps dbManager dbName db solver pid +inventoryWithDeps :: Text -> Database -> SharedSolver -> ProcessId -> AppM Inventory +inventoryWithDeps dbName db solver pid = + SharedSolver.csInventory <$> solutionWithDeps dbName db solver pid {- | Cross-DB inventory + per-DB scaling vectors. The scalings are needed by the regionalized LCIA path (per-DB dot products summed across all DBs reached at request time); the inventory alone is enough for non-regional methods. -} -solutionWithDeps :: DatabaseManager -> Text -> Database -> SharedSolver -> ProcessId -> Handler SharedSolver.CrossDBSolution -solutionWithDeps dbManager dbName db solver pid = do +solutionWithDeps :: Text -> Database -> SharedSolver -> ProcessId -> AppM SharedSolver.CrossDBSolution +solutionWithDeps dbName db solver pid = do + dbManager <- asks getDatabaseManager requireFullyLinked dbName db unitCfg <- liftIO $ getMergedUnitConfig dbManager res <- @@ -244,13 +248,14 @@ solutionWithDeps dbManager dbName db solver pid = do Left err -> throwError err422{errBody = BSL.fromStrict $ T.encodeUtf8 err} -- | Batch inventory with cross-DB back-substitution; maps unit-conversion errors to 422. -inventoriesWithDeps :: DatabaseManager -> Text -> Database -> SharedSolver -> [ProcessId] -> Handler [Inventory] -inventoriesWithDeps dbManager dbName db solver pids = - map SharedSolver.csInventory <$> solutionsWithDeps dbManager dbName db solver pids +inventoriesWithDeps :: Text -> Database -> SharedSolver -> [ProcessId] -> AppM [Inventory] +inventoriesWithDeps dbName db solver pids = + map SharedSolver.csInventory <$> solutionsWithDeps dbName db solver pids -- | Batch variant of 'solutionWithDeps'. -solutionsWithDeps :: DatabaseManager -> Text -> Database -> SharedSolver -> [ProcessId] -> Handler [SharedSolver.CrossDBSolution] -solutionsWithDeps dbManager dbName db solver pids = do +solutionsWithDeps :: Text -> Database -> SharedSolver -> [ProcessId] -> AppM [SharedSolver.CrossDBSolution] +solutionsWithDeps dbName db solver pids = do + dbManager <- asks getDatabaseManager requireFullyLinked dbName db unitCfg <- liftIO $ getMergedUnitConfig dbManager res <- @@ -327,7 +332,7 @@ mkCrossDBContrib dbManager rootDbName _flowDB unitDB score ((depDbName, pid), c) } -- | Helper function to validate ProcessId and lookup activity -withValidatedActivity :: Database -> Text -> (Activity -> Handler a) -> Handler a +withValidatedActivity :: Database -> Text -> (Activity -> AppM a) -> AppM a withValidatedActivity db processId action = do case Service.resolveActivityByProcessId db processId of Left (Service.InvalidProcessId errorMsg) -> throwError err400{errBody = BSL.fromStrict $ T.encodeUtf8 errorMsg} @@ -338,7 +343,7 @@ withValidatedActivity db processId action = do {- | Helper function to validate UUID and lookup flow. Returns a tagged sum so callers can dispatch on tech vs bio. -} -withValidatedFlow :: Database -> Text -> (FlowKind -> Handler a) -> Handler a +withValidatedFlow :: Database -> Text -> (FlowKind -> AppM a) -> AppM a withValidatedFlow db uuid action = do case Service.validateUUID uuid of Left (Service.InvalidUUID errorMsg) -> throwError err400{errBody = BSL.fromStrict $ T.encodeUtf8 errorMsg} @@ -396,7 +401,7 @@ expandPreset presets (Just pn) = case find (\p -> Config.cpName p == pn) presets -- ============================================================================ -- Hoisted helpers — previously in lcaServer's `where`. Lifted to top level so -- non-Servant callers (notably src/API/BatchImpacts.hs and any client of the --- LCIA batch pipeline outside the Servant Handler stack) can reuse them. +-- LCIA batch pipeline outside the Servant AppM stack) can reuse them. -- -- Behavior is byte-identical to the original where-bound versions. -- ============================================================================ @@ -474,7 +479,7 @@ logLCIAResult result method = do HTTP status. Validates the resolved ProcessId against the technosphere matrix index too — see Service.validateProcessIdInMatrixIndex. -} -resolveOrThrow :: Database -> Text -> Handler (ProcessId, Activity) +resolveOrThrow :: Database -> Text -> AppM (ProcessId, Activity) resolveOrThrow db processIdText = case Service.resolveActivityAndProcessId db processIdText of Left (Service.ActivityNotFound _) -> throwError err404{errBody = "Activity not found"} @@ -487,13 +492,13 @@ resolveOrThrow db processIdText = Left e -> internalError e Right () -> return (pid, act) where - internalError :: Service.ServiceError -> Handler a + internalError :: Service.ServiceError -> AppM a internalError e = throwError err500{errBody = BSL.fromStrict $ T.encodeUtf8 $ T.pack $ show e} {- | Translate a Service-level error into the HTTP status used across the cross-DB LCIA paths. -} -throwServiceError :: Service.ServiceError -> Handler a +throwServiceError :: Service.ServiceError -> AppM a throwServiceError (Service.ActivityNotFound _) = throwError err404{errBody = "Activity not found"} throwServiceError (Service.InvalidProcessId msg) = throwError err400{errBody = BSL.fromStrict $ T.encodeUtf8 msg} -- MatrixError covers singular Sherman-Morrison, missing technosphere links, @@ -504,7 +509,7 @@ throwServiceError (Service.InvalidUUID _) = throwError err500{errBody = "Interna throwServiceError (Service.FlowNotFound _) = throwError err500{errBody = "Internal server error"} -- | Load a method collection by name from the live DatabaseManager state. -loadCollection :: DatabaseManager -> Text -> Handler ([Method], [DamageCategory], [NormWeightSet], [ScoringSet]) +loadCollection :: DatabaseManager -> Text -> AppM ([Method], [DamageCategory], [NormWeightSet], [ScoringSet]) loadCollection dbManager collectionName = do loadedCollections <- liftIO $ readTVarIO (dmLoadedMethods dbManager) case M.lookup collectionName loadedCollections of @@ -515,9 +520,9 @@ loadCollection dbManager collectionName = do no-substitution path ('requireFullyLinked' runs inside 'solutionWithDeps'); 'Just' applies the substitutions through the uncached path. -} -crossDBSolutionFor :: DatabaseManager -> Text -> Database -> SharedSolver -> ProcessId -> Maybe SubstitutionRequest -> Handler SharedSolver.CrossDBSolution +crossDBSolutionFor :: DatabaseManager -> Text -> Database -> SharedSolver -> ProcessId -> Maybe SubstitutionRequest -> AppM SharedSolver.CrossDBSolution crossDBSolutionFor dbManager dbName db solver pid mSub = case mSub of - Nothing -> solutionWithDeps dbManager dbName db solver pid + Nothing -> solutionWithDeps dbName db solver pid Just subReq -> do requireFullyLinked dbName db unitCfg <- liftIO $ getMergedUnitConfig dbManager @@ -772,18 +777,18 @@ buildLCIABatchResultCached dbManager dbName db actPid activity collection sol ct computeAllScoringSets (mcScoringSets collection) rawScoreMap pure (mkLCIABatchResult results mNW nwSets scoringResults (mcScoringSets collection) scoringIndicators) -{- | Top-level LCIA batch entry point — Handler-returning. Used by the Servant +{- | Top-level LCIA batch entry point — AppM-returning. Used by the Servant routes (via thin where-aliases) and by API.BatchImpacts. -} activityLCIABatchH :: - DatabaseManager -> Text -> Text -> Text -> Maybe SubstitutionRequest -> - Handler LCIABatchResult -activityLCIABatchH dbManager dbName processIdText collectionName mSub = do - (db, sharedSolver) <- requireDatabaseByName dbManager dbName + AppM LCIABatchResult +activityLCIABatchH dbName processIdText collectionName mSub = do + dbManager <- asks getDatabaseManager + (db, sharedSolver) <- requireDatabaseByName dbName (actProcessId, activity) <- resolveOrThrow db processIdText (methods, damageCats, nwSets, scoringSets) <- loadCollection dbManager collectionName let dcLookup = M.fromList [(subName, dcName dc) | dc <- damageCats, (subName, _) <- dcImpacts dc] @@ -840,14 +845,14 @@ valid PIDs, parallel characterization. Used by the Servant POST route and by API.BatchImpacts. -} batchImpactsH :: - DatabaseManager -> Text -> Text -> Maybe Int -> BatchImpactsRequest -> - Handler BatchImpactsResponse -batchImpactsH dbManager dbName collectionName topFlowsParam req = do - (db, sharedSolver) <- requireDatabaseByName dbManager dbName + AppM BatchImpactsResponse +batchImpactsH dbName collectionName topFlowsParam req = do + dbManager <- asks getDatabaseManager + (db, sharedSolver) <- requireDatabaseByName dbName loadedCollections <- liftIO $ readTVarIO (dmLoadedMethods dbManager) collection <- case M.lookup collectionName loadedCollections of Just mc -> pure mc @@ -861,7 +866,7 @@ batchImpactsH dbManager dbName collectionName topFlowsParam req = do invalid = [pidText | (pidText, Left (Service.InvalidProcessId _)) <- resolved] validPidNums = [pidNum | (_, pidNum, _) <- valid] t0 <- liftIO getCurrentTime - sols <- solutionsWithDeps dbManager dbName db sharedSolver validPidNums + sols <- solutionsWithDeps dbName db sharedSolver validPidNums t1 <- liftIO getCurrentTime ctxs <- liftIO $ mapConcurrently (prepMethodCtx dbManager dbName db) (mcMethods collection) let topFlows = max 0 (fromMaybe 0 topFlowsParam) @@ -906,12 +911,26 @@ batchImpactsH dbManager dbName collectionName topFlowsParam req = do , birInvalid = invalid } -{- | API server implementation -DatabaseManager is used to dynamically fetch current database on each request +{- | API server implementation. Handlers live in 'AppM' (a 'ReaderT' +'AppEnv' over Servant's 'Handler'); 'hoistServer' is the natural +transformation that lifts the @ServerT LCAAPI AppM@ into a plain +@Server LCAAPI@ Servant expects at the WAI boundary. -} -lcaServer :: DatabaseManager -> Int -> Maybe String -> Maybe Config.HostingConfig -> [Config.ClassificationPreset] -> Server LCAAPI -lcaServer dbManager maxTreeDepth password hostingConfig classificationPresets = - getActivityInfo +lcaServer :: AppEnv -> Server LCAAPI +lcaServer env = + hoistServer lcaAPI (runApp env) handlers + where + -- Legacy closure access to env fields, kept so handler bodies (and + -- the helpers they call) don't all need an explicit `asks`. We could + -- replace this with capability-class projections at every call site, + -- but the let-binding is identical in LOC and easier to read. + dbManager = aeDbManager env + maxTreeDepth = aeMaxTreeDepth env + password = aePassword env + hostingConfig = aeHostingConfig env + classificationPresets = aeClassificationPresets env + handlers = + getActivityInfo :<|> getActivityFlows :<|> getActivityInputs :<|> getActivityOutputs @@ -945,42 +964,42 @@ lcaServer dbManager maxTreeDepth password hostingConfig classificationPresets = :<|> searchActivitiesWithCount :<|> getClassifications :<|> postImpactsBatch - :<|> DBHandlers.getDatabases dbManager - :<|> DBHandlers.loadDatabaseHandler dbManager - :<|> DBHandlers.unloadDatabaseHandler dbManager - :<|> DBHandlers.relinkDatabaseHandler dbManager - :<|> DBHandlers.deleteDatabaseHandler dbManager - :<|> DBHandlers.uploadDatabaseHandler dbManager - :<|> DBHandlers.getDatabaseSetupHandler dbManager - :<|> DBHandlers.addDependencyHandler dbManager - :<|> DBHandlers.removeDependencyHandler dbManager - :<|> DBHandlers.setDataPathHandler dbManager - :<|> DBHandlers.finalizeDatabaseHandler dbManager + :<|> DBHandlers.getDatabases + :<|> DBHandlers.loadDatabaseHandler + :<|> DBHandlers.unloadDatabaseHandler + :<|> DBHandlers.relinkDatabaseHandler + :<|> DBHandlers.deleteDatabaseHandler + :<|> DBHandlers.uploadDatabaseHandler + :<|> DBHandlers.getDatabaseSetupHandler + :<|> DBHandlers.addDependencyHandler + :<|> DBHandlers.removeDependencyHandler + :<|> DBHandlers.setDataPathHandler + :<|> DBHandlers.finalizeDatabaseHandler :<|> getMethodCollections :<|> loadMethodCollectionHandler :<|> unloadMethodCollectionHandler - :<|> DBHandlers.deleteMethodHandler dbManager - :<|> DBHandlers.uploadMethodHandler dbManager + :<|> DBHandlers.deleteMethodHandler + :<|> DBHandlers.uploadMethodHandler -- Flow synonyms - :<|> DBHandlers.listRefData DBHandlers.FlowSynonyms dbManager - :<|> DBHandlers.loadRefData DBHandlers.FlowSynonyms dbManager - :<|> DBHandlers.unloadRefData DBHandlers.FlowSynonyms dbManager - :<|> DBHandlers.deleteRefData DBHandlers.FlowSynonyms dbManager - :<|> DBHandlers.uploadRefData DBHandlers.FlowSynonyms dbManager - :<|> DBHandlers.getFlowSynonymGroupsHandler dbManager - :<|> DBHandlers.downloadRefDataHandler DBHandlers.FlowSynonyms dbManager + :<|> DBHandlers.listRefData DBHandlers.FlowSynonyms + :<|> DBHandlers.loadRefData DBHandlers.FlowSynonyms + :<|> DBHandlers.unloadRefData DBHandlers.FlowSynonyms + :<|> DBHandlers.deleteRefData DBHandlers.FlowSynonyms + :<|> DBHandlers.uploadRefData DBHandlers.FlowSynonyms + :<|> DBHandlers.getFlowSynonymGroupsHandler + :<|> DBHandlers.downloadRefDataHandler DBHandlers.FlowSynonyms -- Compartment mappings - :<|> DBHandlers.listRefData DBHandlers.CompartmentMappings dbManager - :<|> DBHandlers.loadRefData DBHandlers.CompartmentMappings dbManager - :<|> DBHandlers.unloadRefData DBHandlers.CompartmentMappings dbManager - :<|> DBHandlers.deleteRefData DBHandlers.CompartmentMappings dbManager - :<|> DBHandlers.uploadRefData DBHandlers.CompartmentMappings dbManager + :<|> DBHandlers.listRefData DBHandlers.CompartmentMappings + :<|> DBHandlers.loadRefData DBHandlers.CompartmentMappings + :<|> DBHandlers.unloadRefData DBHandlers.CompartmentMappings + :<|> DBHandlers.deleteRefData DBHandlers.CompartmentMappings + :<|> DBHandlers.uploadRefData DBHandlers.CompartmentMappings -- Units - :<|> DBHandlers.listRefData DBHandlers.UnitDefs dbManager - :<|> DBHandlers.loadRefData DBHandlers.UnitDefs dbManager - :<|> DBHandlers.unloadRefData DBHandlers.UnitDefs dbManager - :<|> DBHandlers.deleteRefData DBHandlers.UnitDefs dbManager - :<|> DBHandlers.uploadRefData DBHandlers.UnitDefs dbManager + :<|> DBHandlers.listRefData DBHandlers.UnitDefs + :<|> DBHandlers.loadRefData DBHandlers.UnitDefs + :<|> DBHandlers.unloadRefData DBHandlers.UnitDefs + :<|> DBHandlers.deleteRefData DBHandlers.UnitDefs + :<|> DBHandlers.uploadRefData DBHandlers.UnitDefs :<|> getLogsHandler :<|> postAuth :<|> getVersion @@ -988,11 +1007,10 @@ lcaServer dbManager maxTreeDepth password hostingConfig classificationPresets = :<|> getStats :<|> getClassificationPresets :<|> getOpenApiSpec - where - getOpenApiSpec :: Handler Value + getOpenApiSpec :: AppM Value getOpenApiSpec = return $ toJSON volcaOpenApi - getVersion :: Handler Value + getVersion :: AppM Value getVersion = return $ object @@ -1002,7 +1020,7 @@ lcaServer dbManager maxTreeDepth password hostingConfig classificationPresets = , "buildTarget" .= Version.buildTarget ] - getHosting :: Handler Value + getHosting :: AppM Value getHosting = return $ case hostingConfig of Just hc -> object @@ -1023,7 +1041,7 @@ lcaServer dbManager maxTreeDepth password hostingConfig classificationPresets = , "upgrade_vm_size" .= ("" :: Text) ] - getStats :: Handler Value + getStats :: AppM Value getStats = liftIO $ do enabled <- GHC.Stats.getRTSStatsEnabled if enabled @@ -1040,7 +1058,7 @@ lcaServer dbManager maxTreeDepth password hostingConfig classificationPresets = object ["error" .= ("RTS stats not enabled. Run with +RTS -T to enable." :: Text)] - getClassificationPresets :: Handler [ClassificationPresetInfo] + getClassificationPresets :: AppM [ClassificationPresetInfo] getClassificationPresets = return $ map toInfo classificationPresets where toInfo p = @@ -1051,7 +1069,7 @@ lcaServer dbManager maxTreeDepth password hostingConfig classificationPresets = , cpiFilters = map (\e -> ClassificationEntryInfo (Config.ceSystem e) (Config.ceValue e) (Config.ceMode e)) (Config.cpFilters p) } - getLogsHandler :: Maybe Int -> Handler Value + getLogsHandler :: Maybe Int -> AppM Value getLogsHandler sinceMaybe = do let since = fromMaybe 0 sinceMaybe (nextIndex, logLines) <- liftIO $ getLogLines since @@ -1061,7 +1079,7 @@ lcaServer dbManager maxTreeDepth password hostingConfig classificationPresets = , "nextIndex" .= nextIndex ] - postAuth :: LoginRequest -> Handler (Headers '[Header "Set-Cookie" String] Value) + postAuth :: LoginRequest -> AppM (Headers '[Header "Set-Cookie" String] Value) postAuth loginReq = case password of Nothing -> @@ -1076,9 +1094,9 @@ lcaServer dbManager maxTreeDepth password hostingConfig classificationPresets = throwError err401{errBody = "{\"error\":\"invalid code\"}"} -- Core activity endpoint - streamlined data - getActivityInfo :: Text -> Text -> Handler ActivityInfo + getActivityInfo :: Text -> Text -> AppM ActivityInfo getActivityInfo dbName processId = do - (db, _) <- requireDatabaseByName dbManager dbName + (db, _) <- requireDatabaseByName dbName unitCfg <- liftIO $ getMergedUnitConfig dbManager case Service.getActivityInfo unitCfg db processId of Left (Service.ActivityNotFound _) -> throwError err404{errBody = "Activity not found"} @@ -1089,39 +1107,39 @@ lcaServer dbManager maxTreeDepth password hostingConfig classificationPresets = Error err -> throwError err500{errBody = BSL.fromStrict $ T.encodeUtf8 $ T.pack err} -- Activity flows sub-resource - getActivityFlows :: Text -> Text -> Handler [FlowSummary] + getActivityFlows :: Text -> Text -> AppM [FlowSummary] getActivityFlows dbName processId = do - (db, _) <- requireDatabaseByName dbManager dbName + (db, _) <- requireDatabaseByName dbName withValidatedActivity db processId $ \activity -> return $ Service.getActivityFlowSummaries db activity -- Activity inputs sub-resource - getActivityInputs :: Text -> Text -> Handler [ExchangeDetail] + getActivityInputs :: Text -> Text -> AppM [ExchangeDetail] getActivityInputs dbName processId = do - (db, _) <- requireDatabaseByName dbManager dbName + (db, _) <- requireDatabaseByName dbName withValidatedActivity db processId $ \activity -> return $ Service.getActivityInputDetails db activity -- Activity outputs sub-resource - getActivityOutputs :: Text -> Text -> Handler [ExchangeDetail] + getActivityOutputs :: Text -> Text -> AppM [ExchangeDetail] getActivityOutputs dbName processId = do - (db, _) <- requireDatabaseByName dbManager dbName + (db, _) <- requireDatabaseByName dbName withValidatedActivity db processId $ \activity -> return $ Service.getActivityOutputDetails db activity -- Activity reference product sub-resource - getActivityReferenceProduct :: Text -> Text -> Handler FlowDetail + getActivityReferenceProduct :: Text -> Text -> AppM FlowDetail getActivityReferenceProduct dbName processId = do - (db, _) <- requireDatabaseByName dbManager dbName + (db, _) <- requireDatabaseByName dbName withValidatedActivity db processId $ \activity -> do case Service.getActivityReferenceProductDetail db activity of Nothing -> throwError err404{errBody = "No reference product found"} Just refProduct -> return refProduct -- Activity tree export for visualization (configurable depth) - getActivityTree :: Text -> Text -> Handler TreeExport + getActivityTree :: Text -> Text -> AppM TreeExport getActivityTree dbName processId = do - (db, _) <- requireDatabaseByName dbManager dbName + (db, _) <- requireDatabaseByName dbName withValidatedActivity db processId $ \_activity -> do -- Use CLI --tree-depth option for configurable depth -- Default depth limit prevents DOS attacks via deep tree requests @@ -1140,21 +1158,21 @@ lcaServer dbManager maxTreeDepth password hostingConfig classificationPresets = -- Goes through the cross-DB back-substitution path so inventories from -- dep DBs are merged into the returned flow map; metadata (flow names, -- units) comes from the merged FlowDB/UnitDB snapshot. - activityInventoryCore :: Text -> Text -> Maybe SubstitutionRequest -> Handler InventoryExport + activityInventoryCore :: Text -> Text -> Maybe SubstitutionRequest -> AppM InventoryExport activityInventoryCore dbName processIdText mSub = do - (db, sharedSolver) <- requireDatabaseByName dbManager dbName + (db, sharedSolver) <- requireDatabaseByName dbName (processId, activity) <- resolveOrThrow db processIdText sol <- crossDBSolutionFor dbManager dbName db sharedSolver processId mSub (mFlows, mUnits) <- liftIO $ DM.getMergedFlowMetadata dbManager pure $ Service.convertToInventoryExport db mFlows mUnits processId activity (SharedSolver.csInventory sol) - getActivityInventory :: Text -> Text -> Handler InventoryExport + getActivityInventory :: Text -> Text -> AppM InventoryExport getActivityInventory dbName processIdText = activityInventoryCore dbName processIdText Nothing -- Activity graph endpoint for network visualization - getActivityGraph :: Text -> Text -> Maybe Double -> Handler GraphExport + getActivityGraph :: Text -> Text -> Maybe Double -> AppM GraphExport getActivityGraph dbName processId maybeCutoff = do - (db, sharedSolver) <- requireDatabaseByName dbManager dbName + (db, sharedSolver) <- requireDatabaseByName dbName let cutoffPercent = fromMaybe 1.0 maybeCutoff -- Default to 1% cutoff result <- liftIO $ Service.buildActivityGraph db sharedSolver processId cutoffPercent case result of @@ -1193,9 +1211,9 @@ lcaServer dbManager maxTreeDepth password hostingConfig classificationPresets = -- Activity supply chain endpoint (scaling vector based). 'Nothing' takes the -- cached solve; 'Just' resolves substitutions through the cross-DB resolver. - activitySupplyChainCore :: Text -> Text -> Maybe Text -> Maybe Int -> Maybe Double -> Maybe Int -> Maybe Int -> Maybe Text -> Maybe Text -> Maybe Text -> [Text] -> [Text] -> [Text] -> Maybe Text -> Maybe Text -> Maybe Bool -> Maybe SubstitutionRequest -> Handler SupplyChainResponse + activitySupplyChainCore :: Text -> Text -> Maybe Text -> Maybe Int -> Maybe Double -> Maybe Int -> Maybe Int -> Maybe Text -> Maybe Text -> Maybe Text -> [Text] -> [Text] -> [Text] -> Maybe Text -> Maybe Text -> Maybe Bool -> Maybe SubstitutionRequest -> AppM SupplyChainResponse activitySupplyChainCore dbName processIdText nameFilter limitParam minQuantity offsetParam maxDepthParam locationFilter productFilter presetParam classSystems classValues classModes sortParam orderParam includeEdgesParam mSub = do - (db, sharedSolver) <- requireDatabaseByName dbManager dbName + (db, sharedSolver) <- requireDatabaseByName dbName let includeEdges = fromMaybe False includeEdgesParam scf = buildSupplyChainFilter @@ -1256,7 +1274,7 @@ lcaServer dbManager maxTreeDepth password hostingConfig classificationPresets = includeEdges either throwServiceError pure eResp - getActivitySupplyChain :: Text -> Text -> Maybe Text -> Maybe Int -> Maybe Double -> Maybe Int -> Maybe Int -> Maybe Text -> Maybe Text -> Maybe Text -> [Text] -> [Text] -> [Text] -> Maybe Text -> Maybe Text -> Maybe Bool -> Handler SupplyChainResponse + getActivitySupplyChain :: Text -> Text -> Maybe Text -> Maybe Int -> Maybe Double -> Maybe Int -> Maybe Int -> Maybe Text -> Maybe Text -> Maybe Text -> [Text] -> [Text] -> [Text] -> Maybe Text -> Maybe Text -> Maybe Bool -> AppM SupplyChainResponse getActivitySupplyChain dbName processIdText nameFilter limitParam minQuantity offsetParam maxDepthParam locationFilter productFilter presetParam classSystems classValues classModes sortParam orderParam includeEdgesParam = activitySupplyChainCore dbName processIdText nameFilter limitParam minQuantity offsetParam maxDepthParam locationFilter productFilter presetParam classSystems classValues classModes sortParam orderParam includeEdgesParam Nothing @@ -1277,7 +1295,7 @@ lcaServer dbManager maxTreeDepth password hostingConfig classificationPresets = Maybe Bool -> -- filter_is_reference Maybe Text -> -- group_by Maybe Text -> -- aggregate fn - Handler Aggregation + AppM Aggregation getActivityAggregate dbName processId @@ -1294,7 +1312,7 @@ lcaServer dbManager maxTreeDepth password hostingConfig classificationPresets = freferenceParam groupByParam aggregateParam = do - (db, sharedSolver) <- requireDatabaseByName dbManager dbName + (db, sharedSolver) <- requireDatabaseByName dbName -- Field-level validation via the Validation Applicative (accumulating). -- A request with both an invalid `scope` and an invalid `aggregate` -- now reports both errors at once, instead of just the first. @@ -1369,9 +1387,9 @@ lcaServer dbManager maxTreeDepth password hostingConfig classificationPresets = -- Activity LCIA endpoint (single method within a collection). The GET -- variant honours a top-flows query param and logs the result; the POST -- route carries neither (no top-flows param, no logging). - activityLCIACore :: Text -> Text -> Text -> Maybe Int -> Maybe SubstitutionRequest -> Handler LCIAResult + activityLCIACore :: Text -> Text -> Text -> Maybe Int -> Maybe SubstitutionRequest -> AppM LCIAResult activityLCIACore dbName processIdText methodIdText topFlowsParam mSub = do - (db, sharedSolver) <- requireDatabaseByName dbManager dbName + (db, sharedSolver) <- requireDatabaseByName dbName method <- loadMethodByUUID methodIdText (processId, activity) <- resolveOrThrow db processIdText sol <- crossDBSolutionFor dbManager dbName db sharedSolver processId mSub @@ -1379,12 +1397,12 @@ lcaServer dbManager maxTreeDepth password hostingConfig classificationPresets = when (isNothing mSub) $ liftIO $ logLCIAResult result method pure result - getActivityLCIA :: Text -> Text -> Text -> Text -> Maybe Int -> Handler LCIAResult + getActivityLCIA :: Text -> Text -> Text -> Text -> Maybe Int -> AppM LCIAResult getActivityLCIA dbName processIdText _collectionName methodIdText topFlowsParam = activityLCIACore dbName processIdText methodIdText topFlowsParam Nothing -- POST: LCIA with substitutions - postActivityLCIA :: Text -> Text -> Text -> Text -> SubstitutionRequest -> Handler LCIAResult + postActivityLCIA :: Text -> Text -> Text -> Text -> SubstitutionRequest -> AppM LCIAResult postActivityLCIA dbName processIdText _collectionName methodIdText subReq = activityLCIACore dbName processIdText methodIdText Nothing (Just subReq) @@ -1402,9 +1420,9 @@ lcaServer dbManager maxTreeDepth password hostingConfig classificationPresets = -- Cost: each perturbation now triggers one cross-DB back-substitution -- per dep DB it actually reaches. MUMPS factorizations are cached, so -- back-sub is O(n²) per dep DB, not full O(n³) factorization. - postActivitySensitivity :: Text -> Text -> Text -> Text -> SensitivityRequest -> Handler SensitivityResponse + postActivitySensitivity :: Text -> Text -> Text -> Text -> SensitivityRequest -> AppM SensitivityResponse postActivitySensitivity dbName processIdText _collectionName methodIdText senReq = do - (db, sharedSolver) <- requireDatabaseByName dbManager dbName + (db, sharedSolver) <- requireDatabaseByName dbName requireFullyLinked dbName db method <- loadMethodByUUID methodIdText (processId, activity) <- resolveOrThrow db processIdText @@ -1454,31 +1472,31 @@ lcaServer dbManager maxTreeDepth password hostingConfig classificationPresets = -- Batch LCIA endpoint (all methods in a collection). Thin alias over the -- top-level activityLCIABatchH; preserves the Servant call sites. - activityLCIABatchCore :: Text -> Text -> Text -> Maybe SubstitutionRequest -> Handler LCIABatchResult - activityLCIABatchCore = activityLCIABatchH dbManager + activityLCIABatchCore :: Text -> Text -> Text -> Maybe SubstitutionRequest -> AppM LCIABatchResult + activityLCIABatchCore = activityLCIABatchH - getActivityLCIABatch :: Text -> Text -> Text -> Handler LCIABatchResult + getActivityLCIABatch :: Text -> Text -> Text -> AppM LCIABatchResult getActivityLCIABatch dbName processIdText collectionName = activityLCIABatchCore dbName processIdText collectionName Nothing -- POST: Batch LCIA with substitutions - postActivityLCIABatch :: Text -> Text -> Text -> SubstitutionRequest -> Handler LCIABatchResult + postActivityLCIABatch :: Text -> Text -> Text -> SubstitutionRequest -> AppM LCIABatchResult postActivityLCIABatch dbName processIdText collectionName subReq = activityLCIABatchCore dbName processIdText collectionName (Just subReq) -- POST: Inventory with substitutions - postActivityInventory :: Text -> Text -> SubstitutionRequest -> Handler InventoryExport + postActivityInventory :: Text -> Text -> SubstitutionRequest -> AppM InventoryExport postActivityInventory dbName processIdText subReq = activityInventoryCore dbName processIdText (Just subReq) -- POST: Supply chain with substitutions - postActivitySupplyChain :: Text -> Text -> Maybe Text -> Maybe Int -> Maybe Double -> Maybe Int -> Maybe Int -> Maybe Text -> Maybe Text -> Maybe Text -> [Text] -> [Text] -> [Text] -> Maybe Text -> Maybe Text -> Maybe Bool -> SubstitutionRequest -> Handler SupplyChainResponse + postActivitySupplyChain :: Text -> Text -> Maybe Text -> Maybe Int -> Maybe Double -> Maybe Int -> Maybe Int -> Maybe Text -> Maybe Text -> Maybe Text -> [Text] -> [Text] -> [Text] -> Maybe Text -> Maybe Text -> Maybe Bool -> SubstitutionRequest -> AppM SupplyChainResponse postActivitySupplyChain dbName processIdText nameFilter limitParam minQuantity offsetParam maxDepthParam locationFilter productFilter presetParam classSystems classValues classModes sortParam orderParam includeEdgesParam subReq = activitySupplyChainCore dbName processIdText nameFilter limitParam minQuantity offsetParam maxDepthParam locationFilter productFilter presetParam classSystems classValues classModes sortParam orderParam includeEdgesParam (Just subReq) -- Activity consumers endpoint (reverse supply chain) - getActivityConsumers :: Text -> Text -> Maybe Text -> Maybe Text -> Maybe Text -> Maybe Text -> [Text] -> [Text] -> [Text] -> Maybe Int -> Maybe Int -> Maybe Int -> Maybe Text -> Maybe Text -> Maybe Bool -> Handler ConsumersResponse + getActivityConsumers :: Text -> Text -> Maybe Text -> Maybe Text -> Maybe Text -> Maybe Text -> [Text] -> [Text] -> [Text] -> Maybe Int -> Maybe Int -> Maybe Int -> Maybe Text -> Maybe Text -> Maybe Bool -> AppM ConsumersResponse getActivityConsumers dbName processIdText nameFilter locationFilter productFilter presetParam classSystems classValues classModes limitParam offsetParam maxDepthParam sortParam orderParam includeEdgesParam = do - (db, _) <- requireDatabaseByName dbManager dbName + (db, _) <- requireDatabaseByName dbName let presetFilters = expandPreset classificationPresets presetParam explicitFilters = zipWith3 @@ -1510,9 +1528,9 @@ lcaServer dbManager maxTreeDepth password hostingConfig classificationPresets = Right consumers -> return consumers -- Activity path-to endpoint (shortest supply chain path to first matching upstream activity) - getActivityPathTo :: Text -> Text -> Maybe Text -> Handler Value + getActivityPathTo :: Text -> Text -> Maybe Text -> AppM Value getActivityPathTo dbName processIdText targetParam = do - (db, solver) <- requireDatabaseByName dbManager dbName + (db, solver) <- requireDatabaseByName dbName target <- maybe (throwError err400{errBody = "Missing required 'target' query parameter"}) @@ -1533,9 +1551,9 @@ lcaServer dbManager maxTreeDepth password hostingConfig classificationPresets = -- Call sites in this `where` pass `dbManager` explicitly when needed.) -- Activity analysis endpoint (dispatches to registered analyzers) - getActivityAnalyze :: Text -> Text -> Text -> Handler Value + getActivityAnalyze :: Text -> Text -> Text -> AppM Value getActivityAnalyze dbName processIdText analyzerName = do - (db, sharedSolver) <- requireDatabaseByName dbManager dbName + (db, sharedSolver) <- requireDatabaseByName dbName case M.lookup analyzerName (prAnalyzers (dmPlugins dbManager)) of Nothing -> throwError err404{errBody = "Analyzer not found: " <> BSL.fromStrict (T.encodeUtf8 analyzerName)} Just analyzer -> do @@ -1544,7 +1562,7 @@ lcaServer dbManager maxTreeDepth password hostingConfig classificationPresets = Left (Service.InvalidProcessId _) -> throwError err400{errBody = "Invalid ProcessId format"} Left err -> throwError err500{errBody = BSL.fromStrict $ T.encodeUtf8 $ T.pack $ show err} Right (actProcessId, _) -> do - inventory <- inventoryWithDeps dbManager dbName db sharedSolver actProcessId + inventory <- inventoryWithDeps dbName db sharedSolver actProcessId (mFlows, mUnits) <- liftIO $ DM.getMergedFlowMetadata dbManager loadedMethods <- liftIO $ DM.getLoadedMethods dbManager let methods = map snd loadedMethods @@ -1561,13 +1579,13 @@ lcaServer dbManager maxTreeDepth password hostingConfig classificationPresets = liftIO $ ahAnalyze analyzer ctx -- Contributing flows: top elementary flows by LCIA contribution for a specific method - getContributingFlows :: Text -> Text -> Text -> Text -> Maybe Int -> Handler ContributingFlowsResult + getContributingFlows :: Text -> Text -> Text -> Text -> Maybe Int -> AppM ContributingFlowsResult getContributingFlows dbName processIdText _collectionName methodIdText limitParam = withActivityAndMethod dbName processIdText methodIdText $ \db sharedSolver actProcessId _ method -> do let lim = fromMaybe 20 limitParam unitCfg <- liftIO $ getMergedUnitConfig dbManager (mFlows, mUnits) <- liftIO $ DM.getMergedFlowMetadata dbManager - inventory <- inventoryWithDeps dbManager dbName db sharedSolver actProcessId + inventory <- inventoryWithDeps dbName db sharedSolver actProcessId tables <- liftIO $ DM.mapMethodToTablesCached dbManager dbName db method let score = loScore (computeLCIAScoreFromTables unitCfg mUnits mFlows inventory tables) (rawContribs, unknownUuids) = inventoryContributions unitCfg mUnits mFlows inventory tables @@ -1602,7 +1620,7 @@ lcaServer dbManager maxTreeDepth password hostingConfig classificationPresets = } -- Contributing activities: top upstream activities by LCIA contribution for a specific method - getContributingActivities :: Text -> Text -> Text -> Text -> Maybe Int -> Handler ContributingActivitiesResult + getContributingActivities :: Text -> Text -> Text -> Text -> Maybe Int -> AppM ContributingActivitiesResult getContributingActivities dbName processIdText _collectionName methodIdText limitParam = withActivityAndMethod dbName processIdText methodIdText $ \db sharedSolver actProcessId _ method -> do let lim = fromMaybe 10 limitParam @@ -1650,9 +1668,9 @@ lcaServer dbManager maxTreeDepth password hostingConfig classificationPresets = -- as a sum across all participating DBs (root + each dep DB reached at -- request time); non-regional methods read the merged inventory only. -- Flow detail endpoint - getFlowDetail :: Text -> Text -> Handler FlowDetail + getFlowDetail :: Text -> Text -> AppM FlowDetail getFlowDetail dbName flowIdText = do - (db, _) <- requireDatabaseByName dbManager dbName + (db, _) <- requireDatabaseByName dbName withValidatedFlow db flowIdText $ \flow -> do let fid = flowKindId flow unitName' = flowKindUnitName (dbUnits db) flow @@ -1660,14 +1678,14 @@ lcaServer dbManager maxTreeDepth password hostingConfig classificationPresets = return $ FlowDetail (apiFlowOfKind flow) unitName' usageCount -- Activities using a specific flow - getFlowActivities :: Text -> Text -> Handler [ActivitySummary] + getFlowActivities :: Text -> Text -> AppM [ActivitySummary] getFlowActivities dbName flowIdText = do - (db, _) <- requireDatabaseByName dbManager dbName + (db, _) <- requireDatabaseByName dbName withValidatedFlow db flowIdText $ \flow -> return $ Service.getActivitiesUsingFlow db (flowKindId flow) -- List all available methods (from loaded collections) - getMethods :: Handler [MethodSummary] + getMethods :: AppM [MethodSummary] getMethods = do loadedMethods <- liftIO $ DM.getLoadedMethods dbManager return @@ -1683,7 +1701,7 @@ lcaServer dbManager maxTreeDepth password hostingConfig classificationPresets = ] -- Get method details - getMethodDetail :: Text -> Handler MethodDetail + getMethodDetail :: Text -> AppM MethodDetail getMethodDetail methodIdText = do method <- loadMethodByUUID methodIdText return $ @@ -1698,15 +1716,15 @@ lcaServer dbManager maxTreeDepth password hostingConfig classificationPresets = } -- Get method characterization factors - getMethodFactors :: Text -> Handler [MethodFactorAPI] + getMethodFactors :: Text -> AppM [MethodFactorAPI] getMethodFactors methodIdText = do method <- loadMethodByUUID methodIdText return $ map cfToAPI (methodFactors method) -- Get method flow mapping status - getMethodMapping :: Text -> Text -> Handler MappingStatus + getMethodMapping :: Text -> Text -> AppM MappingStatus getMethodMapping dbName methodIdText = do - (db, _) <- requireDatabaseByName dbManager dbName + (db, _) <- requireDatabaseByName dbName method <- loadMethodByUUID methodIdText mappings <- liftIO $ DM.mapMethodToFlowsCached dbManager dbName db method let stats = computeMappingStats mappings @@ -1746,9 +1764,9 @@ lcaServer dbManager maxTreeDepth password hostingConfig classificationPresets = } -- DB-flow-centric mapping: all biosphere flows with their CF assignments - getFlowCFMapping :: Text -> Text -> Handler FlowCFMapping + getFlowCFMapping :: Text -> Text -> AppM FlowCFMapping getFlowCFMapping dbName methodIdText = do - (db, _) <- requireDatabaseByName dbManager dbName + (db, _) <- requireDatabaseByName dbName method <- loadMethodByUUID methodIdText mappings <- liftIO $ DM.mapMethodToFlowsCached dbManager dbName db method let @@ -1790,9 +1808,9 @@ lcaServer dbManager maxTreeDepth password hostingConfig classificationPresets = strategyToText NoMatch = "none" -- Characterization: matched CFs for a method, filterable by flow name - getCharacterization :: Text -> Text -> Maybe Text -> Maybe Int -> Handler CharacterizationResult + getCharacterization :: Text -> Text -> Maybe Text -> Maybe Int -> AppM CharacterizationResult getCharacterization dbName methodIdText flowFilter limitParam = do - (db, _) <- requireDatabaseByName dbManager dbName + (db, _) <- requireDatabaseByName dbName method <- loadMethodByUUID methodIdText let lim = fromMaybe 50 limitParam queryLower = fmap T.toLower flowFilter @@ -1833,7 +1851,7 @@ lcaServer dbManager maxTreeDepth password hostingConfig classificationPresets = matchesQuery (Just q) cfName dbName = T.isInfixOf q (T.toLower cfName) || T.isInfixOf q (T.toLower dbName) -- Helper to load a method by UUID from the loaded collections - loadMethodByUUID :: Text -> Handler Method + loadMethodByUUID :: Text -> AppM Method loadMethodByUUID uuidText = do loadedMethods <- liftIO $ DM.getLoadedMethods dbManager let allMethods = map snd loadedMethods @@ -1850,10 +1868,10 @@ lcaServer dbManager maxTreeDepth password hostingConfig classificationPresets = Text -> Text -> Text -> - (Database -> SharedSolver -> ProcessId -> Activity -> Method -> Handler a) -> - Handler a + (Database -> SharedSolver -> ProcessId -> Activity -> Method -> AppM a) -> + AppM a withActivityAndMethod dbName processIdText methodIdText k = do - (db, sharedSolver) <- requireDatabaseByName dbManager dbName + (db, sharedSolver) <- requireDatabaseByName dbName method <- loadMethodByUUID methodIdText case Service.resolveActivityAndProcessId db processIdText of Left (Service.ActivityNotFound _) -> throwError err404{errBody = "Activity not found"} @@ -1862,7 +1880,7 @@ lcaServer dbManager maxTreeDepth password hostingConfig classificationPresets = Right (actProcessId, activity) -> k db sharedSolver actProcessId activity method -- Method collection handlers - getMethodCollections :: Handler MethodCollectionListResponse + getMethodCollections :: AppM MethodCollectionListResponse getMethodCollections = do statuses <- liftIO $ DM.listMethodCollections dbManager return $ @@ -1882,11 +1900,11 @@ lcaServer dbManager maxTreeDepth password hostingConfig classificationPresets = | s <- statuses ] - loadMethodCollectionHandler :: Text -> Handler ActivateResponse + loadMethodCollectionHandler :: Text -> AppM ActivateResponse loadMethodCollectionHandler name = simpleAction (DM.loadMethodCollection dbManager name) ("Loaded method: " <> name) - unloadMethodCollectionHandler :: Text -> Handler ActivateResponse + unloadMethodCollectionHandler :: Text -> AppM ActivateResponse unloadMethodCollectionHandler name = simpleAction (DM.unloadMethodCollection dbManager name) ("Unloaded method: " <> name) @@ -1903,9 +1921,9 @@ lcaServer dbManager maxTreeDepth password hostingConfig classificationPresets = } -- Search flows by name or synonym with optional language filtering and pagination - searchFlows :: Text -> Maybe Text -> Maybe Text -> Maybe Int -> Maybe Int -> Maybe Text -> Maybe Text -> Handler (SearchResults FlowSearchResult) + searchFlows :: Text -> Maybe Text -> Maybe Text -> Maybe Int -> Maybe Int -> Maybe Text -> Maybe Text -> AppM (SearchResults FlowSearchResult) searchFlows dbName queryParam langParam limitParam offsetParam sortParam orderParam = do - (db, _) <- requireDatabaseByName dbManager dbName + (db, _) <- requireDatabaseByName dbName case queryParam of Nothing -> return (SearchResults [] 0 0 50 False 0.0) Just query -> do @@ -1921,9 +1939,9 @@ lcaServer dbManager maxTreeDepth password hostingConfig classificationPresets = searchFlowsInternal db ff -- Search activities by specific fields with pagination and count - searchActivitiesWithCount :: Text -> Maybe Text -> Maybe Text -> Maybe Text -> Maybe Bool -> Maybe Text -> [Text] -> [Text] -> [Text] -> Maybe Int -> Maybe Int -> Maybe Text -> Maybe Text -> Handler (SearchResults ActivitySummary) + searchActivitiesWithCount :: Text -> Maybe Text -> Maybe Text -> Maybe Text -> Maybe Bool -> Maybe Text -> [Text] -> [Text] -> [Text] -> Maybe Int -> Maybe Int -> Maybe Text -> Maybe Text -> AppM (SearchResults ActivitySummary) searchActivitiesWithCount dbName nameParam geoParam productParam exactParam presetParam classSystems classValues classModes limitParam offsetParam sortParam orderParam = do - (db, _) <- requireDatabaseByName dbManager dbName + (db, _) <- requireDatabaseByName dbName -- Expand preset filters then merge with explicit classification params let exactMatch = fromMaybe False exactParam presetFilters = expandPreset classificationPresets presetParam @@ -1956,14 +1974,14 @@ lcaServer dbManager maxTreeDepth password hostingConfig classificationPresets = Success searchResults -> return searchResults Error parseErr -> throwError err500{errBody = BSL.fromStrict $ T.encodeUtf8 $ T.pack parseErr} - getClassifications :: Text -> Handler [ClassificationSystem] + getClassifications :: Text -> AppM [ClassificationSystem] getClassifications dbName = do - (db, _) <- requireDatabaseByName dbManager dbName + (db, _) <- requireDatabaseByName dbName return $ Service.getClassifications db -- Batch impacts: thin alias over the top-level batchImpactsH. - postImpactsBatch :: Text -> Text -> Maybe Int -> BatchImpactsRequest -> Handler BatchImpactsResponse - postImpactsBatch = batchImpactsH dbManager + postImpactsBatch :: Text -> Text -> Maybe Int -> BatchImpactsRequest -> AppM BatchImpactsResponse + postImpactsBatch = batchImpactsH {- | Evaluate every scoring set against the raw impact score map. Returns (setName → scoreName → value, setName → varName → ScoringIndicator). @@ -2021,7 +2039,7 @@ paginateResults results limitParam offsetParam = do The 'ffQuery' is always present (callers short-circuit on the no-query case); language filtering is not yet implemented. -} -searchFlowsInternal :: Database -> Service.FlowFilter -> Handler (SearchResults FlowSearchResult) +searchFlowsInternal :: Database -> Service.FlowFilter -> AppM (SearchResults FlowSearchResult) searchFlowsInternal db Service.FlowFilter{Service.ffQuery = query, Service.ffLimit = limitParam, Service.ffOffset = offsetParam, Service.ffSort = sortParam, Service.ffOrder = orderParam} = do -- Language filtering not yet implemented, search all synonyms let flows = findFlowsBySynonym db query From e9363e5a4546adeb7efb7efc3f37ebf6b65306c3 Mon Sep 17 00:00:00 2001 From: Christophe Combelles Date: Tue, 26 May 2026 01:11:34 +0200 Subject: [PATCH 09/43] refactor(cli): factor optparse-applicative builders MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Six tiny helpers (strOpt / optStrOpt / textOpt / optTextOpt / intOpt / optIntOpt / strArg / textArg) collapse the four shapes that dominated src/CLI/Parser.hs — Text option, Int option, positional arg, with or without short alias — into one line each. ~172 LOC removed, `--help` output unchanged, openapi.json byte-identical, hspec 1052/1052 green. Categorically: optparse-applicative's Parser is the textbook free Applicative over a primitive 'parser action'. Composition with <*> keeps the static structure exposed, which is precisely why a Parser can produce both the input parsing and the `--help` text from one definition. Stopping at Applicative (instead of going to Monad) is the structural reason these helpers can exist at all — a Monadic parser would have to inspect run-time values, defeating the static introspection. --- src/CLI/Parser.hs | 322 +++++++++++----------------------------------- 1 file changed, 75 insertions(+), 247 deletions(-) diff --git a/src/CLI/Parser.hs b/src/CLI/Parser.hs index 4807fde3..b41d6495 100644 --- a/src/CLI/Parser.hs +++ b/src/CLI/Parser.hs @@ -10,6 +10,48 @@ import Options.Applicative import qualified Options.Applicative as OA import Version (buildTarget, gitHash, gitTag, version) +-- --------------------------------------------------------------------------- +-- Option-builder helpers +-- +-- optparse-applicative is the textbook /Free Applicative/ over a primitive +-- @Parser@: composition with @\<*\>@ keeps the static structure exposed +-- (which is why @--help@ generation works), and the helpers below factor +-- the four shapes that dominated this file before — Text option, Int +-- option, switch, positional Text arg — into one line each. +-- --------------------------------------------------------------------------- + +-- | @strOption@ with @long/metavar/help@, optionally a short alias. +strOpt :: String -> Maybe Char -> String -> String -> Parser String +strOpt l ms m h = strOption (long l <> maybe mempty short ms <> metavar m <> help h) + +-- | @optional strOpt@ — most of the global / command parsers use this. +optStrOpt :: String -> Maybe Char -> String -> String -> Parser (Maybe String) +optStrOpt l ms m h = optional (strOpt l ms m h) + +-- | Text variant: read as String then pack. +textOpt :: String -> Maybe Char -> String -> String -> Parser Text +textOpt l ms m h = T.pack <$> strOpt l ms m h + +-- | @optional textOpt@. +optTextOpt :: String -> Maybe Char -> String -> String -> Parser (Maybe Text) +optTextOpt l ms m h = optional (textOpt l ms m h) + +-- | @option auto@ for Int-shaped options (limit, offset, port, depth…). +intOpt :: String -> Maybe Char -> String -> String -> Parser Int +intOpt l ms m h = option auto (long l <> maybe mempty short ms <> metavar m <> help h) + +-- | @optional intOpt@. +optIntOpt :: String -> Maybe Char -> String -> String -> Parser (Maybe Int) +optIntOpt l ms m h = optional (intOpt l ms m h) + +-- | Positional @Text@ argument with metavar + help. +textArg :: String -> String -> Parser Text +textArg m h = T.pack <$> argument str (metavar m <> help h) + +-- | Positional @String@ argument with metavar + help (for filenames). +strArg :: String -> String -> Parser String +strArg m h = argument str (metavar m <> help h) + {- | Main CLI parser combining global options and optional command If no command is given, just load database and exit (useful for cache generation) -} @@ -19,74 +61,18 @@ cliParser = CLIConfig <$> globalOptionsParser <*> optional commandParser -- | Global options parser (applied before commands) globalOptionsParser :: Parser GlobalOptions globalOptionsParser = do - configFile <- - optional $ - strOption - ( long "config" - <> short 'c' - <> metavar "FILE" - <> help "TOML config file (required)" - ) - - dbName <- - optional $ - ( T.pack - <$> strOption - ( long "db" - <> metavar "NAME" - <> help "Database name to query (from config file)" - ) - ) - - methodsDir <- - optional $ - strOption - ( long "methods" - <> metavar "PATH" - <> help "Directory containing ILCD method XML files for LCIA" - ) - + configFile <- optStrOpt "config" (Just 'c') "FILE" "TOML config file (required)" + dbName <- optTextOpt "db" Nothing "NAME" "Database name to query (from config file)" + methodsDir <- optStrOpt "methods" Nothing "PATH" "Directory containing ILCD method XML files for LCIA" format <- optional $ option outputFormatReader - ( long "format" - <> metavar "FORMAT" - <> help "Output format: json|csv|table|pretty (default depends on command)" - ) - - jsonPath <- - optional $ - ( T.pack - <$> strOption - ( long "jsonpath" - <> metavar "PATH" - <> help "JSONPath for CSV extraction (required with --format csv). Examples: 'results', 'activity.exchanges'" - ) - ) - - noCache <- - switch - ( long "no-cache" - <> help "Disable caching for testing and development" - ) - - serverUrl <- - optional $ - strOption - ( long "url" - <> metavar "URL" - <> help "Server URL for HTTP client mode (or set VOLCA_URL env var)" - ) - - serverPassword <- - optional $ - strOption - ( long "password" - <> metavar "PASSWORD" - <> help "Password for authentication (or set VOLCA_PASSWORD env var)" - ) - + (long "format" <> metavar "FORMAT" <> help "Output format: json|csv|table|pretty (default depends on command)") + jsonPath <- optTextOpt "jsonpath" Nothing "PATH" "JSONPath for CSV extraction (required with --format csv). Examples: 'results', 'activity.exchanges'" + noCache <- switch (long "no-cache" <> help "Disable caching for testing and development") + serverUrl <- optStrOpt "url" Nothing "URL" "Server URL for HTTP client mode (or set VOLCA_URL env var)" + serverPassword <- optStrOpt "password" Nothing "PASSWORD" "Password for authentication (or set VOLCA_PASSWORD env var)" pure GlobalOptions{..} -- | Output format reader for optparse-applicative @@ -162,28 +148,14 @@ pluginParser = -- | Shared upload arguments parser (positional FILE, --name, --description) uploadArgsParser :: Parser UploadArgs uploadArgsParser = do - uaFile <- argument str (metavar "FILE" <> help "Archive or data file to upload (ZIP, 7z, tar.gz, tar.xz, XML, CSV)") - uaName <- - T.pack - <$> strOption - ( long "name" - <> short 'n' - <> metavar "NAME" - <> help "Display name (required)" - ) - uaDescription <- - optional $ - T.pack - <$> strOption - ( long "description" - <> metavar "TEXT" - <> help "Optional description" - ) + uaFile <- strArg "FILE" "Archive or data file to upload (ZIP, 7z, tar.gz, tar.xz, XML, CSV)" + uaName <- textOpt "name" (Just 'n') "NAME" "Display name (required)" + uaDescription <- optTextOpt "description" Nothing "TEXT" "Optional description" pure UploadArgs{..} -- | Delete name parser (positional NAME) deleteNameParser :: Parser Text -deleteNameParser = T.pack <$> argument str (metavar "NAME" <> help "Name of the resource to delete") +deleteNameParser = textArg "NAME" "Name of the resource to delete" -- | Server command parser serverParser :: Parser Command @@ -191,51 +163,14 @@ serverParser = Server <$> serverOptionsParser serverOptionsParser :: Parser ServerOptions serverOptionsParser = do - serverPort <- - optional $ - option - auto - ( long "port" - <> short 'p' - <> metavar "PORT" - <> help "Server port (overrides [server].port from config; default: 8080)" - ) + serverPort <- optIntOpt "port" (Just 'p') "PORT" "Server port (overrides [server].port from config; default: 8080)" serverLoadDbs <- optional $ - option - dbListReader - ( long "load" - <> metavar "DB1,DB2,..." - <> help "Comma-separated list of databases to load at startup (overrides config load=true)" - ) - serverDesktopMode <- - switch - ( long "desktop" - <> help "Desktop mode: print VOLCA_PORT=N on startup for launcher integration" - ) - serverStaticDir <- - optional $ - strOption - ( long "static-dir" - <> metavar "PATH" - <> help "Override default static file directory (default: web/dist)" - ) - serverIdleTimeout <- - option - auto - ( long "idle-timeout" - <> value 0 - <> metavar "SECONDS" - <> help "Shutdown after N seconds of inactivity (0=disabled, default: 0)" - ) - serverTreeDepth <- - option - auto - ( long "tree-depth" - <> value 2 - <> metavar "DEPTH" - <> help "Default max depth for the /tree endpoint (default: 2)" - ) + option dbListReader (long "load" <> metavar "DB1,DB2,..." <> help "Comma-separated list of databases to load at startup (overrides config load=true)") + serverDesktopMode <- switch (long "desktop" <> help "Desktop mode: print VOLCA_PORT=N on startup for launcher integration") + serverStaticDir <- optStrOpt "static-dir" Nothing "PATH" "Override default static file directory (default: web/dist)" + serverIdleTimeout <- option auto (long "idle-timeout" <> value 0 <> metavar "SECONDS" <> help "Shutdown after N seconds of inactivity (0=disabled, default: 0)") + serverTreeDepth <- option auto (long "tree-depth" <> value 2 <> metavar "DEPTH" <> help "Default max depth for the /tree endpoint (default: 2)") pure ServerOptions{..} -- | Reader for comma-separated list of database names @@ -270,88 +205,20 @@ flowSubCommandParser = -- | Search activities parser (now top-level) searchActivitiesParser :: Parser Command searchActivitiesParser = do - searchName <- - optional $ - strOption - ( long "name" - <> metavar "TERM" - <> help "Search by activity name" - ) - - searchGeo <- - optional $ - strOption - ( long "geo" - <> metavar "LOCATION" - <> help "Filter by geography (exact match)" - ) - - searchProduct <- - optional $ - strOption - ( long "product" - <> metavar "PRODUCT" - <> help "Filter by reference product" - ) - - searchLimit <- - optional $ - option - auto - ( long "limit" - <> metavar "N" - <> help "Limit number of results (max 1000, default 50)" - ) - - searchOffset <- - optional $ - option - auto - ( long "offset" - <> metavar "N" - <> help "Offset for pagination (default 0)" - ) - + searchName <- optTextOpt "name" Nothing "TERM" "Search by activity name" + searchGeo <- optTextOpt "geo" Nothing "LOCATION" "Filter by geography (exact match)" + searchProduct <- optTextOpt "product" Nothing "PRODUCT" "Filter by reference product" + searchLimit <- optIntOpt "limit" Nothing "N" "Limit number of results (max 1000, default 50)" + searchOffset <- optIntOpt "offset" Nothing "N" "Offset for pagination (default 0)" pure $ SearchActivities SearchActivitiesOptions{..} -- | Search flows parser (now top-level) searchFlowsParser :: Parser Command searchFlowsParser = do - searchQuery <- - optional $ - strOption - ( long "query" - <> short 'q' - <> metavar "TERM" - <> help "Search term for flow names and synonyms" - ) - - searchLang <- - optional $ - strOption - ( long "lang" - <> metavar "LANG" - <> help "Language for synonym search" - ) - - searchFlowsLimit <- - optional $ - option - auto - ( long "limit" - <> metavar "N" - <> help "Limit number of results" - ) - - searchFlowsOffset <- - optional $ - option - auto - ( long "offset" - <> metavar "N" - <> help "Offset for pagination" - ) - + searchQuery <- optTextOpt "query" (Just 'q') "TERM" "Search term for flow names and synonyms" + searchLang <- optTextOpt "lang" Nothing "LANG" "Language for synonym search" + searchFlowsLimit <- optIntOpt "limit" Nothing "N" "Limit number of results" + searchFlowsOffset <- optIntOpt "offset" Nothing "N" "Offset for pagination" pure $ SearchFlows SearchFlowsOptions{..} -- | Impacts (LCIA) command parser @@ -364,32 +231,9 @@ impactsParser = do -- | LCIA options parser lciaOptionsParser :: Parser LCIAOptions lciaOptionsParser = do - lciaMethodId <- - T.pack - <$> strOption - ( long "method" - <> short 'm' - <> metavar "METHOD_UUID" - <> help "Method UUID (method must be loaded on the server)" - ) - - lciaOutput <- - optional $ - strOption - ( long "output" - <> short 'o' - <> metavar "FILE" - <> help "Export results to XML ILCD format" - ) - - lciaCSV <- - optional $ - strOption - ( long "csv" - <> metavar "FILE" - <> help "Export results to CSV format" - ) - + lciaMethodId <- textOpt "method" (Just 'm') "METHOD_UUID" "Method UUID (method must be loaded on the server)" + lciaOutput <- optStrOpt "output" (Just 'o') "FILE" "Export results to XML ILCD format" + lciaCSV <- optStrOpt "csv" Nothing "FILE" "Export results to CSV format" pure LCIAOptions{..} -- | Debug matrices command parser @@ -402,29 +246,13 @@ debugMatricesParser = do -- | Debug matrices options parser debugMatricesOptionsParser :: Parser DebugMatricesOptions debugMatricesOptionsParser = do - debugOutput <- - strOption - ( long "output" - <> short 'o' - <> metavar "FILE" - <> help "Base filename for debug output (will generate _supply_chain.csv and _biosphere_matrix.csv)" - ) - - debugFlowFilter <- - optional $ - strOption - ( long "flow-filter" - <> metavar "FLOW" - <> help "Filter to specific biosphere flow (e.g., 'Sulphur dioxide')" - ) - + debugOutput <- strOpt "output" (Just 'o') "FILE" "Base filename for debug output (will generate _supply_chain.csv and _biosphere_matrix.csv)" + debugFlowFilter <- optTextOpt "flow-filter" Nothing "FLOW" "Filter to specific biosphere flow (e.g., 'Sulphur dioxide')" pure DebugMatricesOptions{..} -- | Export matrices parser exportMatricesParser :: Parser Command -exportMatricesParser = do - outputDir <- argument str (metavar "OUTPUT_DIR" <> help "Output directory for matrix export") - pure $ ExportMatrices outputDir +exportMatricesParser = ExportMatrices <$> strArg "OUTPUT_DIR" "Output directory for matrix export" {- | Flow mapping command parser (renamed from 'mapping' to disambiguate from compartment-mapping and similar resources). From ed74afdfdadb007934e115c3f8e3f02aadf7543b Mon Sep 17 00:00:00 2001 From: Christophe Combelles Date: Tue, 26 May 2026 01:21:57 +0200 Subject: [PATCH 10/43] chore(dbhandlers): drop unused Handler import MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cleanup leftover after the AppM migration in commit d0a9b1f. The file no longer references Servant's Handler directly — every former `Handler X` signature is now `AppM X`, so the `Handler` symbol is dead in the import list. Restores the build to the pre-branch warning count (2: pre-existing MCP unused-import / unused-match). --- src/API/DatabaseHandlers.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/API/DatabaseHandlers.hs b/src/API/DatabaseHandlers.hs index c87f21fe..ae990c6e 100644 --- a/src/API/DatabaseHandlers.hs +++ b/src/API/DatabaseHandlers.hs @@ -54,7 +54,7 @@ import Data.Text (Text) import qualified Data.Text as T import qualified Data.Text.Encoding as T import qualified Data.Text.IO as T -import Servant (Handler, Header, Headers, addHeader, err400, err404, err500, errBody, throwError) +import Servant (Header, Headers, addHeader, err400, err404, err500, errBody, throwError) import qualified System.Directory import System.FilePath (()) From d810d3f146fbdc43ead7e04e1261638760860c14 Mon Sep 17 00:00:00 2001 From: Christophe Combelles Date: Tue, 26 May 2026 11:06:19 +0200 Subject: [PATCH 11/43] refactor(schema): co-locate ToSchema with data declarations via Stripped MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Resolves the orphan / compilation-cycle deferral from C2. Each domain data type now declares its own ToSchema next to its ToJSON / FromJSON instance, derived via the same Stripped newtype that already carries the JSON dictionaries. Effect on src/API/OpenApi.hs: 293 → 144 lines. The module loses its role as the orphan ToSchema collector and shrinks to a few special cases — Value (untyped JSON), LocationKind (lowercase wire codes that the generic schema would not surface), BinaryContent (OctetStream format), plus the small handful of Database.Manager domain types (MissingSupplier, DependencyStatus, DependencyChoice, LocationFallback, LocationUnresolved, DatabaseSetupInfo) — and the enrichWithResources post-processor that was always its real reason to exist. Three categories handled: 1. Records that already had Stripped JSON deriving — added ToSchema to the same `deriving (...) via (Stripped X)` clause. 2. Sum types with default Generic encoding (TechRole, BioDirection in Types.hs; NodeType, EdgeType, FlowRole in API/Types.hs) — added `deriving anyclass (ToSchema)`. 3. Polymorphic SearchResults a — standalone `deriving via (Stripped (SearchResults a)) instance ToSchema a => ToSchema (SearchResults a)` (the polymorphic context requires standalone). Two custom ToSchema instances (ApiFlow with its discriminated `kind` union; PerturbedEntry with its flattened Either) moved from OpenApi.hs to API/Types.hs alongside their data declarations — same logic, different home. The lens / openapi3 imports came along. Net: −53 LOC across three files. openapi.json byte-identical; hspec 1052/1052 green. Categorically: this is the same isomorphism transport (Coercible witness) as the JSON commits, just for a third typeclass. With ToJSON / FromJSON / ToSchema all derived through one newtype, the "this type's wire form" definition lives in one place — what Milewski calls /faithful representation/: the schema and the serializer cannot drift, because they share their Generic Rep. --- src/API/OpenApi.hs | 169 ++----------------------------- src/API/Types.hs | 242 +++++++++++++++++++++++++++++++-------------- src/Types.hs | 16 +-- 3 files changed, 187 insertions(+), 240 deletions(-) diff --git a/src/API/OpenApi.hs b/src/API/OpenApi.hs index 57fd5a54..febd7612 100644 --- a/src/API/OpenApi.hs +++ b/src/API/OpenApi.hs @@ -18,17 +18,16 @@ import API.JsonOptions (strippedSchemaOptions) import API.Resources (Resource) import qualified API.Resources as R import API.Types -import Control.Lens ((%~), (&), (.~), (?~), (^.)) +import Control.Lens ((%~), (&), (?~), (^.)) import Data.Aeson (Value, toJSON) import qualified Data.HashMap.Strict.InsOrd as InsOrdHashMap import Data.OpenApi import qualified Data.OpenApi.Lens as OA -import Data.Proxy (Proxy (..)) import Data.Text (Text) import qualified Data.Text as T import Database.Manager (DatabaseSetupInfo, DependencyChoice, DependencyStatus, MissingSupplier) import Network.HTTP.Types.Method (StdMethod (..)) -import Types (BioDirection, BiosphereFlow, Compartment, Exchange, LocationFallback, LocationKind, LocationUnresolved, Pedigree, TechRole, TechnosphereFlow, Unit) +import Types (LocationFallback, LocationKind, LocationUnresolved) {- | Orphan schema instance forward declaration for the login request body. The real type lives in "API.Routes"; this is defined there and re-imported @@ -40,15 +39,8 @@ the type in "API.Routes" — see 'instance ToSchema LoginRequest' there. instance ToSchema Value where declareNamedSchema _ = pure $ NamedSchema (Just "JsonValue") mempty --- Domain types -instance ToSchema TechRole -instance ToSchema BioDirection -instance ToSchema Compartment where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions -instance ToSchema Unit where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions -instance ToSchema TechnosphereFlow where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions -instance ToSchema BiosphereFlow where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions -instance ToSchema Pedigree where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions -instance ToSchema Exchange where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions +-- Domain types: TechRole, BioDirection, Unit now derive ToSchema next to +-- their data declarations in src/Types.hs (via anyclass / DerivingVia). -- Database.Manager types instance ToSchema MissingSupplier @@ -78,155 +70,14 @@ instance ToSchema DatabaseSetupInfo where declareNamedSchema = genericDeclareNam -- API.Types — every record type uses strippedSchemaOptions so the generated -- OpenAPI spec matches the wire JSON keys produced by API.JsonOptions.stripLowerPrefix. -instance ToSchema ClassificationSystem where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions -instance (ToSchema a) => ToSchema (SearchResults a) where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions -instance ToSchema ActivitySummary where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions -instance ToSchema ConsumerResult where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions -instance ToSchema ConsumersResponse where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions -instance ToSchema FlowSearchResult where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions -instance ToSchema InventoryExport where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions -instance ToSchema InventoryMetadata where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions -instance ToSchema InventoryFlowDetail where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions -instance ToSchema InventoryStatistics where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions -instance ToSchema TreeExport where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions -instance ToSchema TreeMetadata where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions -instance ToSchema ExportNode where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions -instance ToSchema NodeType -instance ToSchema EdgeType -instance ToSchema TreeEdge where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions -instance ToSchema FlowInfo where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions -instance ToSchema FlowSummary where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions -instance ToSchema FlowRole -instance ToSchema GraphExport where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions -instance ToSchema GraphNode where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions -instance ToSchema GraphEdge where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions -instance ToSchema LCIAResult where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions -instance ToSchema ScoringIndicator where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions -instance ToSchema LCIABatchResult where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions -instance ToSchema BatchImpactsRequest where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions -instance ToSchema BatchImpactsEntry where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions -instance ToSchema BatchImpactsResponse where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions -instance ToSchema FlowContributionEntry where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions -instance ToSchema ContributingFlowsResult where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions -instance ToSchema ActivityContribution where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions -instance ToSchema ContributingActivitiesResult where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions -instance ToSchema MappingStatus where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions -instance ToSchema UnmappedFlowAPI where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions -instance ToSchema FlowCFMapping where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions -instance ToSchema FlowCFEntry where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions -instance ToSchema CharacterizationResult where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions -instance ToSchema CharacterizationEntry where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions -instance ToSchema DatabaseListResponse where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions -instance ToSchema DatabaseStatusAPI where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions -instance ToSchema ActivateResponse where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions -instance ToSchema RelinkResponse where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions -instance ToSchema DepLoadResult where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions -instance ToSchema LoadDatabaseResponse where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions -instance ToSchema UploadRequest where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions -instance ToSchema UploadResponse where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions -instance ToSchema MethodCollectionListResponse where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions -instance ToSchema MethodCollectionStatusAPI where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions -instance ToSchema RefDataListResponse where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions -instance ToSchema RefDataStatusAPI where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions -instance ToSchema SynonymGroupsResponse where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions -instance ToSchema MethodSummary where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions -instance ToSchema MethodDetail where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions -instance ToSchema MethodFactorAPI where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions -instance ToSchema SupplyChainResponse where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions -instance ToSchema SupplyChainEntry where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions -instance ToSchema SupplyChainEdge where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions -instance ToSchema Aggregation where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions -instance ToSchema AggregationGroup where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions -instance ToSchema SubstitutionRequest where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions -instance ToSchema Substitution where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions -instance ToSchema SensitivityRequest where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions -instance ToSchema SensitivityResponse where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions +-- ToSchema (SearchResults a) standalone-derived in API.Types via Stripped. +-- ToSchema for ApiFlow, NodeType, EdgeType, FlowRole now derived next to +-- their data declarations in src/API/Types.hs (NodeType/EdgeType/FlowRole +-- via anyclass; ApiFlow has a custom instance there to keep the discriminated +-- `kind` union representation). --- Manual schema for ApiFlow — discriminated by 'kind' so OpenAPI consumers --- see a real tagged union instead of a generic Either. -instance ToSchema ApiFlow where - declareNamedSchema _ = do - techRef <- declareSchemaRef (Proxy :: Proxy TechnosphereFlow) - bioRef <- declareSchemaRef (Proxy :: Proxy BiosphereFlow) - let kindEnum = - mempty - & type_ ?~ OpenApiString - & enum_ - ?~ [ toJSON ("technosphere" :: Text) - , toJSON ("biosphere" :: Text) - , toJSON ("unresolved" :: Text) - ] - tech = - mempty - & type_ ?~ OpenApiObject - & properties - .~ InsOrdHashMap.fromList - [ ("kind", Inline (mempty & type_ ?~ OpenApiString & enum_ ?~ [toJSON ("technosphere" :: Text)])) - , ("flow", techRef) - ] - & required .~ ["kind", "flow"] - bio = - mempty - & type_ ?~ OpenApiObject - & properties - .~ InsOrdHashMap.fromList - [ ("kind", Inline (mempty & type_ ?~ OpenApiString & enum_ ?~ [toJSON ("biosphere" :: Text)])) - , ("flow", bioRef) - ] - & required .~ ["kind", "flow"] - unresolved = - mempty - & type_ ?~ OpenApiObject - & properties - .~ InsOrdHashMap.fromList - [ ("kind", Inline (mempty & type_ ?~ OpenApiString & enum_ ?~ [toJSON ("unresolved" :: Text)])) - , ("id", Inline (mempty & type_ ?~ OpenApiString & format ?~ "uuid")) - ] - & required .~ ["kind", "id"] - pure $ - NamedSchema (Just "ApiFlow") $ - mempty - & type_ ?~ OpenApiObject - & properties - .~ InsOrdHashMap.fromList - [ ("kind", Inline kindEnum) - ] - & required .~ ["kind"] - & OA.oneOf ?~ [Inline tech, Inline bio, Inline unresolved] +-- PerturbedEntry's custom schema moved to API.Types alongside its data decl. --- Manual schema: the Either inside PerturbedEntry is flattened by ToJSON --- to {perturbation, impact, deltaImpact} on success and {perturbation, error} --- on failure. The Generic-derived schema would expose the Haskell shape --- (a oneOf wrapper around the Either) instead of the flat wire format. -instance ToSchema PerturbedEntry where - declareNamedSchema _ = do - pertRef <- declareSchemaRef (Proxy :: Proxy Perturbation) - lciaRef <- declareSchemaRef (Proxy :: Proxy LCIAResult) - doubleRef <- declareSchemaRef (Proxy :: Proxy Double) - textRef <- declareSchemaRef (Proxy :: Proxy Text) - pure $ - NamedSchema (Just "PerturbedEntry") $ - mempty - & type_ ?~ OpenApiObject - & properties - .~ InsOrdHashMap.fromList - [ ("perturbation", pertRef) - , ("impact", lciaRef) - , ("deltaImpact", doubleRef) - , ("error", textRef) - ] - & required .~ ["perturbation"] -instance ToSchema Perturbation where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions -instance ToSchema ExchangeDetail where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions -instance ToSchema ExchangeWithUnit where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions -instance ToSchema ActivityForAPI where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions -instance ToSchema ActivityInfo where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions -instance ToSchema ActivityMetadata where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions -instance ToSchema ActivityLinks where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions -instance ToSchema ActivityStats where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions -instance ToSchema FlowDetail where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions -instance ToSchema ClassificationEntryInfo where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions -instance ToSchema ClassificationPresetInfo where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions instance ToSchema BinaryContent where declareNamedSchema _ = pure $ diff --git a/src/API/Types.hs b/src/API/Types.hs index 81fb81d2..45a5ec74 100644 --- a/src/API/Types.hs +++ b/src/API/Types.hs @@ -1,16 +1,23 @@ +{-# LANGUAGE DeriveAnyClass #-} {-# LANGUAGE DeriveGeneric #-} +{-# LANGUAGE DerivingVia #-} {-# LANGUAGE MultiParamTypeClasses #-} {-# LANGUAGE OverloadedStrings #-} -{-# LANGUAGE DerivingVia #-} +{-# LANGUAGE StandaloneDeriving #-} {-# OPTIONS_GHC -Wno-orphans #-} module API.Types where import API.JsonOptions (Stripped (..), strippedParseJSON, strippedToEncoding, strippedToJSON) +import Control.Lens ((&), (.~), (?~)) import Data.Aeson import Data.Aeson.Types (Parser) import qualified Data.ByteString.Lazy as BSL +import qualified Data.HashMap.Strict.InsOrd as InsOrdHashMap import qualified Data.Map as M +import Data.OpenApi (NamedSchema (..), OpenApiType (..), Referenced (..), ToSchema (..), declareSchemaRef, enum_, format, properties, required, type_) +import qualified Data.OpenApi.Lens as OA +import Data.Proxy (Proxy (..)) import qualified Data.Set as S import Data.Text (Text) import qualified Data.Text as T @@ -95,7 +102,60 @@ instance FromJSON ApiFlow where "unresolved" -> ApiUnresolvedFlow <$> o .: "id" other -> fail $ "ApiFlow.kind must be \"technosphere\", \"biosphere\", \"waste\", or \"unresolved\", got: " <> T.unpack other --- | Search response combining results and count +-- | Manual schema for ApiFlow — discriminated by 'kind' so OpenAPI consumers +-- see a real tagged union instead of a generic Either. +instance ToSchema ApiFlow where + declareNamedSchema _ = do + techRef <- declareSchemaRef (Proxy :: Proxy TechnosphereFlow) + bioRef <- declareSchemaRef (Proxy :: Proxy BiosphereFlow) + let kindEnum = + mempty + & type_ ?~ OpenApiString + & enum_ + ?~ [ toJSON ("technosphere" :: Text) + , toJSON ("biosphere" :: Text) + , toJSON ("unresolved" :: Text) + ] + tech = + mempty + & type_ ?~ OpenApiObject + & properties + .~ InsOrdHashMap.fromList + [ ("kind", Inline (mempty & type_ ?~ OpenApiString & enum_ ?~ [toJSON ("technosphere" :: Text)])) + , ("flow", techRef) + ] + & required .~ ["kind", "flow"] + bio = + mempty + & type_ ?~ OpenApiObject + & properties + .~ InsOrdHashMap.fromList + [ ("kind", Inline (mempty & type_ ?~ OpenApiString & enum_ ?~ [toJSON ("biosphere" :: Text)])) + , ("flow", bioRef) + ] + & required .~ ["kind", "flow"] + unresolved = + mempty + & type_ ?~ OpenApiObject + & properties + .~ InsOrdHashMap.fromList + [ ("kind", Inline (mempty & type_ ?~ OpenApiString & enum_ ?~ [toJSON ("unresolved" :: Text)])) + , ("id", Inline (mempty & type_ ?~ OpenApiString & format ?~ "uuid")) + ] + & required .~ ["kind", "id"] + pure $ + NamedSchema (Just "ApiFlow") $ + mempty + & type_ ?~ OpenApiObject + & properties + .~ InsOrdHashMap.fromList + [ ("kind", Inline kindEnum) + ] + & required .~ ["kind"] + & OA.oneOf ?~ [Inline tech, Inline bio, Inline unresolved] + +-- | Search response combining results and count. ToSchema is added below +-- via a standalone deriving (needed because of the `(ToSchema a) =>` context). data SearchResults a = SearchResults { srResults :: [a] -- The actual search results , srTotal :: Int -- Total count of all matching items (before pagination) @@ -106,6 +166,10 @@ data SearchResults a = SearchResults } deriving (Generic) +deriving via (Stripped (SearchResults a)) instance ToJSON a => ToJSON (SearchResults a) +deriving via (Stripped (SearchResults a)) instance FromJSON a => FromJSON (SearchResults a) +deriving via (Stripped (SearchResults a)) instance ToSchema a => ToSchema (SearchResults a) + -- | Minimal activity information for navigation data ActivitySummary = ActivitySummary { prsProcessId :: Text -- ProcessId format: activity_uuid_product_uuid @@ -118,7 +182,7 @@ data ActivitySummary = ActivitySummary , prsAllocationFormula :: Maybe Text -- Raw SimaPro allocation formula; Nothing if purely numeric } deriving (Generic) - deriving (ToJSON, FromJSON) via (Stripped ActivitySummary) + deriving (ToJSON, FromJSON, ToSchema) via (Stripped ActivitySummary) -- | Consumer result — ActivitySummary enriched with BFS depth from the queried supplier data ConsumerResult = ConsumerResult @@ -132,7 +196,7 @@ data ConsumerResult = ConsumerResult , crClassifications :: M.Map Text Text -- Classifications (ISIC, CPC, Category, etc.), mirrors SupplyChainEntry } deriving (Generic) - deriving (ToJSON, FromJSON) via (Stripped ConsumerResult) + deriving (ToJSON, FromJSON, ToSchema) via (Stripped ConsumerResult) {- | Wrapper for /consumers responses. Mirrors 'SupplyChainResponse' so clients have a uniform {entries, edges} shape in both traversal directions. Edges @@ -144,7 +208,7 @@ data ConsumersResponse = ConsumersResponse , crrEdges :: ![SupplyChainEdge] } deriving (Generic) - deriving (ToJSON, FromJSON) via (Stripped ConsumersResponse) + deriving (ToJSON, FromJSON, ToSchema) via (Stripped ConsumersResponse) -- | Enhanced flow information for search results (now includes synonyms) data FlowSearchResult = FlowSearchResult @@ -155,7 +219,7 @@ data FlowSearchResult = FlowSearchResult , fsrSynonyms :: M.Map Text [Text] -- Synonyms by language (converted from Set to List for JSON) } deriving (Generic) - deriving (ToJSON) via (Stripped FlowSearchResult) + deriving (ToJSON, ToSchema) via (Stripped FlowSearchResult) -- | Inventory export data structures data InventoryExport = InventoryExport @@ -164,7 +228,7 @@ data InventoryExport = InventoryExport , ieStatistics :: InventoryStatistics } deriving (Generic) - deriving (ToJSON) via (Stripped InventoryExport) + deriving (ToJSON, ToSchema) via (Stripped InventoryExport) data InventoryMetadata = InventoryMetadata { imRootActivity :: ActivitySummary @@ -173,7 +237,7 @@ data InventoryMetadata = InventoryMetadata , imResourceFlows :: Int -- Biosphere inputs (resource extraction) } deriving (Generic) - deriving (ToJSON) via (Stripped InventoryMetadata) + deriving (ToJSON, ToSchema) via (Stripped InventoryMetadata) data InventoryFlowDetail = InventoryFlowDetail { ifdFlow :: BiosphereFlow -- Inventory flows are always biosphere @@ -183,7 +247,7 @@ data InventoryFlowDetail = InventoryFlowDetail , ifdCategory :: Text -- Flow category for grouping } deriving (Generic) - deriving (ToJSON) via (Stripped InventoryFlowDetail) + deriving (ToJSON, ToSchema) via (Stripped InventoryFlowDetail) data InventoryStatistics = InventoryStatistics { isTotalQuantity :: Double -- Sum of absolute values @@ -192,7 +256,7 @@ data InventoryStatistics = InventoryStatistics , isTopCategories :: [(Text, Int)] -- Top flow categories by count } deriving (Generic) - deriving (ToJSON) via (Stripped InventoryStatistics) + deriving (ToJSON, ToSchema) via (Stripped InventoryStatistics) -- | Tree export data structures for visualization data TreeExport = TreeExport @@ -201,7 +265,7 @@ data TreeExport = TreeExport , teEdges :: [TreeEdge] } deriving (Generic) - deriving (ToJSON) via (Stripped TreeExport) + deriving (ToJSON, ToSchema) via (Stripped TreeExport) data TreeMetadata = TreeMetadata { tmRootId :: Text -- Changed to Text (ProcessId format) @@ -212,7 +276,7 @@ data TreeMetadata = TreeMetadata , tmExpandableNodes :: Int -- Nodes that could expand further } deriving (Generic) - deriving (ToJSON) via (Stripped TreeMetadata) + deriving (ToJSON, ToSchema) via (Stripped TreeMetadata) data ExportNode = ExportNode { enId :: Text -- Changed to Text (ProcessId format) @@ -228,13 +292,15 @@ data ExportNode = ExportNode , enCompartment :: Maybe Text -- Biosphere compartment (air/water/soil), only for BiosphereNodes } deriving (Generic) - deriving (ToJSON) via (Stripped ExportNode) + deriving (ToJSON, ToSchema) via (Stripped ExportNode) data NodeType = ActivityNode | LoopNode | BiosphereEmissionNode | BiosphereResourceNode deriving (Eq, Show, Generic) + deriving anyclass (ToSchema) data EdgeType = TechnosphereEdge | BiosphereEmissionEdge | BiosphereResourceEdge deriving (Eq, Show, Generic) + deriving anyclass (ToSchema) data TreeEdge = TreeEdge { teFrom :: Text -- Changed to Text (ProcessId format) @@ -245,7 +311,7 @@ data TreeEdge = TreeEdge , teEdgeType :: EdgeType -- Type of edge (technosphere or biosphere) } deriving (Generic) - deriving (ToJSON) via (Stripped TreeEdge) + deriving (ToJSON, ToSchema) via (Stripped TreeEdge) data FlowInfo = FlowInfo { fiId :: UUID @@ -253,7 +319,7 @@ data FlowInfo = FlowInfo , fiCategory :: Text } deriving (Generic) - deriving (ToJSON) via (Stripped FlowInfo) + deriving (ToJSON, ToSchema) via (Stripped FlowInfo) -- | Graph export data structures for network visualization data GraphExport = GraphExport @@ -262,7 +328,7 @@ data GraphExport = GraphExport , geUnitGroups :: M.Map Text Text -- Unit to unit group mapping } deriving (Generic) - deriving (ToJSON) via (Stripped GraphExport) + deriving (ToJSON, ToSchema) via (Stripped GraphExport) data GraphNode = GraphNode { gnNodeId :: Int -- Numeric ID for efficient frontend processing @@ -273,7 +339,7 @@ data GraphNode = GraphNode , gnLocation :: Text -- Geography } deriving (Generic) - deriving (ToJSON) via (Stripped GraphNode) + deriving (ToJSON, ToSchema) via (Stripped GraphNode) data GraphEdge = GraphEdge { geSource :: Int -- Source node ID @@ -283,7 +349,7 @@ data GraphEdge = GraphEdge , geFlowName :: Text -- Name of the flow } deriving (Generic) - deriving (ToJSON) via (Stripped GraphEdge) + deriving (ToJSON, ToSchema) via (Stripped GraphEdge) {- | Lightweight flow information for lists. Carries either a tech or a bio flow; the @ApiFlow@ tag is the wire discriminator. @@ -295,11 +361,12 @@ data FlowSummary = FlowSummary , fsRole :: FlowRole -- Role in this specific activity } deriving (Generic) - deriving (ToJSON) via (Stripped FlowSummary) + deriving (ToJSON, ToSchema) via (Stripped FlowSummary) -- | Role of a flow in a specific activity context data FlowRole = InputFlow | OutputFlow | ReferenceProductFlow deriving (Show, Generic) + deriving anyclass (ToSchema) -- Synonym types removed - synonyms are now included directly in flow responses @@ -313,14 +380,14 @@ data MethodSummary = MethodSummary , msmCollection :: Text -- Parent collection name (e.g., "ef-31") } deriving (Generic) - deriving (ToJSON) via (Stripped MethodSummary) + deriving (ToJSON, ToSchema) via (Stripped MethodSummary) -- | Method collection list response newtype MethodCollectionListResponse = MethodCollectionListResponse { mclMethods :: [MethodCollectionStatusAPI] } deriving (Generic) - deriving (ToJSON, FromJSON) via (Stripped MethodCollectionListResponse) + deriving (ToJSON, FromJSON, ToSchema) via (Stripped MethodCollectionListResponse) -- | Method collection status for API responses data MethodCollectionStatusAPI = MethodCollectionStatusAPI @@ -334,14 +401,14 @@ data MethodCollectionStatusAPI = MethodCollectionStatusAPI , mcaFormat :: Maybe Text -- Format (e.g. "ILCD") } deriving (Generic) - deriving (ToJSON, FromJSON) via (Stripped MethodCollectionStatusAPI) + deriving (ToJSON, FromJSON, ToSchema) via (Stripped MethodCollectionStatusAPI) -- | Reference data list response (flow synonyms, compartment mappings, units) newtype RefDataListResponse = RefDataListResponse { rdlItems :: [RefDataStatusAPI] } deriving (Generic) - deriving (ToJSON, FromJSON) via (Stripped RefDataListResponse) + deriving (ToJSON, FromJSON, ToSchema) via (Stripped RefDataListResponse) -- | Reference data status for API responses data RefDataStatusAPI = RefDataStatusAPI @@ -354,14 +421,14 @@ data RefDataStatusAPI = RefDataStatusAPI , rdaEntryCount :: Int } deriving (Generic) - deriving (ToJSON, FromJSON) via (Stripped RefDataStatusAPI) + deriving (ToJSON, FromJSON, ToSchema) via (Stripped RefDataStatusAPI) -- | Synonym groups response newtype SynonymGroupsResponse = SynonymGroupsResponse { sgrGroups :: [[Text]] } deriving (Generic) - deriving (ToJSON, FromJSON) via (Stripped SynonymGroupsResponse) + deriving (ToJSON, FromJSON, ToSchema) via (Stripped SynonymGroupsResponse) -- | Full method details data MethodDetail = MethodDetail @@ -374,7 +441,7 @@ data MethodDetail = MethodDetail , mdFactorCount :: Int } deriving (Generic) - deriving (ToJSON) via (Stripped MethodDetail) + deriving (ToJSON, ToSchema) via (Stripped MethodDetail) -- | Characterization factor for API response data MethodFactorAPI = MethodFactorAPI @@ -384,7 +451,7 @@ data MethodFactorAPI = MethodFactorAPI , mfaValue :: Double -- CF value } deriving (Generic) - deriving (ToJSON) via (Stripped MethodFactorAPI) + deriving (ToJSON, ToSchema) via (Stripped MethodFactorAPI) -- | A single flow's contribution to an LCIA score data FlowContributionEntry = FlowContributionEntry @@ -397,7 +464,7 @@ data FlowContributionEntry = FlowContributionEntry , fcoCfValue :: Double -- Raw characterization factor value } deriving (Generic) - deriving (ToJSON) via (Stripped FlowContributionEntry) + deriving (ToJSON, ToSchema) via (Stripped FlowContributionEntry) -- | LCIA result for a single impact category data LCIAResult = LCIAResult @@ -414,7 +481,7 @@ data LCIAResult = LCIAResult , lrTopContributors :: [FlowContributionEntry] -- Top contributing elementary flows } deriving (Generic) - deriving (ToJSON) via (Stripped LCIAResult) + deriving (ToJSON, ToSchema) via (Stripped LCIAResult) -- | Contributing flows result: top elementary flows for a specific impact category data ContributingFlowsResult = ContributingFlowsResult @@ -424,7 +491,7 @@ data ContributingFlowsResult = ContributingFlowsResult , cfrTopFlows :: [FlowContributionEntry] } deriving (Generic) - deriving (ToJSON) via (Stripped ContributingFlowsResult) + deriving (ToJSON, ToSchema) via (Stripped ContributingFlowsResult) -- | A single activity's contribution to an LCIA score data ActivityContribution = ActivityContribution @@ -436,7 +503,7 @@ data ActivityContribution = ActivityContribution , acSharePct :: Double -- Percentage of total score (0-100) } deriving (Generic) - deriving (ToJSON) via (Stripped ActivityContribution) + deriving (ToJSON, ToSchema) via (Stripped ActivityContribution) -- | Contributing activities result: top upstream activities for a specific impact category data ContributingActivitiesResult = ContributingActivitiesResult @@ -446,14 +513,14 @@ data ContributingActivitiesResult = ContributingActivitiesResult , carActivities :: [ActivityContribution] } deriving (Generic) - deriving (ToJSON) via (Stripped ContributingActivitiesResult) + deriving (ToJSON, ToSchema) via (Stripped ContributingActivitiesResult) -- | Batch impacts request: compute LCIA for every process in one call. newtype BatchImpactsRequest = BatchImpactsRequest { birProcessIds :: [Text] } deriving (Generic) - deriving (FromJSON) via (Stripped BatchImpactsRequest) + deriving (FromJSON, ToSchema) via (Stripped BatchImpactsRequest) -- | One entry of a batch impacts response. data BatchImpactsEntry = BatchImpactsEntry @@ -462,7 +529,7 @@ data BatchImpactsEntry = BatchImpactsEntry , bieImpacts :: LCIABatchResult } deriving (Generic) - deriving (ToJSON) via (Stripped BatchImpactsEntry) + deriving (ToJSON, ToSchema) via (Stripped BatchImpactsEntry) {- | Batch impacts response: one entry per successfully computed process, plus lists of process ids that could not be resolved. @@ -473,7 +540,7 @@ data BatchImpactsResponse = BatchImpactsResponse , birInvalid :: [Text] } deriving (Generic) - deriving (ToJSON) via (Stripped BatchImpactsResponse) + deriving (ToJSON, ToSchema) via (Stripped BatchImpactsResponse) {- | A single scoring-set indicator: the per-variable normalized-weighted value plus the impact category it came from. Value is pre-multiplied by the @@ -484,7 +551,7 @@ data ScoringIndicator = ScoringIndicator , siValue :: Double } deriving (Generic) - deriving (ToJSON) via (Stripped ScoringIndicator) + deriving (ToJSON, ToSchema) via (Stripped ScoringIndicator) -- | Batch LCIA result with optional single score data LCIABatchResult = LCIABatchResult @@ -501,7 +568,7 @@ data LCIABatchResult = LCIABatchResult -- ^ Scoring set name → (variable name → indicator). One row per scoring variable. } deriving (Generic) - deriving (ToJSON) via (Stripped LCIABatchResult) + deriving (ToJSON, ToSchema) via (Stripped LCIABatchResult) -- | Flow mapping status for a method data MappingStatus = MappingStatus @@ -519,7 +586,7 @@ data MappingStatus = MappingStatus , mstUnmappedFlows :: [UnmappedFlowAPI] -- Details of unmapped flows } deriving (Generic) - deriving (ToJSON) via (Stripped MappingStatus) + deriving (ToJSON, ToSchema) via (Stripped MappingStatus) -- | Details about an unmapped flow data UnmappedFlowAPI = UnmappedFlowAPI @@ -528,7 +595,7 @@ data UnmappedFlowAPI = UnmappedFlowAPI , ufaDirection :: Text -- "Input" or "Output" } deriving (Generic) - deriving (ToJSON) via (Stripped UnmappedFlowAPI) + deriving (ToJSON, ToSchema) via (Stripped UnmappedFlowAPI) -- | DB-flow-centric mapping: all biosphere flows with their CF assignments data FlowCFMapping = FlowCFMapping @@ -539,7 +606,7 @@ data FlowCFMapping = FlowCFMapping , fcmFlows :: [FlowCFEntry] } deriving (Generic) - deriving (ToJSON) via (Stripped FlowCFMapping) + deriving (ToJSON, ToSchema) via (Stripped FlowCFMapping) -- | A single DB biosphere flow with its CF assignment (if any) data FlowCFEntry = FlowCFEntry @@ -551,7 +618,7 @@ data FlowCFEntry = FlowCFEntry , fceMatchStrategy :: Maybe Text -- "uuid" | "name" | "synonym" } deriving (Generic) - deriving (ToJSON) via (Stripped FlowCFEntry) + deriving (ToJSON, ToSchema) via (Stripped FlowCFEntry) -- | Characterization result: matched CFs for a method in a database data CharacterizationResult = CharacterizationResult @@ -562,7 +629,7 @@ data CharacterizationResult = CharacterizationResult , chrFactors :: [CharacterizationEntry] } deriving (Generic) - deriving (ToJSON) via (Stripped CharacterizationResult) + deriving (ToJSON, ToSchema) via (Stripped CharacterizationResult) -- | A single matched characterization factor data CharacterizationEntry = CharacterizationEntry @@ -578,14 +645,14 @@ data CharacterizationEntry = CharacterizationEntry , cheMatchStrategy :: Text -- "uuid", "cas", "name", "synonym", "fuzzy" } deriving (Generic) - deriving (ToJSON) via (Stripped CharacterizationEntry) + deriving (ToJSON, ToSchema) via (Stripped CharacterizationEntry) -- | Database list response newtype DatabaseListResponse = DatabaseListResponse { dlrDatabases :: [DatabaseStatusAPI] -- All available databases } deriving (Generic) - deriving (ToJSON, FromJSON) via (Stripped DatabaseListResponse) + deriving (ToJSON, FromJSON, ToSchema) via (Stripped DatabaseListResponse) -- | Database status for API responses data DatabaseStatusAPI = DatabaseStatusAPI @@ -601,7 +668,7 @@ data DatabaseStatusAPI = DatabaseStatusAPI , dsaDependsOn :: [Text] -- Names of databases this one depends on (for cross-DB linking) } deriving (Generic) - deriving (ToJSON, FromJSON) via (Stripped DatabaseStatusAPI) + deriving (ToJSON, FromJSON, ToSchema) via (Stripped DatabaseStatusAPI) -- | Response for database activation data ActivateResponse = ActivateResponse @@ -610,7 +677,7 @@ data ActivateResponse = ActivateResponse , arDatabase :: Maybe DatabaseStatusAPI } deriving (Generic) - deriving (ToJSON, FromJSON) via (Stripped ActivateResponse) + deriving (ToJSON, FromJSON, ToSchema) via (Stripped ActivateResponse) {- | Response for the re-link endpoint: fresh cross-DB link stats after a second-pass linking against the currently-loaded databases. @@ -623,21 +690,21 @@ data RelinkResponse = RelinkResponse , rrDependsOn :: [Text] } deriving (Generic) - deriving (ToJSON, FromJSON) via (Stripped RelinkResponse) + deriving (ToJSON, FromJSON, ToSchema) via (Stripped RelinkResponse) -- | Result of auto-loading a single dependency data DepLoadResult = DepLoaded {dlrName :: Text} | DepLoadFailed {dlfName :: Text, dlfError :: Text} deriving (Generic) - deriving (ToJSON, FromJSON) via (Stripped DepLoadResult) + deriving (ToJSON, FromJSON, ToSchema) via (Stripped DepLoadResult) -- | Response for the load database endpoint data LoadDatabaseResponse = LoadFailed {ldrError :: Text} | LoadSucceeded {ldrDatabase :: DatabaseStatusAPI, ldrDeps :: [DepLoadResult]} deriving (Generic) - deriving (ToJSON, FromJSON) via (Stripped LoadDatabaseResponse) + deriving (ToJSON, FromJSON, ToSchema) via (Stripped LoadDatabaseResponse) -- | Request for database upload (base64-encoded ZIP) data UploadRequest = UploadRequest @@ -646,7 +713,7 @@ data UploadRequest = UploadRequest , urFileData :: Text -- Base64-encoded ZIP file content } deriving (Generic) - deriving (ToJSON, FromJSON) via (Stripped UploadRequest) + deriving (ToJSON, FromJSON, ToSchema) via (Stripped UploadRequest) -- | Response for database upload data UploadResponse = UploadResponse @@ -656,7 +723,7 @@ data UploadResponse = UploadResponse , uprFormat :: Maybe Text -- Detected format (if successful) } deriving (Generic) - deriving (ToJSON, FromJSON) via (Stripped UploadResponse) + deriving (ToJSON, FromJSON, ToSchema) via (Stripped UploadResponse) -- | Supply chain response — all upstream activities with scaling factors data SupplyChainResponse = SupplyChainResponse @@ -667,7 +734,7 @@ data SupplyChainResponse = SupplyChainResponse , scrEdges :: [SupplyChainEdge] } deriving (Generic) - deriving (ToJSON) via (Stripped SupplyChainResponse) + deriving (ToJSON, ToSchema) via (Stripped SupplyChainResponse) {- | A single entry in the supply chain. @sceProcessId@ is bare for entries from the root DB and qualified (@"dbName::pid"@) for entries reached via @@ -687,7 +754,7 @@ data SupplyChainEntry = SupplyChainEntry , sceUpstreamCount :: Int -- number of unique upstream activities reachable from this one } deriving (Generic) - deriving (ToJSON) via (Stripped SupplyChainEntry) + deriving (ToJSON, ToSchema) via (Stripped SupplyChainEntry) -- | An edge in the upstream supply chain subgraph data SupplyChainEdge = SupplyChainEdge @@ -698,7 +765,7 @@ data SupplyChainEdge = SupplyChainEdge , sceEdgeAmount :: Double -- technosphere coefficient } deriving (Generic) - deriving (ToJSON, FromJSON) via (Stripped SupplyChainEdge) + deriving (ToJSON, FromJSON, ToSchema) via (Stripped SupplyChainEdge) {- | Request body for POST endpoints that accept substitutions. Substitutions modify the scaling vector via Sherman-Morrison rank-1 updates. @@ -707,7 +774,7 @@ newtype SubstitutionRequest = SubstitutionRequest { srSubstitutions :: [Substitution] } deriving (Generic) - deriving (FromJSON) via (Stripped SubstitutionRequest) + deriving (FromJSON, ToSchema) via (Stripped SubstitutionRequest) {- | A single supplier substitution. @@ -724,7 +791,7 @@ data Substitution = Substitution , subConsumer :: Text -- Consumer activity ProcessId (bare or dbName::pid) } deriving (Generic) - deriving (FromJSON) via (Stripped Substitution) + deriving (FromJSON, ToSchema) via (Stripped Substitution) {- | A single rank-1 perturbation of a technosphere coefficient @A_ij@. @@ -739,14 +806,14 @@ data Perturbation = Perturbation , perLabel :: Maybe Text -- Optional label for response correlation } deriving (Generic) - deriving (ToJSON, FromJSON) via (Stripped Perturbation) + deriving (ToJSON, FromJSON, ToSchema) via (Stripped Perturbation) -- | Request body for POST sensitivity endpoints. Flat list, V1. newtype SensitivityRequest = SensitivityRequest { srPerturbations :: [Perturbation] } deriving (Generic) - deriving (FromJSON) via (Stripped SensitivityRequest) + deriving (FromJSON, ToSchema) via (Stripped SensitivityRequest) {- | One result entry per perturbation. The 'peResult' carries either an error message ('Left') or the (impact, deltaImpact) pair ('Right'). The @@ -761,13 +828,36 @@ data PerturbedEntry = PerturbedEntry } deriving (Generic) +-- | Manual schema for PerturbedEntry: the Either inside is flattened by ToJSON +-- to {perturbation, impact, deltaImpact} on success and {perturbation, error} +-- on failure. The Generic-derived schema would expose the Haskell shape +-- (a oneOf wrapper around the Either) instead of the flat wire format. +instance ToSchema PerturbedEntry where + declareNamedSchema _ = do + pertRef <- declareSchemaRef (Proxy :: Proxy Perturbation) + lciaRef <- declareSchemaRef (Proxy :: Proxy LCIAResult) + doubleRef <- declareSchemaRef (Proxy :: Proxy Double) + textRef <- declareSchemaRef (Proxy :: Proxy Text) + pure $ + NamedSchema (Just "PerturbedEntry") $ + mempty + & type_ ?~ OpenApiObject + & properties + .~ InsOrdHashMap.fromList + [ ("perturbation", pertRef) + , ("impact", lciaRef) + , ("deltaImpact", doubleRef) + , ("error", textRef) + ] + & required .~ ["perturbation"] + -- | Sensitivity response: baseline LCIA + one entry per perturbation (in order). data SensitivityResponse = SensitivityResponse { srBaseline :: LCIAResult , srPerturbed :: [PerturbedEntry] } deriving (Generic) - deriving (ToJSON) via (Stripped SensitivityResponse) + deriving (ToJSON, ToSchema) via (Stripped SensitivityResponse) {- | Name of the request-level "root" database — the DB extracted from the URL path and the implicit target of any bare 'ProcessId' (one without the @@ -817,7 +907,7 @@ data ExchangeWithUnit = ExchangeWithUnit , ewuPedigree :: Maybe Pedigree -- LCA data-quality scores when available (mirrors exchangePedigree) } deriving (Generic) - deriving (ToJSON, FromJSON) via (Stripped ExchangeWithUnit) + deriving (ToJSON, FromJSON, ToSchema) via (Stripped ExchangeWithUnit) -- | Activity information optimized for API responses data ActivityForAPI = ActivityForAPI @@ -835,7 +925,7 @@ data ActivityForAPI = ActivityForAPI , pfaExchanges :: [ExchangeWithUnit] -- Exchanges with unit names } deriving (Generic) - deriving (ToJSON, FromJSON) via (Stripped ActivityForAPI) + deriving (ToJSON, FromJSON, ToSchema) via (Stripped ActivityForAPI) -- | Streamlined activity information - core data only data ActivityInfo = ActivityInfo @@ -845,7 +935,7 @@ data ActivityInfo = ActivityInfo , piLinks :: ActivityLinks -- Links to sub-resources } deriving (Generic) - deriving (ToJSON, FromJSON) via (Stripped ActivityInfo) + deriving (ToJSON, FromJSON, ToSchema) via (Stripped ActivityInfo) -- | Extended activity metadata data ActivityMetadata = ActivityMetadata @@ -857,7 +947,7 @@ data ActivityMetadata = ActivityMetadata , pmReferenceProductFlow :: Maybe UUID -- Flow ID of reference product } deriving (Generic) - deriving (ToJSON, FromJSON) via (Stripped ActivityMetadata) + deriving (ToJSON, FromJSON, ToSchema) via (Stripped ActivityMetadata) -- | Links to related resources data ActivityLinks = ActivityLinks @@ -867,7 +957,7 @@ data ActivityLinks = ActivityLinks , plReferenceProductUrl :: Maybe Text -- URL to reference product (if exists) } deriving (Generic) - deriving (ToJSON, FromJSON) via (Stripped ActivityLinks) + deriving (ToJSON, FromJSON, ToSchema) via (Stripped ActivityLinks) -- | Activity statistics data ActivityStats = ActivityStats @@ -877,7 +967,7 @@ data ActivityStats = ActivityStats , psLocation :: Text } deriving (Generic) - deriving (ToJSON, FromJSON) via (Stripped ActivityStats) + deriving (ToJSON, FromJSON, ToSchema) via (Stripped ActivityStats) -- | Flow with additional metadata. Carries either a tech or bio flow. data FlowDetail = FlowDetail @@ -886,7 +976,7 @@ data FlowDetail = FlowDetail , fdUsageCount :: Int -- How many activities use this flow } deriving (Generic) - deriving (ToJSON) via (Stripped FlowDetail) + deriving (ToJSON, ToSchema) via (Stripped FlowDetail) {- | Exchange with flow, unit, and target activity information. The carried flow's variant lines up with the Exchange variant. @@ -900,7 +990,7 @@ data ExchangeDetail = ExchangeDetail , edTargetActivity :: Maybe ActivitySummary -- Target activity for technosphere inputs } deriving (Generic) - deriving (ToJSON) via (Stripped ExchangeDetail) + deriving (ToJSON, ToSchema) via (Stripped ExchangeDetail) -- | A single filter entry returned in a preset data ClassificationEntryInfo = ClassificationEntryInfo @@ -909,7 +999,7 @@ data ClassificationEntryInfo = ClassificationEntryInfo , ceiMode :: !Text -- "exact" or "contains" } deriving (Show, Eq, Generic) - deriving (ToJSON) via (Stripped ClassificationEntryInfo) + deriving (ToJSON, ToSchema) via (Stripped ClassificationEntryInfo) -- | A named filter preset (from TOML config) data ClassificationPresetInfo = ClassificationPresetInfo @@ -919,7 +1009,7 @@ data ClassificationPresetInfo = ClassificationPresetInfo , cpiFilters :: ![ClassificationEntryInfo] } deriving (Show, Eq, Generic) - deriving (ToJSON) via (Stripped ClassificationPresetInfo) + deriving (ToJSON, ToSchema) via (Stripped ClassificationPresetInfo) -- | Classification system with its values for browsing/filtering data ClassificationSystem = ClassificationSystem @@ -928,7 +1018,7 @@ data ClassificationSystem = ClassificationSystem , csActivityCount :: Int -- How many activities have this system } deriving (Generic) - deriving (ToJSON) via (Stripped ClassificationSystem) + deriving (ToJSON, ToSchema) via (Stripped ClassificationSystem) {- | Result of an /activity/{pid}/aggregate call. @@ -943,7 +1033,7 @@ data Aggregation = Aggregation , aggGroups :: [AggregationGroup] -- one entry per group_by bucket (empty when group_by omitted) } deriving (Generic) - deriving (ToJSON) via (Stripped Aggregation) + deriving (ToJSON, ToSchema) via (Stripped Aggregation) -- | One bucket in an aggregation result. data AggregationGroup = AggregationGroup @@ -954,16 +1044,18 @@ data AggregationGroup = AggregationGroup , aggCount :: Int } deriving (Generic) - deriving (ToJSON) via (Stripped AggregationGroup) + deriving (ToJSON, ToSchema) via (Stripped AggregationGroup) -- JSON instances. All record types use API.JsonOptions.stripLowerPrefix -- via the strippedToJSON/strippedToEncoding/strippedParseJSON helpers. -- Sum-only types (NodeType, EdgeType, FlowRole) keep default derivation. -instance (ToJSON a) => ToJSON (SearchResults a) where toJSON = strippedToJSON; toEncoding = strippedToEncoding +-- ToJSON / FromJSON / ToSchema for SearchResults a are standalone-derived +-- alongside the data declaration above (line ~169). instance ToJSON NodeType instance ToJSON EdgeType instance ToJSON FlowRole -instance ToJSON Unit where toJSON = strippedToJSON; toEncoding = strippedToEncoding +-- ToJSON / FromJSON / ToSchema for Unit are derived via Stripped alongside +-- its data declaration in src/Types.hs. -- Custom ToJSON for PerturbedEntry: flatten the Either so success entries -- have impact+deltaImpact and error entries have error. @@ -975,7 +1067,7 @@ instance ToJSON PerturbedEntry where Right (lcia, d) -> ["impact" .= lcia, "deltaImpact" .= d] -- FromJSON instances needed for API conversion -instance (FromJSON a) => FromJSON (SearchResults a) where parseJSON = strippedParseJSON +-- (FromJSON (SearchResults a) above) -- openapi3 cannot derive ToSchema for BSL.ByteString directly newtype BinaryContent = BinaryContent BSL.ByteString diff --git a/src/Types.hs b/src/Types.hs index 47ddec62..b054886e 100644 --- a/src/Types.hs +++ b/src/Types.hs @@ -36,6 +36,7 @@ import Data.List (nub) import Search.BM25.Types (BM25Index) import SynonymDB (normalizeName) import SynonymDB.Types (SynonymDB) +import Data.OpenApi (ToSchema) -- | Orphan Store instance for UUID (16 bytes, host-native word order) instance Store UUID where @@ -65,7 +66,7 @@ data Compartment = Compartment , compartmentSub :: !(Maybe Text) -- "high. pop.", "river water", … } deriving (Eq, Show, Generic, NFData, Store) - deriving (ToJSON, FromJSON) via (Stripped Compartment) + deriving (ToJSON, FromJSON, ToSchema) via (Stripped Compartment) {- | The biosphere flow's medium (air | water | soil | …), or @""@ when the source dataset omitted the compartment. Use 'bfCompartment' directly when @@ -90,6 +91,7 @@ biosphere side also gets named variants instead of a load-bearing 'Bool'. -} data BioDirection = Resource | Emission deriving (Eq, Show, Generic, NFData, Store) + deriving anyclass (ToSchema) {- | Role of a technosphere exchange within its host activity. Names the four valid combinations of (input?, reference?). `ReferenceInput` is the @@ -102,6 +104,7 @@ data TechRole | ReferenceInput -- main input of a treatment process | Input -- ordinary technosphere input deriving (Eq, Show, Generic, NFData, Store) + deriving anyclass (ToSchema) -- | Unit representation (kg, MJ, m³, etc.) data Unit = Unit @@ -111,6 +114,7 @@ data Unit = Unit , unitComment :: !Text -- Description/comment } deriving (Generic, NFData, Store) + deriving (ToJSON, FromJSON, ToSchema) via (Stripped Unit) {- | Substance - groups flows with the same chemical identity across databases Used for flow matching between different LCA databases (ecoinvent, ILCD, SimaPro) @@ -137,7 +141,7 @@ data TechnosphereFlow = TechnosphereFlow , tfSubstanceId :: !(Maybe Int) } deriving (Generic, NFData, Store) - deriving (ToJSON, FromJSON) via (Stripped TechnosphereFlow) + deriving (ToJSON, FromJSON, ToSchema) via (Stripped TechnosphereFlow) {- | A biosphere flow — an environmental exchange (resource extraction or emission). Always carries a `Compartment` identifying the medium. @@ -157,7 +161,7 @@ data BiosphereFlow = BiosphereFlow -} } deriving (Generic, NFData, Store) - deriving (ToJSON, FromJSON) via (Stripped BiosphereFlow) + deriving (ToJSON, FromJSON, ToSchema) via (Stripped BiosphereFlow) {- | A waste flow — a residual output that a process generates and which a treatment activity may consume as its reference input. Sister type to @@ -179,7 +183,7 @@ data WasteFlow = WasteFlow , wfSubstanceId :: !(Maybe Int) } deriving (Generic, NFData, Store) - deriving (ToJSON, FromJSON) via (Stripped WasteFlow) + deriving (ToJSON, FromJSON, ToSchema) via (Stripped WasteFlow) {- | Pedigree matrix (Weidema & Wesnæs 1996) — five LCA data-quality scores each in 1..5 (1 = best, 5 = worst). SimaPro CSV encodes it as a prefix in the @@ -193,7 +197,7 @@ data Pedigree = Pedigree , pedTechnological :: !Int -- 1..5 } deriving (Eq, Show, Generic, NFData, Store) - deriving (ToJSON, FromJSON) via (Stripped Pedigree) + deriving (ToJSON, FromJSON, ToSchema) via (Stripped Pedigree) {- | Smart constructor: rejects out-of-range values (anything not in 1..5) by returning Nothing. Callers should treat Nothing as "no pedigree @@ -243,7 +247,7 @@ data Exchange , waPedigree :: !(Maybe Pedigree) -- LCA data-quality scores when available } deriving (Generic, NFData, Store) - deriving (ToJSON, FromJSON) via (Stripped Exchange) + deriving (ToJSON, FromJSON, ToSchema) via (Stripped Exchange) -- | Helper functions for Exchange variants exchangeFlowId :: Exchange -> UUID From 7fdb56570f307f91dd83e0516cbddeadc18bee70 Mon Sep 17 00:00:00 2001 From: Christophe Combelles Date: Tue, 26 May 2026 11:09:59 +0200 Subject: [PATCH 12/43] refactor(api): loadCollection and crossDBSolutionFor read env, not args MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Drop the explicit DatabaseManager parameter from loadCollection and crossDBSolutionFor; both now `asks getDatabaseManager` inside their AppM body. Call sites lose the corresponding closure-captured arg. Net delta is small (+2 LOC) — the body gains the `asks` line but each call site loses one word. The win is architectural: every helper that needs the database manager now declares it as a capability requirement on the monad, not as a positional parameter on every signature, which is the Has-pattern (capability-class projection) the prior commits laid the foundation for. The remaining helpers that still take DatabaseManager (prepMethodCtx, buildPerDbSetTables, batchedScoresFor, computeCategoryResult, mkMcpCrossDBEntry) run in IO, not AppM, so a similar conversion would require either widening them to AppM (cascading change) or threading a Reader instance through IO. Both belong to follow-ups (notably the MCP Freer refactor, which would naturally bring the MCP-side helpers into AppM as well). --- src/API/Routes.hs | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/src/API/Routes.hs b/src/API/Routes.hs index 3bcfc67f..cbeeb53c 100644 --- a/src/API/Routes.hs +++ b/src/API/Routes.hs @@ -509,8 +509,9 @@ throwServiceError (Service.InvalidUUID _) = throwError err500{errBody = "Interna throwServiceError (Service.FlowNotFound _) = throwError err500{errBody = "Internal server error"} -- | Load a method collection by name from the live DatabaseManager state. -loadCollection :: DatabaseManager -> Text -> AppM ([Method], [DamageCategory], [NormWeightSet], [ScoringSet]) -loadCollection dbManager collectionName = do +loadCollection :: Text -> AppM ([Method], [DamageCategory], [NormWeightSet], [ScoringSet]) +loadCollection collectionName = do + dbManager <- asks getDatabaseManager loadedCollections <- liftIO $ readTVarIO (dmLoadedMethods dbManager) case M.lookup collectionName loadedCollections of Just mc -> return (mcMethods mc, mcDamageCategories mc, mcNormWeightSets mc, mcScoringSets mc) @@ -520,10 +521,11 @@ loadCollection dbManager collectionName = do no-substitution path ('requireFullyLinked' runs inside 'solutionWithDeps'); 'Just' applies the substitutions through the uncached path. -} -crossDBSolutionFor :: DatabaseManager -> Text -> Database -> SharedSolver -> ProcessId -> Maybe SubstitutionRequest -> AppM SharedSolver.CrossDBSolution -crossDBSolutionFor dbManager dbName db solver pid mSub = case mSub of +crossDBSolutionFor :: Text -> Database -> SharedSolver -> ProcessId -> Maybe SubstitutionRequest -> AppM SharedSolver.CrossDBSolution +crossDBSolutionFor dbName db solver pid mSub = case mSub of Nothing -> solutionWithDeps dbName db solver pid Just subReq -> do + dbManager <- asks getDatabaseManager requireFullyLinked dbName db unitCfg <- liftIO $ getMergedUnitConfig dbManager eSol <- @@ -790,11 +792,11 @@ activityLCIABatchH dbName processIdText collectionName mSub = do dbManager <- asks getDatabaseManager (db, sharedSolver) <- requireDatabaseByName dbName (actProcessId, activity) <- resolveOrThrow db processIdText - (methods, damageCats, nwSets, scoringSets) <- loadCollection dbManager collectionName + (methods, damageCats, nwSets, scoringSets) <- loadCollection collectionName let dcLookup = M.fromList [(subName, dcName dc) | dc <- damageCats, (subName, _) <- dcImpacts dc] mNW = case nwSets of (nw : _) -> Just nw; [] -> Nothing t0 <- liftIO getCurrentTime - sol <- crossDBSolutionFor dbManager dbName db sharedSolver actProcessId mSub + sol <- crossDBSolutionFor dbName db sharedSolver actProcessId mSub t1 <- liftIO getCurrentTime let inventory = SharedSolver.csInventory sol !invSize = M.size inventory @@ -1162,7 +1164,7 @@ lcaServer env = activityInventoryCore dbName processIdText mSub = do (db, sharedSolver) <- requireDatabaseByName dbName (processId, activity) <- resolveOrThrow db processIdText - sol <- crossDBSolutionFor dbManager dbName db sharedSolver processId mSub + sol <- crossDBSolutionFor dbName db sharedSolver processId mSub (mFlows, mUnits) <- liftIO $ DM.getMergedFlowMetadata dbManager pure $ Service.convertToInventoryExport db mFlows mUnits processId activity (SharedSolver.csInventory sol) @@ -1392,7 +1394,7 @@ lcaServer env = (db, sharedSolver) <- requireDatabaseByName dbName method <- loadMethodByUUID methodIdText (processId, activity) <- resolveOrThrow db processIdText - sol <- crossDBSolutionFor dbManager dbName db sharedSolver processId mSub + sol <- crossDBSolutionFor dbName db sharedSolver processId mSub result <- liftIO $ computeCategoryResult dbManager dbName db sol activity (fromMaybe 5 topFlowsParam) Nothing method when (isNothing mSub) $ liftIO $ logLCIAResult result method pure result From 73e195089da3e937aa29c9a779000bf6a97660b1 Mon Sep 17 00:00:00 2001 From: Christophe Combelles Date: Tue, 26 May 2026 11:13:41 +0200 Subject: [PATCH 13/43] =?UTF-8?q?refactor(dbhandlers):=20factor=20IO/Eithe?= =?UTF-8?q?r=E2=86=92400=20ladder=20into=20ioEither400?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three handlers (addDependencyHandler, removeDependencyHandler, setDataPathHandler) all had the same shape: result <- liftIO $ X case result of Left err -> throwError $ err400{errBody = BSL.fromStrict $ T.encodeUtf8 err} Right v -> return v Extract that pattern as @ioEither400 :: IO (Either Text a) -> AppM a@ next to @simpleAction@. Each handler body shrinks from a 5-line case-ladder to a single ioEither400 call. The other Left→throwError sites in this file use different status codes (404, custom response shapes like LoadFailed / RelinkResponse) and don't fit a single helper, so they're left alone — there's no point forcing a Procrustean abstraction. LOC roughly break-even — the helper costs lines its callers save — but the pattern is now named and reusable. --- src/API/DatabaseHandlers.hs | 34 ++++++++++++++++++---------------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/src/API/DatabaseHandlers.hs b/src/API/DatabaseHandlers.hs index ae990c6e..aa011473 100644 --- a/src/API/DatabaseHandlers.hs +++ b/src/API/DatabaseHandlers.hs @@ -337,27 +337,21 @@ getDatabaseSetupHandler dbName= do Runs cross-DB linking and returns updated setup info -} addDependencyHandler :: Text -> Text -> AppM DatabaseSetupInfo -addDependencyHandler dbName depName= do +addDependencyHandler dbName depName = do dbManager <- asks getDatabaseManager - result <- liftIO $ addDependencyToStaged dbManager dbName depName - case result of - Left err -> throwError $ err400{errBody = BSL.fromStrict $ T.encodeUtf8 err} - Right setupInfo -> return setupInfo + ioEither400 (addDependencyToStaged dbManager dbName depName) {- | Remove a dependency from a staged database Re-runs cross-DB linking and returns updated setup info -} removeDependencyHandler :: Text -> Text -> AppM DatabaseSetupInfo -removeDependencyHandler dbName depName= do +removeDependencyHandler dbName depName = do dbManager <- asks getDatabaseManager - result <- liftIO $ removeDependencyFromStaged dbManager dbName depName - case result of - Left err -> throwError $ err400{errBody = BSL.fromStrict $ T.encodeUtf8 err} - Right setupInfo -> return setupInfo + ioEither400 (removeDependencyFromStaged dbManager dbName depName) -- | Change the data path for an uploaded (staged) database setDataPathHandler :: Text -> Value -> AppM DatabaseSetupInfo -setDataPathHandler dbName body= do +setDataPathHandler dbName body = do dbManager <- asks getDatabaseManager -- Extract "path" from JSON body let mPath = case body of @@ -367,11 +361,7 @@ setDataPathHandler dbName body= do _ -> Nothing case mPath of Nothing -> throwError $ err400{errBody = "Missing \"path\" field in request body"} - Just newPath -> do - result <- liftIO $ setDataPath dbManager dbName newPath - case result of - Left err -> throwError $ err400{errBody = BSL.fromStrict $ T.encodeUtf8 err} - Right setupInfo -> return setupInfo + Just newPath -> ioEither400 (setDataPath dbManager dbName newPath) {- | Finalize a staged database Builds matrices and makes it ready for queries @@ -460,6 +450,18 @@ simpleAction action successMsg = do Left err -> ActivateResponse False err Nothing Right () -> ActivateResponse True successMsg Nothing +{- | @ioEither400 m@ runs an IO action that returns @Either Text a@; on +@Left@ throws a 400 with the message body, on @Right@ propagates. Used +to compress the @do result <- liftIO ...; case result of Left … Right …@ +ladder that recurs across every handler returning a typed payload. +-} +ioEither400 :: IO (Either Text a) -> AppM a +ioEither400 action = do + result <- liftIO action + case result of + Left err -> throwError err400{errBody = BSL.fromStrict $ T.encodeUtf8 err} + Right v -> return v + -------------------------------------------------------------------------------- -- Reference Data Handlers (flow synonyms, compartment mappings, units) -------------------------------------------------------------------------------- From 679b570860544858dc77798485ea6a33a9875f6d Mon Sep 17 00:00:00 2001 From: Christophe Combelles Date: Tue, 26 May 2026 12:11:32 +0200 Subject: [PATCH 14/43] refactor(schema): LocationFallback/LocationUnresolved self-encode via Stripped MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Give LocationKind a stable ToJSON (lowercase wire codes via the existing locationKindCode) and migrate LocationFallback and LocationUnresolved to attached `deriving (ToJSON, FromJSON) via (Stripped X)`. The custom encodeFallback / encodeUnresolved helpers inside DatabaseSetupInfo's manual ToJSON disappear — the parent now just serializes the field directly and the structured serialization comes from the field type. Categorically: pushing the ToJSON dictionary down to the leaf types gives us the same property as `cata` in recursion schemes — the encoder for a structure is the composition of encoders for its parts. Aeson's deriving machinery delivers exactly that composition through the Generic Rep. Confirmed byte-identical at runtime: aeson preserves field-declaration order in its Object; Stripped's stripLowerPrefix turns {lfProduct, lfRequested, lfActual, lfKind} into {product, requested, actual, kind} — the same keys the manual encoder emitted, in the same order. openapi.json still byte-identical (it's generated from the type-level schema, which was already custom for LocationKind and unchanged for the records). Hspec 1052/1052 green. --- src/Database/Manager.hs | 19 ++++--------------- src/Types.hs | 18 ++++++++++++++++++ 2 files changed, 22 insertions(+), 15 deletions(-) diff --git a/src/Database/Manager.hs b/src/Database/Manager.hs index 2a7ffb97..8140c7d3 100644 --- a/src/Database/Manager.hs +++ b/src/Database/Manager.hs @@ -350,26 +350,15 @@ instance ToJSON DatabaseSetupInfo where , "dependencies" .= dsiDependencies , "isReady" .= dsiIsReady , "unknownUnits" .= dsiUnknownUnits - , "locationFallbacks" .= map encodeFallback dsiLocationFallbacks - , "locationUnresolved" .= map encodeUnresolved dsiLocationUnresolved + , -- LocationFallback / LocationUnresolved carry their own + -- Stripped-derived ToJSON now; no per-site encoder needed. + "locationFallbacks" .= dsiLocationFallbacks + , "locationUnresolved" .= dsiLocationUnresolved , "dataPath" .= dsiDataPath , "availablePaths" .= map encodeCandidate dsiAvailablePaths , "isLoaded" .= dsiIsLoaded ] where - encodeFallback LocationFallback{lfProduct, lfRequested, lfActual, lfKind} = - A.object - [ "product" .= lfProduct - , "requested" .= lfRequested - , "actual" .= lfActual - , "kind" .= locationKindCode lfKind - ] - encodeUnresolved LocationUnresolved{luProduct, luRequested, luReason} = - A.object - [ "product" .= luProduct - , "requested" .= luRequested - , "reason" .= luReason - ] encodeCandidate (path, fmt, cnt) = A.object ["path" .= path, "format" .= fmt, "fileCount" .= cnt] diff --git a/src/Types.hs b/src/Types.hs index b054886e..b3681e94 100644 --- a/src/Types.hs +++ b/src/Types.hs @@ -1044,6 +1044,22 @@ locationKindCode ParentLoc = "parent" locationKindCode GlobalLoc = "global" locationKindCode UnrelatedLoc = "unrelated" +-- | Lowercase-wire-code ToJSON for 'LocationKind'. Stays in lock-step with +-- 'locationKindCode' so the JSON output and the rejection-reason text can +-- never drift apart. +instance ToJSON LocationKind where + toJSON = toJSON . locationKindCode + +instance FromJSON LocationKind where + parseJSON v = do + s <- parseJSON v + case (s :: Text) of + "exact" -> pure ExactLoc + "parent" -> pure ParentLoc + "global" -> pure GlobalLoc + "unrelated" -> pure UnrelatedLoc + other -> fail $ "Invalid LocationKind: " <> T.unpack other + -- | A product whose supplier was found at a wider geography than requested. data LocationFallback = LocationFallback { lfProduct :: !Text @@ -1052,6 +1068,7 @@ data LocationFallback = LocationFallback , lfKind :: !LocationKind } deriving (Show, Eq, Generic, NFData, Store) + deriving (ToJSON, FromJSON) via (Stripped LocationFallback) {- | A product whose supplier could not be linked — either because no candidate matched the name/unit, or because every geographic candidate was rejected by @@ -1063,6 +1080,7 @@ data LocationUnresolved = LocationUnresolved , luReason :: !Text } deriving (Show, Eq, Generic, NFData, Store) + deriving (ToJSON, FromJSON) via (Stripped LocationUnresolved) {- | Statistics from cross-database linking Only essential state is stored; counts are derived via accessor functions. From f90ba14acf5e6a8088bc2a9ed13f04419dcab02d Mon Sep 17 00:00:00 2001 From: Christophe Combelles Date: Tue, 26 May 2026 12:15:23 +0200 Subject: [PATCH 15/43] refactor(schema): co-locate ToSchema LocationKind with its data decl MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The string-enum ToSchema for LocationKind (lowercase wire codes matching locationKindCode) joins its ToJSON/FromJSON instances next to the data declaration in src/Types.hs. Unblocks `deriving ToSchema via Stripped` on LocationFallback and LocationUnresolved — the dependent schema lookup now finds the LocationKind instance directly, not through an orphan in API/OpenApi.hs that downstream modules can't see. src/API/OpenApi.hs continues to shrink toward its real role: the enrichWithResources post-processor plus a handful of genuinely-bespoke schema instances (Value, BinaryContent, ApiFlow, the Database.Manager domain types). Every "type with a generic stripped schema" now lives next to its data declaration — the schema, the JSON encoder, and the data definition share one source location. --- src/API/OpenApi.hs | 22 +++------------------- src/Types.hs | 21 ++++++++++++++++++--- 2 files changed, 21 insertions(+), 22 deletions(-) diff --git a/src/API/OpenApi.hs b/src/API/OpenApi.hs index febd7612..3ad6ac58 100644 --- a/src/API/OpenApi.hs +++ b/src/API/OpenApi.hs @@ -47,25 +47,9 @@ instance ToSchema MissingSupplier instance ToSchema DependencyStatus instance ToSchema DependencyChoice --- Manual schema for LocationKind: ToJSON in Database.Manager.encodeFallback --- emits lowercase wire codes (exact / parent / global / unrelated) via --- Types.locationKindCode. The generic schema would otherwise advertise the --- raw Haskell constructor names. -instance ToSchema LocationKind where - declareNamedSchema _ = - pure $ - NamedSchema (Just "LocationKind") $ - mempty - & type_ ?~ OpenApiString - & enum_ - ?~ [ toJSON ("exact" :: Text) - , toJSON ("parent" :: Text) - , toJSON ("global" :: Text) - , toJSON ("unrelated" :: Text) - ] - -instance ToSchema LocationFallback where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions -instance ToSchema LocationUnresolved where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions +-- ToSchema for LocationKind / LocationFallback / LocationUnresolved derived +-- alongside their data declarations in src/Types.hs (LocationKind as a string +-- enum matching the lowercase wire codes; the records via Stripped). instance ToSchema DatabaseSetupInfo where declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions -- API.Types — every record type uses strippedSchemaOptions so the generated diff --git a/src/Types.hs b/src/Types.hs index b3681e94..1c180256 100644 --- a/src/Types.hs +++ b/src/Types.hs @@ -36,7 +36,8 @@ import Data.List (nub) import Search.BM25.Types (BM25Index) import SynonymDB (normalizeName) import SynonymDB.Types (SynonymDB) -import Data.OpenApi (ToSchema) +import Control.Lens ((&), (?~)) +import Data.OpenApi (NamedSchema (..), OpenApiType (..), ToSchema (..), enum_, type_) -- | Orphan Store instance for UUID (16 bytes, host-native word order) instance Store UUID where @@ -1060,6 +1061,20 @@ instance FromJSON LocationKind where "unrelated" -> pure UnrelatedLoc other -> fail $ "Invalid LocationKind: " <> T.unpack other +-- | OpenAPI schema for 'LocationKind' as a string-enum matching the wire codes +-- produced by 'locationKindCode'. The generic schema would expose the raw +-- Haskell constructor names; this keeps the spec in sync with the ToJSON. +instance ToSchema LocationKind where + declareNamedSchema _ = + pure $ + NamedSchema (Just "LocationKind") $ + mempty + & type_ ?~ OpenApiString + & enum_ + ?~ [ toJSON (c :: Text) + | c <- ["exact", "parent", "global", "unrelated"] + ] + -- | A product whose supplier was found at a wider geography than requested. data LocationFallback = LocationFallback { lfProduct :: !Text @@ -1068,7 +1083,7 @@ data LocationFallback = LocationFallback , lfKind :: !LocationKind } deriving (Show, Eq, Generic, NFData, Store) - deriving (ToJSON, FromJSON) via (Stripped LocationFallback) + deriving (ToJSON, FromJSON, ToSchema) via (Stripped LocationFallback) {- | A product whose supplier could not be linked — either because no candidate matched the name/unit, or because every geographic candidate was rejected by @@ -1080,7 +1095,7 @@ data LocationUnresolved = LocationUnresolved , luReason :: !Text } deriving (Show, Eq, Generic, NFData, Store) - deriving (ToJSON, FromJSON) via (Stripped LocationUnresolved) + deriving (ToJSON, FromJSON, ToSchema) via (Stripped LocationUnresolved) {- | Statistics from cross-database linking Only essential state is stored; counts are derived via accessor functions. From 323d87cc62152203b8c7d37db99953a6fd48625b Mon Sep 17 00:00:00 2001 From: Christophe Combelles Date: Tue, 26 May 2026 12:51:54 +0200 Subject: [PATCH 16/43] refactor(schema): MissingSupplier/DependencyChoice/DatabaseSetupInfo via Stripped MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Migrate the three remaining hand-rolled ToJSON instances in Database.Manager onto the Stripped DerivingVia carrier: - MissingSupplier: 9 lines of manual `A.object [...]` → 1 deriving clause. Wire format unchanged (msProductName → productName, etc.). - DependencyChoice: 7 lines → 1 deriving clause. Wire format unchanged. - DatabaseSetupInfo: 25 lines (with inline encodeFallback / encodeUnresolved / encodeCandidate helpers) → 1 deriving clause. To make the last one work, the bespoke 3-tuple @dsiAvailablePaths :: ![(Text, Text, Int)]@ is promoted to a proper record @data PathCandidate = PathCandidate { pcPath, pcFormat, pcFileCount }@, which also derives via Stripped. The inline encodeCandidate helper inside DatabaseSetupInfo's ToJSON disappears along with the rest. Runtime JSON: byte-identical. aeson's Generic ToJSON preserves field declaration order, which matches the order the manual instances used. Key names match (stripLowerPrefix on the prefixed Haskell field names produces the same JSON keys the manual code emitted). OpenAPI schema: changes deliberately. Before, `availablePaths` was a positional 3-tuple — `[[string, string, int]]` — which is awkward in generated clients and was already at odds with what the wire actually emitted (an array of objects). After: the schema reflects the real wire shape — `array` with named `path`, `format`, `fileCount` properties. A new `PathCandidate` schema component is added. The pre-existing inconsistency (schema said tuple, wire said object) is now resolved in favor of the wire. Categorically, this is the generic/specific decomposition the prior commits established: the type-level Stripped carrier is the *generic* machinery (one definition, one piece of code that handles field-name-stripping for every record), and what each type declares is purely *specific* — "I am a record with these fields". The serialization, schema, and shape now share one source location per type; the JSON encoder, the OpenAPI schema generator, and the data declaration cannot drift apart. Hspec 1052/1052 green. --- src/Database/Manager.hs | 79 ++++++++++++++--------------------------- 1 file changed, 27 insertions(+), 52 deletions(-) diff --git a/src/Database/Manager.hs b/src/Database/Manager.hs index 8140c7d3..ac947cc4 100644 --- a/src/Database/Manager.hs +++ b/src/Database/Manager.hs @@ -1,7 +1,8 @@ {-# LANGUAGE BangPatterns #-} +{-# LANGUAGE DeriveAnyClass #-} {-# LANGUAGE DeriveGeneric #-} +{-# LANGUAGE DerivingVia #-} {-# LANGUAGE LambdaCase #-} -{-# LANGUAGE NamedFieldPuns #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE RecordWildCards #-} {-# LANGUAGE ScopedTypeVariables #-} @@ -104,7 +105,9 @@ import Control.Concurrent.STM import Control.Exception (SomeException, try) import qualified Control.Exception import Control.Monad (forM, forM_, unless, when) +import API.JsonOptions (Stripped (..)) import Data.Aeson (FromJSON (..), ToJSON (..), (.:), (.:?), (.=)) +import Data.OpenApi (ToSchema) import qualified Data.Aeson as A import Data.Bifunctor (first) import Data.Char (toLower) @@ -254,16 +257,7 @@ data MissingSupplier = MissingSupplier -- ^ e.g. "kg vs ton", "FR not available" } deriving (Show, Eq, Generic) - -instance ToJSON MissingSupplier where - toJSON MissingSupplier{..} = - A.object - [ "productName" .= msProductName - , "count" .= msCount - , "location" .= msLocation - , "reason" .= msReason - , "detail" .= msDetail - ] + deriving (ToJSON) via (Stripped MissingSupplier) {- | Whether a candidate dependency is currently selected, merely available, or redundant under the minimal cover (matches links but every link it wins @@ -285,15 +279,23 @@ data DependencyChoice = DependencyChoice , dchMatchCount :: !Int } deriving (Show, Eq, Generic) + deriving (ToJSON) via (Stripped DependencyChoice) -instance ToJSON DependencyChoice where - toJSON DependencyChoice{..} = - A.object - [ "status" .= dchStatus - , "databaseName" .= dchDatabaseName - , "displayName" .= dchDisplayName - , "matchCount" .= dchMatchCount - ] +{- | One of the candidate data directories inside an uploaded database's +upload root. Surfaces in @DatabaseSetupInfo.dsiAvailablePaths@ so the UI +can present a picker. The schema is now a proper named object instead +of a positional 3-tuple. +-} +data PathCandidate = PathCandidate + { pcPath :: !Text + -- ^ Relative path under the upload root + , pcFormat :: !Text + -- ^ Format label (e.g. "EcoSpold 2", "SimaPro CSV", "Unknown") + , pcFileCount :: !Int + -- ^ Number of data files detected in this directory + } + deriving (Show, Eq, Generic) + deriving (ToJSON, ToSchema) via (Stripped PathCandidate) -- | Setup info for a database (for the setup page) data DatabaseSetupInfo = DatabaseSetupInfo @@ -328,40 +330,13 @@ data DatabaseSetupInfo = DatabaseSetupInfo -} , dsiDataPath :: !Text -- ^ Current selected data path (relative) - , dsiAvailablePaths :: ![(Text, Text, Int)] - -- ^ (relativePath, formatLabel, fileCount) + , dsiAvailablePaths :: ![PathCandidate] + -- ^ Candidate data directories within the upload root , dsiIsLoaded :: !Bool -- ^ True if database is already loaded (read-only info) } deriving (Show, Eq, Generic) - -instance ToJSON DatabaseSetupInfo where - toJSON DatabaseSetupInfo{..} = - A.object - [ "name" .= dsiName - , "displayName" .= dsiDisplayName - , "activityCount" .= dsiActivityCount - , "inputCount" .= dsiInputCount - , "completeness" .= dsiCompleteness - , "internalLinks" .= dsiInternalLinks - , "crossDBLinks" .= dsiCrossDBLinks - , "unresolvedLinks" .= dsiUnresolvedLinks - , "missingSuppliers" .= dsiMissingSuppliers - , "dependencies" .= dsiDependencies - , "isReady" .= dsiIsReady - , "unknownUnits" .= dsiUnknownUnits - , -- LocationFallback / LocationUnresolved carry their own - -- Stripped-derived ToJSON now; no per-site encoder needed. - "locationFallbacks" .= dsiLocationFallbacks - , "locationUnresolved" .= dsiLocationUnresolved - , "dataPath" .= dsiDataPath - , "availablePaths" .= map encodeCandidate dsiAvailablePaths - , "isLoaded" .= dsiIsLoaded - ] - where - encodeCandidate (path, fmt, cnt) = - A.object - ["path" .= path, "format" .= fmt, "fileCount" .= cnt] + deriving (ToJSON) via (Stripped DatabaseSetupInfo) -- | Errors from getDatabaseSetupInfo data SetupError = SetupNotFound Text | SetupFailed Text @@ -2051,9 +2026,9 @@ buildLoadedSetupInfo config db configs indexedDbs = } {- | Discover candidate data paths within an uploaded database's root directory. -Returns (relativePath, formatLabel, fileCount) for each candidate. +Returns one 'PathCandidate' per candidate directory. -} -discoverCandidatePaths :: DatabaseConfig -> IO [(Text, Text, Int)] +discoverCandidatePaths :: DatabaseConfig -> IO [PathCandidate] discoverCandidatePaths dbConfig = do uploadsDir <- UploadedDB.getDatabaseUploadsDir let uploadRoot = uploadsDir T.unpack (dcName dbConfig) @@ -2069,7 +2044,7 @@ discoverCandidatePaths dbConfig = do Upload.ILCDProcess -> "ILCD" Upload.OpenLcaJsonLd -> "openLCA JSON-LD" Upload.UnknownFormat -> "Unknown" - return (T.pack rel, label, count) + return PathCandidate{pcPath = T.pack rel, pcFormat = label, pcFileCount = count} where -- Simple relative path: strip upload root prefix makeRelativePath base path From fbbe23b870459edc3de413367fa290825aaeadf2 Mon Sep 17 00:00:00 2001 From: Christophe Combelles Date: Tue, 26 May 2026 12:54:47 +0200 Subject: [PATCH 17/43] fix(schema): align openapi for MissingSupplier / DependencyChoice / DependencyStatus MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three pre-existing schema/wire mismatches in Database.Manager — now that the JSON instances are Stripped-derived, fix the schemas too: - MissingSupplier: schema previously advertised the Haskell field names (msCount, msProductName, msLocation, msReason, msDetail); the actual JSON has always emitted the Stripped names (count, productName, location, reason, detail). Schema now matches. Achieved by deriving (ToJSON, ToSchema) via (Stripped X) so the two share one source of truth. - DependencyChoice: same situation — schema said dchStatus etc., wire emits status etc. Same fix. - DependencyStatus: schema previously declared the enum as [SelectedDep, AvailableDep, RedundantDep] (raw constructor names); the wire has always emitted [selected, available, redundant] (lowercase, via the hand-rolled ToJSON). Schema now declared explicitly as the lowercase string-enum that matches. Wire JSON unchanged at every endpoint (the ToJSON instances produce the same bytes as before; Stripped's stripLowerPrefix is bit-equivalent to the manual A.object [...] form, in the same field order). OpenAPI changes are deliberate corrections. Any tooling that read the openapi.json to generate clients was previously broken for these three types (it would expect Haskell-named fields that the server never emits). The categorical pattern that makes this fix one-line-per-type is the same isomorphism transport via Coercible the prior commits established — pushing the wire contract into the type's Generic Rep keeps the encoder, the decoder, and the schema in lock-step. --- src/API/OpenApi.hs | 7 +++---- src/Database/Manager.hs | 19 ++++++++++++++++--- 2 files changed, 19 insertions(+), 7 deletions(-) diff --git a/src/API/OpenApi.hs b/src/API/OpenApi.hs index 3ad6ac58..6039176a 100644 --- a/src/API/OpenApi.hs +++ b/src/API/OpenApi.hs @@ -42,10 +42,9 @@ instance ToSchema Value where -- Domain types: TechRole, BioDirection, Unit now derive ToSchema next to -- their data declarations in src/Types.hs (via anyclass / DerivingVia). --- Database.Manager types -instance ToSchema MissingSupplier -instance ToSchema DependencyStatus -instance ToSchema DependencyChoice +-- Database.Manager ToSchema instances (MissingSupplier, DependencyChoice via +-- Stripped; DependencyStatus as a lowercase string-enum) now live next to +-- their data declarations in src/Database/Manager.hs. -- ToSchema for LocationKind / LocationFallback / LocationUnresolved derived -- alongside their data declarations in src/Types.hs (LocationKind as a string diff --git a/src/Database/Manager.hs b/src/Database/Manager.hs index ac947cc4..09632cbe 100644 --- a/src/Database/Manager.hs +++ b/src/Database/Manager.hs @@ -107,7 +107,8 @@ import qualified Control.Exception import Control.Monad (forM, forM_, unless, when) import API.JsonOptions (Stripped (..)) import Data.Aeson (FromJSON (..), ToJSON (..), (.:), (.:?), (.=)) -import Data.OpenApi (ToSchema) +import Control.Lens ((&), (?~)) +import Data.OpenApi (NamedSchema (..), OpenApiType (..), ToSchema (..), enum_, type_) import qualified Data.Aeson as A import Data.Bifunctor (first) import Data.Char (toLower) @@ -257,7 +258,7 @@ data MissingSupplier = MissingSupplier -- ^ e.g. "kg vs ton", "FR not available" } deriving (Show, Eq, Generic) - deriving (ToJSON) via (Stripped MissingSupplier) + deriving (ToJSON, ToSchema) via (Stripped MissingSupplier) {- | Whether a candidate dependency is currently selected, merely available, or redundant under the minimal cover (matches links but every link it wins @@ -271,6 +272,18 @@ instance ToJSON DependencyStatus where toJSON AvailableDep = A.String "available" toJSON RedundantDep = A.String "redundant" +-- | String-enum schema matching the lowercase wire codes from ToJSON above. +-- The previous default-Generic schema advertised the raw Haskell constructor +-- names (SelectedDep / AvailableDep / RedundantDep), which is what the schema +-- said but never what the wire emitted. +instance ToSchema DependencyStatus where + declareNamedSchema _ = + pure $ + NamedSchema (Just "DependencyStatus") $ + mempty + & type_ ?~ OpenApiString + & enum_ ?~ [toJSON (c :: Text) | c <- ["selected", "available", "redundant"]] + -- | A candidate dependency database in one of three states data DependencyChoice = DependencyChoice { dchStatus :: !DependencyStatus @@ -279,7 +292,7 @@ data DependencyChoice = DependencyChoice , dchMatchCount :: !Int } deriving (Show, Eq, Generic) - deriving (ToJSON) via (Stripped DependencyChoice) + deriving (ToJSON, ToSchema) via (Stripped DependencyChoice) {- | One of the candidate data directories inside an uploaded database's upload root. Surfaces in @DatabaseSetupInfo.dsiAvailablePaths@ so the UI From 10094dfbf882a93cf080f97b003afa8a0ea3e184 Mon Sep 17 00:00:00 2001 From: Christophe Combelles Date: Tue, 26 May 2026 17:30:47 +0200 Subject: [PATCH 18/43] refactor(stats): CrossDBLinkingStats and TreeStats become Monoids MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the bespoke @mergeCrossDBStats@ / @emptyCrossDBLinkingStats@ pair and the @combineStats@ helper with proper @Semigroup@ / @Monoid@ instances. All call sites now speak the same vocabulary every other stats accumulator in the codebase uses: - @foldr mergeCrossDBStats emptyCrossDBLinkingStats xs@ → @mconcat xs@ - @combineStats a b@ → @a <> b@ - @emptyCrossDBLinkingStats@ → @mempty@ - @mergeCrossDBStats a b@ → @a <> b@ Categorically: both records are the *product of monoids in the category of types*, derived componentwise from each field's monoid. Hand-written rather than @via Generically@ because bare @Int@ has no canonical 'Monoid' (Sum vs Product is ambiguous) and @(Int, LinkBlocker)@ as a map-value mixes a monoid with a non-monoid — the explicit instance keeps the public field types ergonomic (still @Int@, not @Sum Int@) at the cost of a few lines of mechanical derivation. This is the same pattern the prior commit applied to 'UnlinkedSummary', applied now to the two other genuine stats accumulators in the codebase. What remains as named @merge*@ functions are operations that are not pure product-of-monoids: 'mergeUnitConfigs' has a "first-wins" field mixed with last-wins maps; 'mergeSynonymDBs' renumbers group IDs; 'mergeTechFlows' / 'mergeBioFlows' are left-biased with one unioned field. Those keep their named functions because the asymmetry would be confusing under the @<>@ vocabulary. Hspec 1052/1052 green. --- src/Database.hs | 2 +- src/Database/Loader.hs | 20 ++++++++-------- src/Service.hs | 12 +++++++--- src/Types.hs | 38 +++++++++++++++++------------- test/CrossDBRegionalLCIAFixture.hs | 2 +- test/RegionalLCIASpec.hs | 3 +-- 6 files changed, 43 insertions(+), 34 deletions(-) diff --git a/src/Database.hs b/src/Database.hs index 40365f83..ff0d55d1 100644 --- a/src/Database.hs +++ b/src/Database.hs @@ -256,7 +256,7 @@ buildDatabaseWithMatrices unitConfig activityMap techFlowDB bioFlowDB wasteFlowD , dbBiosphereCount = bioFlowCount , dbCrossDBLinks = [] , dbDependsOn = [] - , dbLinkingStats = emptyCrossDBLinkingStats + , dbLinkingStats = mempty , dbSynonymDB = Nothing , dbFlowsByName = M.empty , dbFlowsByCAS = M.empty diff --git a/src/Database/Loader.hs b/src/Database/Loader.hs index 405a240d..819cf06e 100644 --- a/src/Database/Loader.hs +++ b/src/Database/Loader.hs @@ -45,7 +45,7 @@ module Database.Loader ( fixActivityLinksWithCrossDB, findAllCrossDBLinks, CrossDBLinkingStats (..), - emptyCrossDBLinkingStats, + mempty, crossDBLinksCount, unresolvedCount, crossDBBySource, @@ -1055,7 +1055,7 @@ saveCachedDatabaseWithMatrices dbName dataDir db = do -- Cross-Database Linking -------------------------------------------------------------------------------- -{- | CrossDBLinkingStats, emptyCrossDBLinkingStats, mergeCrossDBStats, +{- | CrossDBLinkingStats, mempty, (<>), crossDBLinksCount, unresolvedCount, crossDBBySource are now defined in Types and re-exported from this module. -} @@ -1113,7 +1113,7 @@ loadDatabaseWithCrossDBLinking locationAliases otherIndexes synonymDB unitConfig if null otherIndexes then do -- No cross-DB linking needed - let !stats = emptyCrossDBLinkingStats{cdlUnknownUnits = unknownUnits, cdlTotalInputs = totalInputs} + let !stats = mempty{cdlUnknownUnits = unknownUnits, cdlTotalInputs = totalInputs} reportCrossDBLinkingStats (M.size (sdbActivities simpleDb)) stats return $ Right (simpleDb, stats) else do @@ -1166,7 +1166,7 @@ fixActivityLinksWithCrossDB indexedDbs synonymDB unitConfig locationHier policy if unlinkedBefore == 0 then do reportProgress Info "No unlinked exchanges to resolve via cross-DB linking" - return (db, emptyCrossDBLinkingStats{cdlTotalInputs = totalInputs}) + return (db, mempty{cdlTotalInputs = totalInputs}) else do reportProgress Info $ printf @@ -1272,7 +1272,7 @@ findAllCrossDBLinks :: CrossDBLinkingStats findAllCrossDBLinks ctx techFlowDb unitDb activities = let results = M.mapWithKey (findActivityCrossDBLinks ctx techFlowDb unitDb) activities - in foldr mergeCrossDBStats emptyCrossDBLinkingStats (M.elems results) + in mconcat (M.elems results) -- | Find cross-database links for one activity's exchanges findActivityCrossDBLinks :: @@ -1285,7 +1285,7 @@ findActivityCrossDBLinks :: CrossDBLinkingStats findActivityCrossDBLinks ctx techFlowDb unitDb (consumerActUUID, consumerProdUUID) act = let stats = map (findExchangeCrossDBLink ctx techFlowDb unitDb consumerActUUID consumerProdUUID) (exchanges act) - in foldr mergeCrossDBStats emptyCrossDBLinkingStats stats + in mconcat stats {- | Find cross-database link for a single exchange. @@ -1304,7 +1304,7 @@ findExchangeCrossDBLink :: findExchangeCrossDBLink ctx techFlowDb unitDb consumerActUUID consumerProdUUID ex@TechnosphereExchange{techFlowId = fid, techAmount = amt, techActivityLinkId = linkId, techLocation = loc} | exchangeIsInput ex && linkId == UUID.nil = case M.lookup fid techFlowDb of - Nothing -> emptyCrossDBLinkingStats + Nothing -> mempty Just flow -> let flowUnitName = maybe "" unitName (M.lookup (tfUnitId flow) unitDb) in case findSupplierAcrossDatabases ctx (tfName flow) loc flowUnitName of @@ -1344,13 +1344,13 @@ findExchangeCrossDBLink ctx techFlowDb unitDb consumerActUUID consumerProdUUID e NoNameMatch -> [] UnitIncompatible _ _ -> [] in CrossDBLinkingStats [] (M.singleton (tfName flow) (1, blocker)) S.empty [] unresolved 0 - | otherwise = emptyCrossDBLinkingStats -findExchangeCrossDBLink _ _ _ _ _ BiosphereExchange{} = emptyCrossDBLinkingStats + | otherwise = mempty +findExchangeCrossDBLink _ _ _ _ _ BiosphereExchange{} = mempty -- Cross-DB linking for waste flows is deferred: orphan waste outputs are -- end-of-life markers (no demand on another DB), and waste *inputs* that -- require a treatment supplier would need a dedicated lookup keyed on -- WasteFlow rather than TechnosphereFlow. Pure no-op until that path lands. -findExchangeCrossDBLink _ _ _ _ _ WasteExchange{} = emptyCrossDBLinkingStats +findExchangeCrossDBLink _ _ _ _ _ WasteExchange{} = mempty -- | Report cross-database linking statistics reportCrossDBLinkingStats :: Int -> CrossDBLinkingStats -> IO () diff --git a/src/Service.hs b/src/Service.hs index fbfb07d5..3f51c653 100644 --- a/src/Service.hs +++ b/src/Service.hs @@ -342,10 +342,16 @@ getActivityInventoryWithSharedSolver validators sharedSolver db processIdText = return $ Right inventoryExport -- | Simple stats tracking for tree processing +-- | Tree-traversal counters (total nodes / loop nodes / leaf nodes). The +-- 'Semigroup' / 'Monoid' instance is the product of three Sum-Int monoids, +-- hand-written to keep the bare 'Int' constructor positions ergonomic. data TreeStats = TreeStats Int Int Int -- total, loops, leaves -combineStats :: TreeStats -> TreeStats -> TreeStats -combineStats (TreeStats t1 l1 v1) (TreeStats t2 l2 v2) = TreeStats (t1 + t2) (l1 + l2) (v1 + v2) +instance Semigroup TreeStats where + TreeStats t1 l1 v1 <> TreeStats t2 l2 v2 = TreeStats (t1 + t2) (l1 + l2) (v1 + v2) + +instance Monoid TreeStats where + mempty = TreeStats 0 0 0 {- | Helper to find ProcessId for an activity by searching the database This is needed because activities don't store their own ProcessId/UUID @@ -585,7 +591,7 @@ extractNodesAndEdges db tree depth parentId nodeAcc edgeAcc = case tree of , teUnit = getUnitNameForTechFlow (dbUnits db) flow , teEdgeType = TechnosphereEdge } - newStats = combineStats statsAcc childStats' + newStats = statsAcc <> childStats' in (childNodes, edge : childEdges, newStats) (finalNodes, finalEdges, combinedStats) = foldr processChild (nodes', edgeAcc, TreeStats 1 0 0) children -- Add biosphere nodes and edges only for depth 0 (root level) diff --git a/src/Types.hs b/src/Types.hs index 1c180256..8b3bcde6 100644 --- a/src/Types.hs +++ b/src/Types.hs @@ -1116,23 +1116,27 @@ data CrossDBLinkingStats = CrossDBLinkingStats } deriving (Generic, NFData, Store) --- | Empty stats -emptyCrossDBLinkingStats :: CrossDBLinkingStats -emptyCrossDBLinkingStats = CrossDBLinkingStats [] M.empty S.empty [] [] 0 - --- | Merge two CrossDBLinkingStats -mergeCrossDBStats :: CrossDBLinkingStats -> CrossDBLinkingStats -> CrossDBLinkingStats -mergeCrossDBStats s1 s2 = - CrossDBLinkingStats - { cdlLinks = cdlLinks s1 ++ cdlLinks s2 - , cdlUnresolvedProducts = M.unionWith mergeUnresolved (cdlUnresolvedProducts s1) (cdlUnresolvedProducts s2) - , cdlUnknownUnits = S.union (cdlUnknownUnits s1) (cdlUnknownUnits s2) - , cdlLocationFallbacks = cdlLocationFallbacks s1 ++ cdlLocationFallbacks s2 - , cdlLocationUnresolved = cdlLocationUnresolved s1 ++ cdlLocationUnresolved s2 - , cdlTotalInputs = cdlTotalInputs s1 + cdlTotalInputs s2 - } - where - mergeUnresolved (c1, b) (c2, _) = (c1 + c2, b) +{- | Product of monoids, componentwise: lists concat, the count\/blocker +map unions (counts summed, first blocker wins as a tiebreaker), the set +unions, the @Int@ counter sums. Hand-written rather than @via Generically@ +because bare @Int@ has no canonical 'Monoid' (Sum vs Product is ambiguous) +and the @(Int, LinkBlocker)@ map value mixes a 'Monoid' with a non-'Monoid'. +-} +instance Semigroup CrossDBLinkingStats where + s1 <> s2 = + CrossDBLinkingStats + { cdlLinks = cdlLinks s1 <> cdlLinks s2 + , cdlUnresolvedProducts = M.unionWith mergeUnresolved (cdlUnresolvedProducts s1) (cdlUnresolvedProducts s2) + , cdlUnknownUnits = cdlUnknownUnits s1 <> cdlUnknownUnits s2 + , cdlLocationFallbacks = cdlLocationFallbacks s1 <> cdlLocationFallbacks s2 + , cdlLocationUnresolved = cdlLocationUnresolved s1 <> cdlLocationUnresolved s2 + , cdlTotalInputs = cdlTotalInputs s1 + cdlTotalInputs s2 + } + where + mergeUnresolved (c1, b) (c2, _) = (c1 + c2, b) + +instance Monoid CrossDBLinkingStats where + mempty = CrossDBLinkingStats [] M.empty S.empty [] [] 0 -- | Deduplicate location fallbacks by (product, requestedLoc) deduplicateFallbacks :: [LocationFallback] -> [LocationFallback] diff --git a/test/CrossDBRegionalLCIAFixture.hs b/test/CrossDBRegionalLCIAFixture.hs index 5951e707..2ea2fc97 100644 --- a/test/CrossDBRegionalLCIAFixture.hs +++ b/test/CrossDBRegionalLCIAFixture.hs @@ -153,7 +153,7 @@ mkDB offset locs bioTriples = , dbBiosphereCount = 1 , dbCrossDBLinks = [] , dbDependsOn = [] - , dbLinkingStats = emptyCrossDBLinkingStats + , dbLinkingStats = mempty , dbSynonymDB = Nothing , dbFlowsByName = M.empty , dbFlowsByCAS = M.empty diff --git a/test/RegionalLCIASpec.hs b/test/RegionalLCIASpec.hs index 945dc14f..2bc6d3ce 100644 --- a/test/RegionalLCIASpec.hs +++ b/test/RegionalLCIASpec.hs @@ -31,7 +31,6 @@ import Types ( Indexes (..), SparseTriple (..), Unit (..), - emptyCrossDBLinkingStats, emptyProductIndex, ) import UnitConversion (UnitConfig (..), UnitDef (..)) @@ -125,7 +124,7 @@ mkDB locsAndEmissions = , dbBiosphereCount = 1 , dbCrossDBLinks = [] , dbDependsOn = [] - , dbLinkingStats = emptyCrossDBLinkingStats + , dbLinkingStats = mempty , dbSynonymDB = Nothing , dbFlowsByName = M.empty , dbFlowsByCAS = M.empty From 793e317402626c14dbedd36ef57fc3231640fe95 Mon Sep 17 00:00:00 2001 From: Christophe Combelles Date: Tue, 26 May 2026 17:32:53 +0200 Subject: [PATCH 19/43] refactor(crosslinking): name the "first non-empty" cascade MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The supplier-search algorithm in findSupplierInIndexedDBs was a nested if-null ladder: exact = ... syn = if null exact then ... else [] pref = if null exact && null syn then ... else [] all = exact ++ syn ++ pref By the if-null guard, at most one of the three lists is non-empty — the ++ is a sleight-of-hand for "the first non-empty wins". The same pattern recurs one level deeper inside tryPrefixes (try each prefix's exact + synonym sub-cascade; first match wins). Both collapse cleanly to a single helper: firstNonEmpty :: [[a]] -> [a] firstNonEmpty = fromMaybe [] . find (not . null) which is the First monoid on Maybe [a] (lift each candidate list into Maybe via nonEmpty, combine with <|>, drop back) — collapsed because that's the exact shape every match-strategy cascade in the module wants. The cascade then reads as a priority-ordered list of strategies, and the recursive tryPrefixes becomes a simple firstNonEmpty over per-prefix sub-cascades, with the same helper applied at both levels. Net behaviour identical (same inputs, same outputs, no perf change); the algorithm's structure is now declared instead of unfolded. --- src/Database/CrossLinking.hs | 58 ++++++++++++++++++++---------------- 1 file changed, 32 insertions(+), 26 deletions(-) diff --git a/src/Database/CrossLinking.hs b/src/Database/CrossLinking.hs index e4b808ef..7231c0da 100644 --- a/src/Database/CrossLinking.hs +++ b/src/Database/CrossLinking.hs @@ -63,6 +63,7 @@ module Database.CrossLinking ( ) where import Data.Char (isAlpha, isUpper) +import Data.Foldable (find) import Data.List (maximumBy) import qualified Data.Map.Strict as M import Data.Maybe (fromMaybe, mapMaybe) @@ -398,21 +399,19 @@ findSupplierInIndexedDBs :: CrossDBLinkResult findSupplierInIndexedDBs LinkingContext{..} productName location unit = let normalizedName = normalizeText productName - -- Try exact match first (O(1) lookup) - exactCandidates = concatMap (lookupExact normalizedName) lcIndexedDatabases - -- Try synonym match if no exact match - synonymCandidates = - if null exactCandidates - then case lookupSynonymGroup lcSynonymDB (normalizeName productName) of + -- Three priority-ordered match strategies; we take the first non-empty + -- result via 'firstNonEmpty' (the "First" monoid restricted to lists). + -- 1. Exact product-name match across all indexed DBs. + -- 2. Synonym-group match if exact yielded nothing. + -- 3. Prefix-splitting fallback for compound names (e.g. SimaPro). + allCandidates = + firstNonEmpty + [ concatMap (lookupExact normalizedName) lcIndexedDatabases + , case lookupSynonymGroup lcSynonymDB (normalizeName productName) of Just groupId -> concatMap (lookupBySynonym groupId) lcIndexedDatabases Nothing -> [] - else [] - -- Fallback: try prefix-based splitting for compound names (e.g. SimaPro) - prefixCandidates = - if null exactCandidates && null synonymCandidates - then tryPrefixes (extractProductPrefixes productName) - else [] - allCandidates = exactCandidates ++ synonymCandidates ++ prefixCandidates + , tryPrefixes (extractProductPrefixes productName) + ] -- Effective location: if raw location is empty, try extracting from compound name effectiveLocation = if T.null location @@ -481,20 +480,27 @@ findSupplierInIndexedDBs LinkingContext{..} productName location unit = lookupBySynonym groupId idb = [(idbName idb, entry) | entry <- fromMaybe [] (M.lookup groupId (idbBySynonymGroup idb))] - -- Try each prefix from compound name splitting, return first match + {- | First non-empty list in a priority order. This is the @First@ monoid + on @Maybe [a]@ (lift each list into @Maybe@ via 'nonEmpty', combine with + @<|>@, drop back), collapsed to a single helper because that's the + exact shape every match-strategy cascade in this module wants. + -} + firstNonEmpty :: [[a]] -> [a] + firstNonEmpty = fromMaybe [] . find (not . null) + + -- Try each prefix from compound name splitting; for each prefix run the + -- same (exact, then synonym) sub-cascade; return the first prefix that + -- yields anything. tryPrefixes :: [Text] -> [(Text, SupplierEntry)] - tryPrefixes [] = [] - tryPrefixes (p : ps) = - let normalized = normalizeText p - candidates = concatMap (lookupExact normalized) lcIndexedDatabases - in if null candidates - then -- Also try synonym match for this prefix - case lookupSynonymGroup lcSynonymDB (normalizeName p) of - Just groupId -> - let synCandidates = concatMap (lookupBySynonym groupId) lcIndexedDatabases - in if null synCandidates then tryPrefixes ps else synCandidates - Nothing -> tryPrefixes ps - else candidates + tryPrefixes = firstNonEmpty . map candidatesFor + where + candidatesFor p = + let normalized = normalizeText p + byExact = concatMap (lookupExact normalized) lcIndexedDatabases + bySynonym = case lookupSynonymGroup lcSynonymDB (normalizeName p) of + Just groupId -> concatMap (lookupBySynonym groupId) lcIndexedDatabases + Nothing -> [] + in firstNonEmpty [byExact, bySynonym] classifyEntry :: Text -> (Text, SupplierEntry) -> Maybe ((Text, SupplierEntry), LocationKind) classifyEntry queryLoc entry@(_, SupplierEntry{seLocation}) = From b3165571cd1d94e6d229552f6863986691e7456f Mon Sep 17 00:00:00 2001 From: Christophe Combelles Date: Tue, 26 May 2026 17:36:46 +0200 Subject: [PATCH 20/43] refactor(api): route every ServiceError through throwServiceError MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `throwServiceError` was already defined as the single translator from Service.ServiceError to HTTP error responses, but six handler bodies and three internal helpers (withValidatedActivity, withValidatedFlow, resolveOrThrow) still inlined their own ad-hoc case ladders. The inlined translations had diverged over time — e.g. MatrixError was mapped to err500 in three handlers but err422 in throwServiceError itself (the comment near throwServiceError documents why err422 is correct: it indicates a client-submitted invariant breakage). Each case ladder collapses to `either throwServiceError pure (Service.X ...)`. Net −40 LOC in src/API/Routes.hs. Behaviour changes (all corrections, all toward the documented intent): - GET /graph, GET /supply-chain, GET /aggregate, GET /consumers: MatrixError now returns 422 with the inner message instead of 500 with the same message. Aligns these endpoints with the rest of the cross-DB pipeline. - resolveOrThrow: previously returned 500 with `show e` for MatrixError / InvalidUUID / FlowNotFound; now goes through throwServiceError (422 / 500 with the inner message, respectively). - withValidatedFlow: the three-way M.lookup on tech/bio/waste flows becomes `asum [TechKind <$> ..., BioKind <$> ..., WasteKind <$> ...]` — the same Alternative-on-Maybe pattern firstNonEmpty captured for list cascades. Categorically: the Either→AppM bind is `either throwServiceError pure`, which is the Kleisli arrow that lifts a pure Service.X result into the handler monad. Now used uniformly; the divergent inlines are gone. Hspec 1052/1052 green. No test verifies specific status codes for the shifted endpoints (only that MatrixError is surfaced as an error value at all). --- src/API/Routes.hs | 90 +++++++++++++---------------------------------- 1 file changed, 25 insertions(+), 65 deletions(-) diff --git a/src/API/Routes.hs b/src/API/Routes.hs index cbeeb53c..4ebd1002 100644 --- a/src/API/Routes.hs +++ b/src/API/Routes.hs @@ -19,6 +19,7 @@ import Control.Monad (forM, forM_, unless, when) import Control.Monad.IO.Class (liftIO) import Data.Aeson import qualified Data.ByteString.Lazy as BSL +import Data.Foldable (asum) import Data.List (find, intercalate, sortBy, sortOn) import qualified Data.List.NonEmpty as NE import qualified Data.Validation as V @@ -333,32 +334,24 @@ mkCrossDBContrib dbManager rootDbName _flowDB unitDB score ((depDbName, pid), c) -- | Helper function to validate ProcessId and lookup activity withValidatedActivity :: Database -> Text -> (Activity -> AppM a) -> AppM a -withValidatedActivity db processId action = do - case Service.resolveActivityByProcessId db processId of - Left (Service.InvalidProcessId errorMsg) -> throwError err400{errBody = BSL.fromStrict $ T.encodeUtf8 errorMsg} - Left (Service.ActivityNotFound _) -> throwError err404{errBody = "Activity not found"} - Left _ -> throwError err400{errBody = "Invalid request"} - Right activity -> action activity +withValidatedActivity db processId action = + either throwServiceError action (Service.resolveActivityByProcessId db processId) {- | Helper function to validate UUID and lookup flow. Returns a tagged sum so callers can dispatch on tech vs bio. -} withValidatedFlow :: Database -> Text -> (FlowKind -> AppM a) -> AppM a withValidatedFlow db uuid action = do - case Service.validateUUID uuid of - Left (Service.InvalidUUID errorMsg) -> throwError err400{errBody = BSL.fromStrict $ T.encodeUtf8 errorMsg} - Left _ -> throwError err400{errBody = "Invalid request"} - Right validUuidText -> - case UUID.fromText validUuidText of - Nothing -> throwError err400{errBody = "Invalid UUID format"} - Just validUuid -> - case M.lookup validUuid (dbTechFlows db) of - Just flow -> action (TechKind flow) - Nothing -> case M.lookup validUuid (dbBioFlows db) of - Just flow -> action (BioKind flow) - Nothing -> case M.lookup validUuid (dbWasteFlows db) of - Just flow -> action (WasteKind flow) - Nothing -> throwError err404{errBody = "Flow not found"} + validUuidText <- either throwServiceError pure (Service.validateUUID uuid) + validUuid <- maybe (throwError err400{errBody = "Invalid UUID format"}) pure (UUID.fromText validUuidText) + let lookups = + [ TechKind <$> M.lookup validUuid (dbTechFlows db) + , BioKind <$> M.lookup validUuid (dbBioFlows db) + , WasteKind <$> M.lookup validUuid (dbWasteFlows db) + ] + case asum lookups of + Just flow -> action flow + Nothing -> throwError err404{errBody = "Flow not found"} -- | Login request body newtype LoginRequest = LoginRequest @@ -480,20 +473,10 @@ HTTP status. Validates the resolved ProcessId against the technosphere matrix index too — see Service.validateProcessIdInMatrixIndex. -} resolveOrThrow :: Database -> Text -> AppM (ProcessId, Activity) -resolveOrThrow db processIdText = - case Service.resolveActivityAndProcessId db processIdText of - Left (Service.ActivityNotFound _) -> throwError err404{errBody = "Activity not found"} - Left (Service.InvalidProcessId msg) -> throwError err400{errBody = BSL.fromStrict $ T.encodeUtf8 msg} - Left e@(Service.MatrixError _) -> internalError e - Left e@(Service.InvalidUUID _) -> internalError e - Left e@(Service.FlowNotFound _) -> internalError e - Right (pid, act) -> - case Service.validateProcessIdInMatrixIndex db pid of - Left e -> internalError e - Right () -> return (pid, act) - where - internalError :: Service.ServiceError -> AppM a - internalError e = throwError err500{errBody = BSL.fromStrict $ T.encodeUtf8 $ T.pack $ show e} +resolveOrThrow db processIdText = do + (pid, act) <- either throwServiceError pure (Service.resolveActivityAndProcessId db processIdText) + either throwServiceError pure (Service.validateProcessIdInMatrixIndex db pid) + pure (pid, act) {- | Translate a Service-level error into the HTTP status used across the cross-DB LCIA paths. @@ -1100,13 +1083,10 @@ lcaServer env = getActivityInfo dbName processId = do (db, _) <- requireDatabaseByName dbName unitCfg <- liftIO $ getMergedUnitConfig dbManager - case Service.getActivityInfo unitCfg db processId of - Left (Service.ActivityNotFound _) -> throwError err404{errBody = "Activity not found"} - Left (Service.InvalidProcessId _) -> throwError err400{errBody = "Invalid ProcessId format"} - Left _ -> throwError err500{errBody = "Internal server error"} - Right result -> case fromJSON result of - Success activityInfo -> return activityInfo - Error err -> throwError err500{errBody = BSL.fromStrict $ T.encodeUtf8 $ T.pack err} + result <- either throwServiceError pure (Service.getActivityInfo unitCfg db processId) + case fromJSON result of + Success activityInfo -> return activityInfo + Error err -> throwError err500{errBody = BSL.fromStrict $ T.encodeUtf8 $ T.pack err} -- Activity flows sub-resource getActivityFlows :: Text -> Text -> AppM [FlowSummary] @@ -1177,12 +1157,7 @@ lcaServer env = (db, sharedSolver) <- requireDatabaseByName dbName let cutoffPercent = fromMaybe 1.0 maybeCutoff -- Default to 1% cutoff result <- liftIO $ Service.buildActivityGraph db sharedSolver processId cutoffPercent - case result of - Left (Service.ActivityNotFound _) -> throwError err404{errBody = "Activity not found"} - Left (Service.InvalidProcessId _) -> throwError err400{errBody = "Invalid ProcessId format"} - Left (Service.MatrixError msg) -> throwError err500{errBody = BSL.fromStrict $ T.encodeUtf8 msg} - Left _ -> throwError err500{errBody = "Internal server error"} - Right graphExport -> return graphExport + either throwServiceError pure result -- Build the supply-chain filter shared by the GET and POST handlers. buildSupplyChainFilter :: Maybe Text -> Maybe Int -> Maybe Double -> Maybe Int -> Maybe Int -> Maybe Text -> Maybe Text -> Maybe Text -> [Text] -> [Text] -> [Text] -> Maybe Text -> Maybe Text -> Service.SupplyChainFilter @@ -1236,13 +1211,7 @@ lcaServer env = Nothing -> do unitCfg <- liftIO $ DM.getMergedUnitConfig dbManager result <- liftIO $ Service.getSupplyChain unitCfg (DM.mkDepSolverLookup dbManager) db dbName sharedSolver processIdText scf includeEdges - case result of - Left (Service.ActivityNotFound _) -> throwError err404{errBody = "Activity not found"} - Left (Service.InvalidProcessId _) -> throwError err400{errBody = "Invalid ProcessId format"} - Left (Service.MatrixError msg) -> throwError err500{errBody = BSL.fromStrict $ T.encodeUtf8 msg} - Left (Service.InvalidUUID _) -> throwError err500{errBody = "Internal server error"} - Left (Service.FlowNotFound _) -> throwError err500{errBody = "Internal server error"} - Right supplyChain -> return supplyChain + either throwServiceError pure result Just subReq -> do (processId, _) <- resolveOrThrow db processIdText -- Use the cross-DB-aware substitution resolver so qualified PIDs in @@ -1367,12 +1336,7 @@ lcaServer env = unitCfg <- liftIO $ getMergedUnitConfig dbManager (mFlows, mUnits) <- liftIO $ DM.getMergedFlowMetadata dbManager result <- liftIO $ Agg.aggregate unitCfg mFlows mUnits db dbName sharedSolver (DM.mkDepSolverLookup dbManager) processId params - case result of - Left (Service.ActivityNotFound _) -> throwError err404{errBody = "Activity not found"} - Left (Service.InvalidProcessId _) -> throwError err400{errBody = "Invalid ProcessId format"} - Left (Service.MatrixError msg) -> throwError err500{errBody = BSL.fromStrict $ T.encodeUtf8 msg} - Left _ -> throwError err500{errBody = "Internal server error"} - Right agg -> return agg + either throwServiceError pure result where -- Parse "System=Value[:exact]" into (system, value, isExact). parseClassFilter :: Text -> Maybe (Text, Text, Bool) @@ -1523,11 +1487,7 @@ lcaServer env = , Service.cnfMaxDepth = maxDepthParam , Service.cnfIncludeEdges = fromMaybe False includeEdgesParam } - case Service.getConsumers db dbName processIdText cnf of - Left (Service.ActivityNotFound _) -> throwError err404{errBody = "Activity not found"} - Left (Service.InvalidProcessId msg) -> throwError err400{errBody = BSL.fromStrict $ T.encodeUtf8 msg} - Left err -> throwError err500{errBody = BSL.fromStrict $ T.encodeUtf8 $ T.pack $ show err} - Right consumers -> return consumers + either throwServiceError pure (Service.getConsumers db dbName processIdText cnf) -- Activity path-to endpoint (shortest supply chain path to first matching upstream activity) getActivityPathTo :: Text -> Text -> Maybe Text -> AppM Value From 62de1622816e743efe98dbe02c35b487491d7840 Mon Sep 17 00:00:00 2001 From: Christophe Combelles Date: Tue, 26 May 2026 23:21:22 +0200 Subject: [PATCH 21/43] refactor(api): finish AppM migration; lift handlers to top level MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Every Servant handler that previously lived in `lcaServer`'s `where`-block is now a top-level `:: AppM …` action that reads its env slice via `asks aeDbManager` / `aeMaxTreeDepth` / `aePassword` / `aeHostingConfig` / `aeClassificationPresets`. `lcaServer` body collapses to `hoistServer lcaAPI (runApp env) handlers`, with only the `:<|>` chain left inside `where`. This unblocks non-Servant reuse (BatchImpacts etc.) without the prior "hoist after the fact" gymnastics. The five Has-* capability classes are deleted — only `HasDatabaseManager` was used (at one site) and the others were dead code; `asks aeX` is the same length without the class indirection. Three near-identical `expandPreset + zipWith3` preset/explicit-filter merges (consumers, supply-chain core, search) collapse to one `mergeClassFilters` helper. Trim categorical prose that the implementations don't exploit: the "Coercible-witnessed iso / dictionary transport" paragraph in `Stripped` (no coerce in the file), the Milewski reference in `Validation` (replaced with the actual `ap = (<*>)` law that blocks a Monad instance), the "Free Applicative over a primitive parser action" paragraph on the optparse helpers (tautological w.r.t. the library), the "product of four monoids" / "product of monoids, componentwise" prose on `UnlinkedSummary` / `CrossDBLinkingStats` / `TreeStats`, the long App.Env head doc, and the dead `mempty,` re-export from `Database.Loader` (class methods are not exportable as symbols). Net: 9 files, +1022/-1142. `cabal build all` clean, lca-tests 1052/1052 green, `dump-openapi` byte-identical (md5 matches PR head). --- src/API/DatabaseHandlers.hs | 42 +- src/API/JsonOptions.hs | 18 +- src/API/Routes.hs | 1962 ++++++++++++++++++----------------- src/App/Env.hs | 91 +- src/CLI/Parser.hs | 9 +- src/Data/Validation.hs | 15 +- src/Database/Loader.hs | 12 +- src/Service.hs | 5 +- src/Types.hs | 10 +- 9 files changed, 1022 insertions(+), 1142 deletions(-) diff --git a/src/API/DatabaseHandlers.hs b/src/API/DatabaseHandlers.hs index aa011473..76f71253 100644 --- a/src/API/DatabaseHandlers.hs +++ b/src/API/DatabaseHandlers.hs @@ -131,13 +131,13 @@ import Database.Upload ( ) import qualified Database.UploadedDatabase as UploadedDB import Types (Database (..), GeographyPolicy (..), unresolvedCount) -import App.Env (AppM, HasDatabaseManager (..)) +import App.Env (AppEnv (..), AppM) import Control.Monad.Reader (asks) -- | List all databases getDatabases :: AppM DatabaseListResponse getDatabases = do - dbManager <- asks getDatabaseManager + dbManager <- asks aeDbManager dbStatuses <- liftIO $ listDatabases dbManager let statusList = map convertDbStatus dbStatuses return $ DatabaseListResponse statusList @@ -145,7 +145,7 @@ getDatabases = do -- | Load a database on demand loadDatabaseHandler :: Text -> AppM LoadDatabaseResponse loadDatabaseHandler dbName= do - dbManager <- asks getDatabaseManager + dbManager <- asks aeDbManager eitherResult <- liftIO $ try $ loadDatabase dbManager dbName case eitherResult of Left (ex :: SomeException) -> @@ -158,7 +158,7 @@ loadDatabaseHandler dbName= do -- | Unload a database from memory unloadDatabaseHandler :: Text -> AppM ActivateResponse unloadDatabaseHandler dbName = do - dbManager <- asks getDatabaseManager + dbManager <- asks aeDbManager simpleAction (unloadDatabase dbManager dbName) ("Unloaded database: " <> dbName) {- | Re-run cross-DB linking for a loaded database against the currently-loaded @@ -167,7 +167,7 @@ suboptimal order without reloading the whole database. -} relinkDatabaseHandler :: Text -> AppM RelinkResponse relinkDatabaseHandler dbName= do - dbManager <- asks getDatabaseManager + dbManager <- asks aeDbManager res <- liftIO $ relinkDatabase dbManager dbName case res of Left err -> throwError err404{errBody = BSL.fromStrict $ T.encodeUtf8 err} @@ -184,13 +184,13 @@ relinkDatabaseHandler dbName= do -- | Delete an uploaded database (move to trash) deleteDatabaseHandler :: Text -> AppM ActivateResponse deleteDatabaseHandler dbName = do - dbManager <- asks getDatabaseManager + dbManager <- asks aeDbManager simpleAction (removeDatabase dbManager dbName) ("Deleted database: " <> dbName) -- | Upload a new database uploadDatabaseHandler :: UploadRequest -> AppM UploadResponse uploadDatabaseHandler req= do - dbManager <- asks getDatabaseManager + dbManager <- asks aeDbManager -- Decode base64 ZIP data let zipDataResult = B64.decode $ T.encodeUtf8 $ urFileData req case zipDataResult of @@ -324,7 +324,7 @@ Returns completeness, missing suppliers, and dependency suggestions -} getDatabaseSetupHandler :: Text -> AppM DatabaseSetupInfo getDatabaseSetupHandler dbName= do - dbManager <- asks getDatabaseManager + dbManager <- asks aeDbManager eitherResult <- liftIO $ try $ getDatabaseSetupInfo dbManager dbName case eitherResult of Left (ex :: SomeException) -> @@ -338,7 +338,7 @@ Runs cross-DB linking and returns updated setup info -} addDependencyHandler :: Text -> Text -> AppM DatabaseSetupInfo addDependencyHandler dbName depName = do - dbManager <- asks getDatabaseManager + dbManager <- asks aeDbManager ioEither400 (addDependencyToStaged dbManager dbName depName) {- | Remove a dependency from a staged database @@ -346,13 +346,13 @@ Re-runs cross-DB linking and returns updated setup info -} removeDependencyHandler :: Text -> Text -> AppM DatabaseSetupInfo removeDependencyHandler dbName depName = do - dbManager <- asks getDatabaseManager + dbManager <- asks aeDbManager ioEither400 (removeDependencyFromStaged dbManager dbName depName) -- | Change the data path for an uploaded (staged) database setDataPathHandler :: Text -> Value -> AppM DatabaseSetupInfo setDataPathHandler dbName body = do - dbManager <- asks getDatabaseManager + dbManager <- asks aeDbManager -- Extract "path" from JSON body let mPath = case body of A.Object obj -> case KM.lookup "path" obj of @@ -368,7 +368,7 @@ Builds matrices and makes it ready for queries -} finalizeDatabaseHandler :: Text -> AppM ActivateResponse finalizeDatabaseHandler dbName= do - dbManager <- asks getDatabaseManager + dbManager <- asks aeDbManager eitherResult <- liftIO $ try $ finalizeDatabase dbManager dbName case eitherResult of Left (ex :: SomeException) -> @@ -383,7 +383,7 @@ Same flow as database upload but creates MethodConfig entry -} uploadMethodHandler :: UploadRequest -> AppM UploadResponse uploadMethodHandler req= do - dbManager <- asks getDatabaseManager + dbManager <- asks aeDbManager let zipDataResult = B64.decode $ T.encodeUtf8 $ urFileData req case zipDataResult of Left err -> return $ UploadResponse False ("Invalid base64 data: " <> T.pack err) Nothing Nothing @@ -439,7 +439,7 @@ uploadMethodHandler req= do -- | Delete an uploaded method collection deleteMethodHandler :: Text -> AppM ActivateResponse deleteMethodHandler name = do - dbManager <- asks getDatabaseManager + dbManager <- asks aeDbManager simpleAction (removeMethodCollection dbManager name) ("Deleted method: " <> name) -- | Common pattern: run an IO action that returns Either Text (), map to ActivateResponse @@ -500,32 +500,32 @@ convertRefDataStatus s = listRefData :: RefDataKind -> AppM RefDataListResponse listRefData kind = do - dbManager <- asks getDatabaseManager + dbManager <- asks aeDbManager let (listFn, _, _, _, _, _) = rdOps kind statuses <- liftIO $ listFn dbManager return $ RefDataListResponse (map convertRefDataStatus statuses) loadRefData :: RefDataKind -> Text -> AppM ActivateResponse loadRefData kind name = do - dbManager <- asks getDatabaseManager + dbManager <- asks aeDbManager let (_, loadFn, _, _, _, _) = rdOps kind simpleAction (loadFn dbManager name) ("Loaded: " <> name) unloadRefData :: RefDataKind -> Text -> AppM ActivateResponse unloadRefData kind name = do - dbManager <- asks getDatabaseManager + dbManager <- asks aeDbManager let (_, _, unloadFn, _, _, _) = rdOps kind simpleAction (unloadFn dbManager name) ("Unloaded: " <> name) deleteRefData :: RefDataKind -> Text -> AppM ActivateResponse deleteRefData kind name = do - dbManager <- asks getDatabaseManager + dbManager <- asks aeDbManager let (_, _, _, _, removeFn, _) = rdOps kind simpleAction (removeFn dbManager name) ("Deleted: " <> name) uploadRefData :: RefDataKind -> UploadRequest -> AppM UploadResponse uploadRefData kind req = do - dbManager <- asks getDatabaseManager + dbManager <- asks aeDbManager let (_, _, _, addFn, _, subdir) = rdOps kind let csvDataResult = B64.decode $ T.encodeUtf8 $ urFileData req case csvDataResult of @@ -564,7 +564,7 @@ uploadRefData kind req = do getFlowSynonymGroupsHandler :: Text -> AppM SynonymGroupsResponse getFlowSynonymGroupsHandler name = do - dbManager <- asks getDatabaseManager + dbManager <- asks aeDbManager result <- liftIO $ getFlowSynonymGroups dbManager name case result of Left err -> throwError $ err404{errBody = BSL.fromStrict $ T.encodeUtf8 err} @@ -572,7 +572,7 @@ getFlowSynonymGroupsHandler name = do downloadRefDataHandler :: RefDataKind -> Text -> AppM (Headers '[Header "Content-Disposition" Text] BinaryContent) downloadRefDataHandler kind name = do - dbManager <- asks getDatabaseManager + dbManager <- asks aeDbManager let tvar = case kind of FlowSynonyms -> dmAvailableFlowSyns dbManager CompartmentMappings -> dmAvailableCompMaps dbManager diff --git a/src/API/JsonOptions.hs b/src/API/JsonOptions.hs index 442fec54..15caea6e 100644 --- a/src/API/JsonOptions.hs +++ b/src/API/JsonOptions.hs @@ -53,17 +53,8 @@ strippedParseJSON = genericParseJSON stripLowerPrefix strippedSchemaOptions :: SchemaOptions strippedSchemaOptions = fromAesonOptions stripLowerPrefix -{- | DerivingVia carrier collapsing the ~180 hand-rolled @ToJSON@ \/ @FromJSON@ -\/ @ToSchema@ instance blocks that share the same shape: - -@ - toJSON = strippedToJSON - toEncoding = strippedToEncoding - parseJSON = strippedParseJSON - declareNamedSchema = genericDeclareNamedSchema strippedSchemaOptions -@ - -Usage: +{- | DerivingVia carrier: strips the lowercase field-name prefix +(@fooBar -> bar@) and uses the @Generic@ instances. Usage: @ data Foo = Foo { fooBar :: Int } deriving Generic @@ -71,11 +62,6 @@ Usage: deriving via (Stripped Foo) instance FromJSON Foo deriving via (Stripped Foo) instance ToSchema Foo @ - -Categorically: dictionary transport along the zero-cost iso witnessed by -'Coercible'. The instance bodies bypass the user-facing typeclass on @a@ and -call the @Generic@ helpers directly, avoiding the recursion that would arise -from @instance ToJSON a => ToJSON (Stripped a)@. -} newtype Stripped a = Stripped {unStripped :: a} diff --git a/src/API/Routes.hs b/src/API/Routes.hs index 4ebd1002..72cb4a68 100644 --- a/src/API/Routes.hs +++ b/src/API/Routes.hs @@ -56,7 +56,7 @@ import qualified SharedSolver import Tree (buildLoopAwareTree) import Types import qualified Version -import App.Env (AppEnv (..), AppM, HasDatabaseManager (..), runApp) +import App.Env (AppEnv (..), AppM, runApp) import Control.Monad.Reader (asks) -- | API type definition - RESTful design with focused endpoints @@ -189,7 +189,7 @@ notLoadedBody prefix name = BSL.fromStrict (T.encodeUtf8 (prefix <> name)) -- | Get database by name, throw 404 if not loaded requireDatabaseByName :: Text -> AppM (Database, SharedSolver) requireDatabaseByName dbName = do - dbManager <- asks getDatabaseManager + dbManager <- asks aeDbManager maybeLoaded <- liftIO $ getDatabase dbManager dbName case maybeLoaded of Just loaded -> return (ldDatabase loaded, ldSharedSolver loaded) @@ -232,7 +232,7 @@ methods. -} solutionWithDeps :: Text -> Database -> SharedSolver -> ProcessId -> AppM SharedSolver.CrossDBSolution solutionWithDeps dbName db solver pid = do - dbManager <- asks getDatabaseManager + dbManager <- asks aeDbManager requireFullyLinked dbName db unitCfg <- liftIO $ getMergedUnitConfig dbManager res <- @@ -256,7 +256,7 @@ inventoriesWithDeps dbName db solver pids = -- | Batch variant of 'solutionWithDeps'. solutionsWithDeps :: Text -> Database -> SharedSolver -> [ProcessId] -> AppM [SharedSolver.CrossDBSolution] solutionsWithDeps dbName db solver pids = do - dbManager <- asks getDatabaseManager + dbManager <- asks aeDbManager requireFullyLinked dbName db unitCfg <- liftIO $ getMergedUnitConfig dbManager res <- @@ -494,7 +494,7 @@ throwServiceError (Service.FlowNotFound _) = throwError err500{errBody = "Intern -- | Load a method collection by name from the live DatabaseManager state. loadCollection :: Text -> AppM ([Method], [DamageCategory], [NormWeightSet], [ScoringSet]) loadCollection collectionName = do - dbManager <- asks getDatabaseManager + dbManager <- asks aeDbManager loadedCollections <- liftIO $ readTVarIO (dmLoadedMethods dbManager) case M.lookup collectionName loadedCollections of Just mc -> return (mcMethods mc, mcDamageCategories mc, mcNormWeightSets mc, mcScoringSets mc) @@ -508,7 +508,7 @@ crossDBSolutionFor :: Text -> Database -> SharedSolver -> ProcessId -> Maybe Sub crossDBSolutionFor dbName db solver pid mSub = case mSub of Nothing -> solutionWithDeps dbName db solver pid Just subReq -> do - dbManager <- asks getDatabaseManager + dbManager <- asks aeDbManager requireFullyLinked dbName db unitCfg <- liftIO $ getMergedUnitConfig dbManager eSol <- @@ -772,7 +772,7 @@ activityLCIABatchH :: Maybe SubstitutionRequest -> AppM LCIABatchResult activityLCIABatchH dbName processIdText collectionName mSub = do - dbManager <- asks getDatabaseManager + dbManager <- asks aeDbManager (db, sharedSolver) <- requireDatabaseByName dbName (actProcessId, activity) <- resolveOrThrow db processIdText (methods, damageCats, nwSets, scoringSets) <- loadCollection collectionName @@ -836,7 +836,7 @@ batchImpactsH :: BatchImpactsRequest -> AppM BatchImpactsResponse batchImpactsH dbName collectionName topFlowsParam req = do - dbManager <- asks getDatabaseManager + dbManager <- asks aeDbManager (db, sharedSolver) <- requireDatabaseByName dbName loadedCollections <- liftIO $ readTVarIO (dmLoadedMethods dbManager) collection <- case M.lookup collectionName loadedCollections of @@ -896,24 +896,983 @@ batchImpactsH dbName collectionName topFlowsParam req = do , birInvalid = invalid } -{- | API server implementation. Handlers live in 'AppM' (a 'ReaderT' -'AppEnv' over Servant's 'Handler'); 'hoistServer' is the natural -transformation that lifts the @ServerT LCAAPI AppM@ into a plain -@Server LCAAPI@ Servant expects at the WAI boundary. --} +-- --------------------------------------------------------------------------- +-- Pure helpers shared by handlers +-- --------------------------------------------------------------------------- + +-- | Parse "System=Value[:exact]" into (system, value, isExact). +parseClassFilter :: Text -> Maybe (Text, Text, Bool) +parseClassFilter raw = + let (sys, rest) = T.breakOn "=" raw + in if T.null rest + then Nothing + else + let valAndMode = T.drop 1 rest + (val, mode) = T.breakOn ":" valAndMode + isExact = T.drop 1 mode == "exact" + in Just (T.strip sys, T.strip val, isExact) + +-- | Merge preset-derived and explicit (system, value, exact) classification filters. +mergeClassFilters + :: [Config.ClassificationPreset] + -> Maybe Text + -> [Text] + -> [Text] + -> [Text] + -> [(Text, Text, Bool)] +mergeClassFilters presets presetParam systems values modes = + expandPreset presets presetParam + ++ zipWith3 + (\s v m -> (s, v, m == "exact")) + systems + values + (modes ++ repeat "contains") + +-- | Build a 'Service.SupplyChainFilter' shared by GET and POST handlers. +buildSupplyChainFilter + :: [Config.ClassificationPreset] + -> Maybe Text + -> Maybe Int + -> Maybe Double + -> Maybe Int + -> Maybe Int + -> Maybe Text + -> Maybe Text + -> Maybe Text + -> [Text] + -> [Text] + -> [Text] + -> Maybe Text + -> Maybe Text + -> Service.SupplyChainFilter +buildSupplyChainFilter presets nameFilter limitParam minQuantity offsetParam maxDepthParam locationFilter productFilter presetParam classSystems classValues classModes sortParam orderParam = + Service.SupplyChainFilter + { Service.scfCore = + Service.ActivityFilterCore + { Service.afcName = nameFilter + , Service.afcLocation = locationFilter + , Service.afcProduct = productFilter + , Service.afcClassifications = mergeClassFilters presets presetParam classSystems classValues classModes + , Service.afcLimit = limitParam + , Service.afcOffset = offsetParam + , Service.afcSort = sortParam + , Service.afcOrder = orderParam + } + , Service.scfMaxDepth = maxDepthParam + , Service.scfMinQuantity = minQuantity + } + +buildFlowEntry :: Database -> M.Map UUID (MethodCF, MatchStrategy) -> UUID -> FlowCFEntry +buildFlowEntry db reverseIndex uuid = + let mFlow = M.lookup uuid (dbBioFlows db) + mMatch = M.lookup uuid reverseIndex + in FlowCFEntry + { fceFlowId = uuid + , fceFlowName = maybe "" bfName mFlow + , fceFlowCategory = maybe "" bfCompartmentName mFlow + , fceCfValue = fmap (mcfValue . fst) mMatch + , fceCfFlowName = fmap (mcfFlowName . fst) mMatch + , fceMatchStrategy = fmap (strategyToText . snd) mMatch + } + +strategyToText :: MatchStrategy -> Text +strategyToText ByUUID = "uuid" +strategyToText ByCAS = "cas" +strategyToText ByName = "name" +strategyToText BySynonym = "synonym" +strategyToText ByFuzzy = "fuzzy" +strategyToText NoMatch = "none" + +matchesQuery :: Maybe Text -> Text -> Text -> Bool +matchesQuery Nothing _ _ = True +matchesQuery (Just q) cfName dbFlowName = + T.isInfixOf q (T.toLower cfName) || T.isInfixOf q (T.toLower dbFlowName) + +cfToAPI :: MethodCF -> MethodFactorAPI +cfToAPI cf = + MethodFactorAPI + { mfaFlowRef = mcfFlowRef cf + , mfaFlowName = mcfFlowName cf + , mfaDirection = case mcfDirection cf of + MT.Input -> "Input" + MT.Output -> "Output" + , mfaValue = mcfValue cf + } + +-- --------------------------------------------------------------------------- +-- AppM helpers +-- --------------------------------------------------------------------------- + +-- | Lookup a method by UUID across all loaded collections. +loadMethodByUUID :: Text -> AppM Method +loadMethodByUUID uuidText = do + dbManager <- asks aeDbManager + loadedMethods <- liftIO $ DM.getLoadedMethods dbManager + let allMethods = map snd loadedMethods + case UUID.fromText uuidText of + Nothing -> throwError err400{errBody = "Invalid method UUID format"} + Just uuid -> + case filter (\m -> methodId m == uuid) allMethods of + (m : _) -> return m + [] -> throwError err404{errBody = "Method not found"} + +-- | Resolve (db, solver, ProcessId, Activity, Method) and dispatch. +withActivityAndMethod + :: Text + -> Text + -> Text + -> (Database -> SharedSolver -> ProcessId -> Activity -> Method -> AppM a) + -> AppM a +withActivityAndMethod dbName processIdText methodIdText k = do + (db, sharedSolver) <- requireDatabaseByName dbName + method <- loadMethodByUUID methodIdText + case Service.resolveActivityAndProcessId db processIdText of + Left (Service.ActivityNotFound _) -> throwError err404{errBody = "Activity not found"} + Left (Service.InvalidProcessId _) -> throwError err400{errBody = "Invalid ProcessId format"} + Left err -> throwError err500{errBody = BSL.fromStrict $ T.encodeUtf8 $ T.pack $ show err} + Right (actProcessId, activity) -> k db sharedSolver actProcessId activity method + +-- --------------------------------------------------------------------------- +-- Servant handlers (top-level AppM actions) +-- --------------------------------------------------------------------------- + +getOpenApiSpec :: AppM Value +getOpenApiSpec = return $ toJSON volcaOpenApi + +getVersion :: AppM Value +getVersion = + return $ + object + [ "version" .= Version.version + , "gitHash" .= Version.gitHash + , "gitTag" .= Version.gitTag + , "buildTarget" .= Version.buildTarget + ] + +getHosting :: AppM Value +getHosting = do + hostingConfig <- asks aeHostingConfig + return $ case hostingConfig of + Just hc -> + object + [ "is_hosted" .= True + , "max_uploads" .= Config.hcMaxUploads hc + , "api_access" .= Config.hcApiAccess hc + , "upgrade_upload" .= Config.hcUpgradeUpload hc + , "upgrade_api" .= Config.hcUpgradeApi hc + , "upgrade_vm_size" .= Config.hcUpgradeVmSize hc + ] + Nothing -> + object + [ "is_hosted" .= False + , "max_uploads" .= (-1 :: Int) + , "api_access" .= True + , "upgrade_upload" .= ("" :: Text) + , "upgrade_api" .= ("" :: Text) + , "upgrade_vm_size" .= ("" :: Text) + ] + +getStats :: AppM Value +getStats = liftIO $ do + enabled <- GHC.Stats.getRTSStatsEnabled + if enabled + then do + stats <- GHC.Stats.getRTSStats + return $ + object + [ "memory_used_bytes" .= GHC.Stats.gcdetails_live_bytes (GHC.Stats.gc stats) + , "memory_allocated_bytes" .= GHC.Stats.allocated_bytes stats + , "gc_count" .= GHC.Stats.gcs stats + ] + else + return $ + object + ["error" .= ("RTS stats not enabled. Run with +RTS -T to enable." :: Text)] + +getClassificationPresets :: AppM [ClassificationPresetInfo] +getClassificationPresets = do + presets <- asks aeClassificationPresets + return $ map toInfo presets + where + toInfo p = + ClassificationPresetInfo + { cpiName = Config.cpName p + , cpiLabel = Config.cpLabel p + , cpiDescription = Config.cpDescription p + , cpiFilters = map (\e -> ClassificationEntryInfo (Config.ceSystem e) (Config.ceValue e) (Config.ceMode e)) (Config.cpFilters p) + } + +getLogsHandler :: Maybe Int -> AppM Value +getLogsHandler sinceMaybe = do + let since = fromMaybe 0 sinceMaybe + (nextIndex, logLines) <- liftIO $ getLogLines since + return $ + object + [ "lines" .= logLines + , "nextIndex" .= nextIndex + ] + +postAuth :: LoginRequest -> AppM (Headers '[Header "Set-Cookie" String] Value) +postAuth loginReq = do + password <- asks aePassword + case password of + Nothing -> + return $ noHeader $ object ["ok" .= True] + Just pwd -> + if T.unpack (lrCode loginReq) == pwd + then + let cookieValue = "volca_session=" ++ pwd ++ "; Path=/; HttpOnly; SameSite=Strict" + in return $ addHeader cookieValue $ object ["ok" .= True] + else + throwError err401{errBody = "{\"error\":\"invalid code\"}"} + +getActivityInfo :: Text -> Text -> AppM ActivityInfo +getActivityInfo dbName processId = do + dbManager <- asks aeDbManager + (db, _) <- requireDatabaseByName dbName + unitCfg <- liftIO $ getMergedUnitConfig dbManager + result <- either throwServiceError pure (Service.getActivityInfo unitCfg db processId) + case fromJSON result of + Success activityInfo -> return activityInfo + Error err -> throwError err500{errBody = BSL.fromStrict $ T.encodeUtf8 $ T.pack err} + +getActivityFlows :: Text -> Text -> AppM [FlowSummary] +getActivityFlows dbName processId = do + (db, _) <- requireDatabaseByName dbName + withValidatedActivity db processId $ \activity -> + return $ Service.getActivityFlowSummaries db activity + +getActivityInputs :: Text -> Text -> AppM [ExchangeDetail] +getActivityInputs dbName processId = do + (db, _) <- requireDatabaseByName dbName + withValidatedActivity db processId $ \activity -> + return $ Service.getActivityInputDetails db activity + +getActivityOutputs :: Text -> Text -> AppM [ExchangeDetail] +getActivityOutputs dbName processId = do + (db, _) <- requireDatabaseByName dbName + withValidatedActivity db processId $ \activity -> + return $ Service.getActivityOutputDetails db activity + +getActivityReferenceProduct :: Text -> Text -> AppM FlowDetail +getActivityReferenceProduct dbName processId = do + (db, _) <- requireDatabaseByName dbName + withValidatedActivity db processId $ \activity -> + case Service.getActivityReferenceProductDetail db activity of + Nothing -> throwError err404{errBody = "No reference product found"} + Just refProduct -> return refProduct + +getActivityTree :: Text -> Text -> AppM TreeExport +getActivityTree dbName processId = do + dbManager <- asks aeDbManager + maxTreeDepth <- asks aeMaxTreeDepth + (db, _) <- requireDatabaseByName dbName + withValidatedActivity db processId $ \_activity -> do + let activityUuidText = case T.splitOn "_" processId of + (uuid : _) -> uuid + [] -> processId + case UUID.fromText activityUuidText of + Nothing -> throwError err400{errBody = "Invalid activity UUID format"} + Just activityUuid -> do + unitCfg <- liftIO $ getMergedUnitConfig dbManager + let loopAwareTree = buildLoopAwareTree unitCfg db activityUuid maxTreeDepth + return $ Service.convertToTreeExport db processId maxTreeDepth loopAwareTree + +-- | Inventory with optional substitutions; goes through the cross-DB +-- back-substitution path so dep-DB inventories merge into the response. +activityInventoryCore :: Text -> Text -> Maybe SubstitutionRequest -> AppM InventoryExport +activityInventoryCore dbName processIdText mSub = do + dbManager <- asks aeDbManager + (db, sharedSolver) <- requireDatabaseByName dbName + (processId, activity) <- resolveOrThrow db processIdText + sol <- crossDBSolutionFor dbName db sharedSolver processId mSub + (mFlows, mUnits) <- liftIO $ DM.getMergedFlowMetadata dbManager + pure $ Service.convertToInventoryExport db mFlows mUnits processId activity (SharedSolver.csInventory sol) + +getActivityInventory :: Text -> Text -> AppM InventoryExport +getActivityInventory dbName processIdText = activityInventoryCore dbName processIdText Nothing + +getActivityGraph :: Text -> Text -> Maybe Double -> AppM GraphExport +getActivityGraph dbName processId maybeCutoff = do + (db, sharedSolver) <- requireDatabaseByName dbName + let cutoffPercent = fromMaybe 1.0 maybeCutoff + result <- liftIO $ Service.buildActivityGraph db sharedSolver processId cutoffPercent + either throwServiceError pure result + +-- | Supply-chain core (scaling-vector based). 'Nothing' takes the cached +-- solve; 'Just' applies substitutions via the cross-DB resolver. +activitySupplyChainCore + :: Text + -> Text + -> Maybe Text + -> Maybe Int + -> Maybe Double + -> Maybe Int + -> Maybe Int + -> Maybe Text + -> Maybe Text + -> Maybe Text + -> [Text] + -> [Text] + -> [Text] + -> Maybe Text + -> Maybe Text + -> Maybe Bool + -> Maybe SubstitutionRequest + -> AppM SupplyChainResponse +activitySupplyChainCore dbName processIdText nameFilter limitParam minQuantity offsetParam maxDepthParam locationFilter productFilter presetParam classSystems classValues classModes sortParam orderParam includeEdgesParam mSub = do + dbManager <- asks aeDbManager + presets <- asks aeClassificationPresets + (db, sharedSolver) <- requireDatabaseByName dbName + let includeEdges = fromMaybe False includeEdgesParam + scf = + buildSupplyChainFilter + presets + nameFilter + limitParam + minQuantity + offsetParam + maxDepthParam + locationFilter + productFilter + presetParam + classSystems + classValues + classModes + sortParam + orderParam + case mSub of + Nothing -> do + unitCfg <- liftIO $ DM.getMergedUnitConfig dbManager + result <- liftIO $ Service.getSupplyChain unitCfg (DM.mkDepSolverLookup dbManager) db dbName sharedSolver processIdText scf includeEdges + either throwServiceError pure result + Just subReq -> do + (processId, _) <- resolveOrThrow db processIdText + scalingResult <- + liftIO $ + Service.computeScalingVectorWithSubstitutionsCrossDB + (DM.mkDepSolverLookup dbManager) + db + dbName + sharedSolver + processId + (srSubstitutions subReq) + case scalingResult of + Left err -> throwServiceError err + Right (scalingVec, virtualLinks) -> do + unitCfg <- liftIO $ DM.getMergedUnitConfig dbManager + eResp <- + liftIO $ + Service.buildSupplyChainFromScalingVectorCrossDB + unitCfg + (DM.mkDepSolverLookup dbManager) + db + dbName + processId + scalingVec + virtualLinks + scf + includeEdges + either throwServiceError pure eResp + +getActivitySupplyChain + :: Text + -> Text + -> Maybe Text + -> Maybe Int + -> Maybe Double + -> Maybe Int + -> Maybe Int + -> Maybe Text + -> Maybe Text + -> Maybe Text + -> [Text] + -> [Text] + -> [Text] + -> Maybe Text + -> Maybe Text + -> Maybe Bool + -> AppM SupplyChainResponse +getActivitySupplyChain dbName processIdText nameFilter limitParam minQuantity offsetParam maxDepthParam locationFilter productFilter presetParam classSystems classValues classModes sortParam orderParam includeEdgesParam = + activitySupplyChainCore dbName processIdText nameFilter limitParam minQuantity offsetParam maxDepthParam locationFilter productFilter presetParam classSystems classValues classModes sortParam orderParam includeEdgesParam Nothing + +-- | Aggregate endpoint with accumulating field-level validation (a single +-- request can report invalid `scope` and invalid `aggregate` together). +getActivityAggregate + :: Text + -> Text + -> Maybe Text + -> Maybe Bool + -> Maybe Int + -> Maybe Text + -> Maybe Text + -> Maybe Text + -> Maybe Text + -> [Text] + -> Maybe Text + -> Maybe Text + -> Maybe Bool + -> Maybe Text + -> Maybe Text + -> AppM Aggregation +getActivityAggregate dbName processId scopeParam isInputParam maxDepthParam fnameParam fnameNotParam funitParam presetParam fclassParams ftargetParam fexchangeTypeParam freferenceParam groupByParam aggregateParam = do + dbManager <- asks aeDbManager + presets <- asks aeClassificationPresets + (db, sharedSolver) <- requireDatabaseByName dbName + let parseScope = \case + Just "direct" -> V.Success Agg.ScopeDirect + Just "supply_chain" -> V.Success Agg.ScopeSupplyChain + Just "biosphere" -> V.Success Agg.ScopeBiosphere + _ -> V.failure "scope must be one of: direct | supply_chain | biosphere" + parseExType = \case + Nothing -> V.Success Nothing + Just "technosphere" -> V.Success (Just Agg.KindTechnosphere) + Just "biosphere" -> V.Success (Just Agg.KindBiosphere) + Just "waste" -> V.Success (Just Agg.KindWaste) + Just _ -> V.failure "filter_exchange_type must be one of: technosphere | biosphere | waste" + parseAgg = \case + Nothing -> V.Success Agg.AggSum + Just "sum_quantity" -> V.Success Agg.AggSum + Just "count" -> V.Success Agg.AggCount + Just "share" -> V.Success Agg.AggShare + Just other -> V.failure ("aggregate must be one of: sum_quantity | count | share (got " <> other <> ")") + (scope, exchangeType, aggFn) <- + case V.toEither $ (,,) <$> parseScope scopeParam <*> parseExType fexchangeTypeParam <*> parseAgg aggregateParam of + Left errs -> throwError err400{errBody = BSL.fromStrict (T.encodeUtf8 (T.intercalate "; " (NE.toList errs)))} + Right v -> pure v + case (exchangeType, scope) of + (Just _, Agg.ScopeBiosphere) -> + throwError err400{errBody = "filter_exchange_type is redundant with scope=biosphere"} + (Just _, Agg.ScopeSupplyChain) -> + throwError err400{errBody = "filter_exchange_type is not supported with scope=supply_chain (all entries are technosphere)"} + _ -> return () + let presetFilters = expandPreset presets presetParam + explicitFilters = mapMaybe parseClassFilter fclassParams + params = + Agg.AggregateParams + { Agg.apScope = scope + , Agg.apIsInput = isInputParam + , Agg.apMaxDepth = maxDepthParam + , Agg.apFilterName = fnameParam + , Agg.apFilterNameNot = maybe [] (map T.strip . T.splitOn ",") fnameNotParam + , Agg.apFilterUnit = funitParam + , Agg.apFilterClassifications = presetFilters ++ explicitFilters + , Agg.apFilterTargetName = ftargetParam + , Agg.apFilterExchangeType = exchangeType + , Agg.apFilterIsReference = freferenceParam + , Agg.apGroupBy = groupByParam + , Agg.apAggregate = aggFn + } + unitCfg <- liftIO $ getMergedUnitConfig dbManager + (mFlows, mUnits) <- liftIO $ DM.getMergedFlowMetadata dbManager + result <- liftIO $ Agg.aggregate unitCfg mFlows mUnits db dbName sharedSolver (DM.mkDepSolverLookup dbManager) processId params + either throwServiceError pure result + +-- | LCIA single-method core. GET passes a top-flows param and logs; +-- POST carries substitutions instead and skips logging. +activityLCIACore :: Text -> Text -> Text -> Maybe Int -> Maybe SubstitutionRequest -> AppM LCIAResult +activityLCIACore dbName processIdText methodIdText topFlowsParam mSub = do + dbManager <- asks aeDbManager + (db, sharedSolver) <- requireDatabaseByName dbName + method <- loadMethodByUUID methodIdText + (processId, activity) <- resolveOrThrow db processIdText + sol <- crossDBSolutionFor dbName db sharedSolver processId mSub + result <- liftIO $ computeCategoryResult dbManager dbName db sol activity (fromMaybe 5 topFlowsParam) Nothing method + when (isNothing mSub) $ liftIO $ logLCIAResult result method + pure result + +getActivityLCIA :: Text -> Text -> Text -> Text -> Maybe Int -> AppM LCIAResult +getActivityLCIA dbName processIdText _collectionName methodIdText topFlowsParam = + activityLCIACore dbName processIdText methodIdText topFlowsParam Nothing + +postActivityLCIA :: Text -> Text -> Text -> Text -> SubstitutionRequest -> AppM LCIAResult +postActivityLCIA dbName processIdText _collectionName methodIdText subReq = + activityLCIACore dbName processIdText methodIdText Nothing (Just subReq) + +-- | Sensitivity sweep: rank-1 perturbations on the root scaling, scored +-- through the cross-DB graph (regional CFs on dep DBs still apply). +postActivitySensitivity :: Text -> Text -> Text -> Text -> SensitivityRequest -> AppM SensitivityResponse +postActivitySensitivity dbName processIdText _collectionName methodIdText senReq = do + dbManager <- asks aeDbManager + (db, sharedSolver) <- requireDatabaseByName dbName + requireFullyLinked dbName db + method <- loadMethodByUUID methodIdText + (processId, activity) <- resolveOrThrow db processIdText + eRes <- liftIO $ Service.computeSensitivities db sharedSolver processId (srPerturbations senReq) + (baselineX, perResults) <- either throwServiceError pure eRes + unitCfg <- liftIO $ getMergedUnitConfig dbManager + let depLookup = DM.mkDepSolverLookup dbManager + scaleToSolution x = do + eSol <- + SharedSolver.goWithDepsFromScalings + unitCfg + depLookup + db + dbName + [] + [x] + 0 + pure $ case eSol of + Left err -> Left err + Right (sol : _) -> Right sol + Right [] -> Left "cross-DB propagation returned empty result" + buildEntry baselineLcia (p, eitherX) = case eitherX of + Left err -> pure (PerturbedEntry p (Left err)) + Right x' -> do + eSol <- scaleToSolution x' + case eSol of + Left err -> pure (PerturbedEntry p (Left err)) + Right sol -> do + lcia <- computeCategoryResult dbManager dbName db sol activity 5 Nothing method + pure (PerturbedEntry p (Right (lcia, lrScore lcia - lrScore baselineLcia))) + eBaselineSol <- liftIO $ scaleToSolution baselineX + baselineSol <- + either + (\err -> throwError err422{errBody = BSL.fromStrict $ T.encodeUtf8 err}) + pure + eBaselineSol + baselineLcia <- + liftIO $ + computeCategoryResult dbManager dbName db baselineSol activity 5 Nothing method + perturbed <- + liftIO $ + mapConcurrently (buildEntry baselineLcia) perResults + pure SensitivityResponse{srBaseline = baselineLcia, srPerturbed = perturbed} + +getActivityLCIABatch :: Text -> Text -> Text -> AppM LCIABatchResult +getActivityLCIABatch dbName processIdText collectionName = + activityLCIABatchH dbName processIdText collectionName Nothing + +postActivityLCIABatch :: Text -> Text -> Text -> SubstitutionRequest -> AppM LCIABatchResult +postActivityLCIABatch dbName processIdText collectionName subReq = + activityLCIABatchH dbName processIdText collectionName (Just subReq) + +postActivityInventory :: Text -> Text -> SubstitutionRequest -> AppM InventoryExport +postActivityInventory dbName processIdText subReq = activityInventoryCore dbName processIdText (Just subReq) + +postActivitySupplyChain + :: Text + -> Text + -> Maybe Text + -> Maybe Int + -> Maybe Double + -> Maybe Int + -> Maybe Int + -> Maybe Text + -> Maybe Text + -> Maybe Text + -> [Text] + -> [Text] + -> [Text] + -> Maybe Text + -> Maybe Text + -> Maybe Bool + -> SubstitutionRequest + -> AppM SupplyChainResponse +postActivitySupplyChain dbName processIdText nameFilter limitParam minQuantity offsetParam maxDepthParam locationFilter productFilter presetParam classSystems classValues classModes sortParam orderParam includeEdgesParam subReq = + activitySupplyChainCore dbName processIdText nameFilter limitParam minQuantity offsetParam maxDepthParam locationFilter productFilter presetParam classSystems classValues classModes sortParam orderParam includeEdgesParam (Just subReq) + +getActivityConsumers + :: Text + -> Text + -> Maybe Text + -> Maybe Text + -> Maybe Text + -> Maybe Text + -> [Text] + -> [Text] + -> [Text] + -> Maybe Int + -> Maybe Int + -> Maybe Int + -> Maybe Text + -> Maybe Text + -> Maybe Bool + -> AppM ConsumersResponse +getActivityConsumers dbName processIdText nameFilter locationFilter productFilter presetParam classSystems classValues classModes limitParam offsetParam maxDepthParam sortParam orderParam includeEdgesParam = do + presets <- asks aeClassificationPresets + (db, _) <- requireDatabaseByName dbName + let cnf = + Service.ConsumerFilter + { Service.cnfCore = + Service.ActivityFilterCore + { Service.afcName = nameFilter + , Service.afcLocation = locationFilter + , Service.afcProduct = productFilter + , Service.afcClassifications = mergeClassFilters presets presetParam classSystems classValues classModes + , Service.afcLimit = limitParam + , Service.afcOffset = offsetParam + , Service.afcSort = sortParam + , Service.afcOrder = orderParam + } + , Service.cnfMaxDepth = maxDepthParam + , Service.cnfIncludeEdges = fromMaybe False includeEdgesParam + } + either throwServiceError pure (Service.getConsumers db dbName processIdText cnf) + +getActivityPathTo :: Text -> Text -> Maybe Text -> AppM Value +getActivityPathTo dbName processIdText targetParam = do + (db, solver) <- requireDatabaseByName dbName + target <- + maybe + (throwError err400{errBody = "Missing required 'target' query parameter"}) + pure + targetParam + result <- liftIO $ Service.getPathTo db solver processIdText target + case result of + Left (Service.ActivityNotFound msg) -> + throwError err404{errBody = BSL.fromStrict $ T.encodeUtf8 msg} + Left (Service.InvalidProcessId msg) -> + throwError err400{errBody = BSL.fromStrict $ T.encodeUtf8 msg} + Left err -> + throwError err500{errBody = BSL.fromStrict $ T.encodeUtf8 $ T.pack $ show err} + Right val -> return val + +getActivityAnalyze :: Text -> Text -> Text -> AppM Value +getActivityAnalyze dbName processIdText analyzerName = do + dbManager <- asks aeDbManager + (db, sharedSolver) <- requireDatabaseByName dbName + case M.lookup analyzerName (prAnalyzers (dmPlugins dbManager)) of + Nothing -> throwError err404{errBody = "Analyzer not found: " <> BSL.fromStrict (T.encodeUtf8 analyzerName)} + Just analyzer -> do + case Service.resolveActivityAndProcessId db processIdText of + Left (Service.ActivityNotFound _) -> throwError err404{errBody = "Activity not found"} + Left (Service.InvalidProcessId _) -> throwError err400{errBody = "Invalid ProcessId format"} + Left err -> throwError err500{errBody = BSL.fromStrict $ T.encodeUtf8 $ T.pack $ show err} + Right (actProcessId, _) -> do + inventory <- inventoryWithDeps dbName db sharedSolver actProcessId + (mFlows, mUnits) <- liftIO $ DM.getMergedFlowMetadata dbManager + loadedMethods <- liftIO $ DM.getLoadedMethods dbManager + let methods = map snd loadedMethods + ctx = + AnalyzeContext + { acDatabase = db + , acInventory = inventory + , acMethods = methods + , acParameters = M.empty + , acTechFlowDB = M.empty + , acBioFlowDB = mFlows + , acUnitDB = mUnits + } + liftIO $ ahAnalyze analyzer ctx + +getContributingFlows :: Text -> Text -> Text -> Text -> Maybe Int -> AppM ContributingFlowsResult +getContributingFlows dbName processIdText _collectionName methodIdText limitParam = + withActivityAndMethod dbName processIdText methodIdText $ \db sharedSolver actProcessId _ method -> do + dbManager <- asks aeDbManager + let lim = fromMaybe 20 limitParam + unitCfg <- liftIO $ getMergedUnitConfig dbManager + (mFlows, mUnits) <- liftIO $ DM.getMergedFlowMetadata dbManager + inventory <- inventoryWithDeps dbName db sharedSolver actProcessId + tables <- liftIO $ DM.mapMethodToTablesCached dbManager dbName db method + let score = loScore (computeLCIAScoreFromTables unitCfg mUnits mFlows inventory tables) + (rawContribs, unknownUuids) = inventoryContributions unitCfg mUnits mFlows inventory tables + contribs = sortOn (\(_, _, c) -> negate (abs c)) rawContribs + topFlows = + [ FlowContributionEntry + { fcoFlowName = bfName f + , fcoContribution = c + , fcoSharePct = if score /= 0 then c / score * 100 else 0 + , fcoFlowId = UUID.toText (bfId f) + , fcoCategory = bfCompartmentName f + , fcoCompartment = bfCompartmentSub f + , fcoCfValue = cfVal + } + | (f, cfVal, c) <- take lim contribs + ] + liftIO $ + unless (null unknownUuids) $ + reportProgress Warning $ + "[contributing-flows " + <> T.unpack (methodName method) + <> "] " + <> show (length unknownUuids) + <> " inventory flow UUID(s) absent from merged FlowDB. Samples: " + <> show (take 3 unknownUuids) + return + ContributingFlowsResult + { cfrMethod = methodName method + , cfrUnit = methodUnit method + , cfrTotalScore = score + , cfrTopFlows = topFlows + } + +getContributingActivities :: Text -> Text -> Text -> Text -> Maybe Int -> AppM ContributingActivitiesResult +getContributingActivities dbName processIdText _collectionName methodIdText limitParam = + withActivityAndMethod dbName processIdText methodIdText $ \db sharedSolver actProcessId _ method -> do + dbManager <- asks aeDbManager + let lim = fromMaybe 10 limitParam + requireFullyLinked dbName db + unitCfg <- liftIO $ getMergedUnitConfig dbManager + (mFlows, mUnits) <- liftIO $ DM.getMergedFlowMetadata dbManager + tables <- liftIO $ DM.mapMethodToTablesCached dbManager dbName db method + eContribs <- + liftIO $ + SharedSolver.crossDBProcessContributions + unitCfg + mUnits + mFlows + (DM.mkDepSolverLookup dbManager) + db + dbName + sharedSolver + actProcessId + tables + case eContribs of + Left err -> throwError err422{errBody = BSL.fromStrict $ T.encodeUtf8 err} + Right contributions -> do + let score = sum (M.elems contributions) + sorted = sortOn (\(_, c) -> negate (abs c)) (M.toList contributions) + top = take lim sorted + rows <- liftIO $ mapM (mkCrossDBContrib dbManager dbName mFlows mUnits score) top + return + ContributingActivitiesResult + { carMethod = methodName method + , carUnit = methodUnit method + , carTotalScore = score + , carActivities = rows + } + +getFlowDetail :: Text -> Text -> AppM FlowDetail +getFlowDetail dbName flowIdText = do + (db, _) <- requireDatabaseByName dbName + withValidatedFlow db flowIdText $ \flow -> do + let fid = flowKindId flow + unitName' = flowKindUnitName (dbUnits db) flow + usageCount = Service.getFlowUsageCount db fid + return $ FlowDetail (apiFlowOfKind flow) unitName' usageCount + +getFlowActivities :: Text -> Text -> AppM [ActivitySummary] +getFlowActivities dbName flowIdText = do + (db, _) <- requireDatabaseByName dbName + withValidatedFlow db flowIdText $ \flow -> + return $ Service.getActivitiesUsingFlow db (flowKindId flow) + +getMethods :: AppM [MethodSummary] +getMethods = do + dbManager <- asks aeDbManager + loadedMethods <- liftIO $ DM.getLoadedMethods dbManager + return + [ MethodSummary + { msmId = methodId m + , msmName = methodName m + , msmCategory = methodCategory m + , msmUnit = methodUnit m + , msmFactorCount = length (methodFactors m) + , msmCollection = collName + } + | (collName, m) <- loadedMethods + ] + +getMethodDetail :: Text -> AppM MethodDetail +getMethodDetail methodIdText = do + method <- loadMethodByUUID methodIdText + return $ + MethodDetail + { mdId = methodId method + , mdName = methodName method + , mdDescription = methodDescription method + , mdUnit = methodUnit method + , mdCategory = methodCategory method + , mdMethodology = methodMethodology method + , mdFactorCount = length (methodFactors method) + } + +getMethodFactors :: Text -> AppM [MethodFactorAPI] +getMethodFactors methodIdText = do + method <- loadMethodByUUID methodIdText + return $ map cfToAPI (methodFactors method) + +getMethodMapping :: Text -> Text -> AppM MappingStatus +getMethodMapping dbName methodIdText = do + dbManager <- asks aeDbManager + (db, _) <- requireDatabaseByName dbName + method <- loadMethodByUUID methodIdText + mappings <- liftIO $ DM.mapMethodToFlowsCached dbManager dbName db method + let stats = computeMappingStats mappings + totalFactors = length mappings + coverage = + if totalFactors > 0 + then fromIntegral (totalFactors - msUnmatched stats) / fromIntegral totalFactors * 100 + else 0.0 + unmappedFlows = + take + 50 + [ UnmappedFlowAPI + { ufaFlowRef = mcfFlowRef cf + , ufaFlowName = mcfFlowName cf + , ufaDirection = case mcfDirection cf of + MT.Input -> "Input" + MT.Output -> "Output" + } + | (cf, Nothing) <- mappings + ] + uniqueDbFlows = S.size $ S.fromList [bfId f | (_, Just (f, _)) <- mappings] + return + MappingStatus + { mstMethodId = methodId method + , mstMethodName = methodName method + , mstTotalFactors = msTotal stats + , mstMappedByUUID = msByUUID stats + , mstMappedByCAS = msByCAS stats + , mstMappedByName = msByName stats + , mstMappedBySynonym = msBySynonym stats + , mstUnmapped = msUnmatched stats + , mstCoverage = coverage + , mstDbBiosphereCount = fromIntegral (dbBiosphereCount db) + , mstUniqueDbFlowsMatched = uniqueDbFlows + , mstUnmappedFlows = unmappedFlows + } + +getFlowCFMapping :: Text -> Text -> AppM FlowCFMapping +getFlowCFMapping dbName methodIdText = do + dbManager <- asks aeDbManager + (db, _) <- requireDatabaseByName dbName + method <- loadMethodByUUID methodIdText + mappings <- liftIO $ DM.mapMethodToFlowsCached dbManager dbName db method + let reverseIndex = + M.fromList + [(bfId f, (cf, strat)) | (cf, Just (f, strat)) <- mappings] + entries = map (buildFlowEntry db reverseIndex) (V.toList (dbBiosphereOrder db)) + matchedCount = length [() | e <- entries, isJust (fceCfValue e)] + return + FlowCFMapping + { fcmMethodName = methodName method + , fcmMethodUnit = methodUnit method + , fcmTotalFlows = fromIntegral (dbBiosphereCount db) + , fcmMatchedFlows = matchedCount + , fcmFlows = entries + } + +getCharacterization :: Text -> Text -> Maybe Text -> Maybe Int -> AppM CharacterizationResult +getCharacterization dbName methodIdText flowFilter limitParam = do + dbManager <- asks aeDbManager + (db, _) <- requireDatabaseByName dbName + method <- loadMethodByUUID methodIdText + let lim = fromMaybe 50 limitParam + queryLower = fmap T.toLower flowFilter + mappings <- liftIO $ DM.mapMethodToFlowsCached dbManager dbName db method + let matched = + [ (cf, f, strat) + | (cf, Just (f, strat)) <- mappings + , matchesQuery queryLower (mcfFlowName cf) (bfName f) + ] + sorted = sortOn (\(cf, _, _) -> negate (abs (mcfValue cf))) matched + top = take lim sorted + mkEntry (cf, f, strat) = + CharacterizationEntry + { cheMethodFlowName = mcfFlowName cf + , cheCfValue = mcfValue cf + , cheCfUnit = mcfUnit cf + , cheDirection = case mcfDirection cf of + MT.Input -> "Input" + MT.Output -> "Output" + , cheDbFlowName = bfName f + , cheFlowId = UUID.toText (bfId f) + , cheFlowUnit = getUnitNameForBioFlow (dbUnits db) f + , cheCategory = bfCompartmentName f + , cheCompartment = bfCompartmentSub f + , cheMatchStrategy = strategyToText strat + } + return + CharacterizationResult + { chrMethod = methodName method + , chrUnit = methodUnit method + , chrMatches = length matched + , chrShown = length top + , chrFactors = map mkEntry top + } + +getMethodCollections :: AppM MethodCollectionListResponse +getMethodCollections = do + dbManager <- asks aeDbManager + statuses <- liftIO $ DM.listMethodCollections dbManager + return $ + MethodCollectionListResponse + [ MethodCollectionStatusAPI + { mcaName = mcsName s + , mcaDisplayName = mcsDisplayName s + , mcaDescription = mcsDescription s + , mcaStatus = case mcsStatus s of + DM.Loaded -> "loaded" + _ -> "unloaded" + , mcaIsUploaded = mcsIsUploaded s + , mcaPath = mcsPath s + , mcaMethodCount = mcsMethodCount s + , mcaFormat = Just (mcsFormat s) + } + | s <- statuses + ] + +loadMethodCollectionHandler :: Text -> AppM ActivateResponse +loadMethodCollectionHandler name = do + dbManager <- asks aeDbManager + simpleAction (DM.loadMethodCollection dbManager name) ("Loaded method: " <> name) + +unloadMethodCollectionHandler :: Text -> AppM ActivateResponse +unloadMethodCollectionHandler name = do + dbManager <- asks aeDbManager + simpleAction (DM.unloadMethodCollection dbManager name) ("Unloaded method: " <> name) + +searchFlows :: Text -> Maybe Text -> Maybe Text -> Maybe Int -> Maybe Int -> Maybe Text -> Maybe Text -> AppM (SearchResults FlowSearchResult) +searchFlows dbName queryParam langParam limitParam offsetParam sortParam orderParam = do + (db, _) <- requireDatabaseByName dbName + case queryParam of + Nothing -> return (SearchResults [] 0 0 50 False 0.0) + Just query -> do + let ff = + Service.FlowFilter + { Service.ffQuery = query + , Service.ffLang = langParam + , Service.ffLimit = limitParam + , Service.ffOffset = offsetParam + , Service.ffSort = sortParam + , Service.ffOrder = orderParam + } + searchFlowsInternal db ff + +searchActivitiesWithCount :: Text -> Maybe Text -> Maybe Text -> Maybe Text -> Maybe Bool -> Maybe Text -> [Text] -> [Text] -> [Text] -> Maybe Int -> Maybe Int -> Maybe Text -> Maybe Text -> AppM (SearchResults ActivitySummary) +searchActivitiesWithCount dbName nameParam geoParam productParam exactParam presetParam classSystems classValues classModes limitParam offsetParam sortParam orderParam = do + presets <- asks aeClassificationPresets + (db, _) <- requireDatabaseByName dbName + let exactMatch = fromMaybe False exactParam + sf = + Service.SearchFilter + { Service.sfCore = + Service.ActivityFilterCore + { Service.afcName = nameParam + , Service.afcLocation = geoParam + , Service.afcProduct = productParam + , Service.afcClassifications = mergeClassFilters presets presetParam classSystems classValues classModes + , Service.afcLimit = limitParam + , Service.afcOffset = offsetParam + , Service.afcSort = sortParam + , Service.afcOrder = orderParam + } + , Service.sfExactMatch = exactMatch + } + result <- liftIO $ Service.searchActivities db sf + case result of + Left err -> throwError err500{errBody = BSL.fromStrict $ T.encodeUtf8 $ T.pack $ show err} + Right jsonValue -> case fromJSON jsonValue of + Success searchResults -> return searchResults + Error parseErr -> throwError err500{errBody = BSL.fromStrict $ T.encodeUtf8 $ T.pack parseErr} + +getClassifications :: Text -> AppM [ClassificationSystem] +getClassifications dbName = do + (db, _) <- requireDatabaseByName dbName + return $ Service.getClassifications db + +postImpactsBatch :: Text -> Text -> Maybe Int -> BatchImpactsRequest -> AppM BatchImpactsResponse +postImpactsBatch = batchImpactsH + +-- --------------------------------------------------------------------------- +-- Servant server +-- --------------------------------------------------------------------------- + lcaServer :: AppEnv -> Server LCAAPI -lcaServer env = - hoistServer lcaAPI (runApp env) handlers +lcaServer env = hoistServer lcaAPI (runApp env) handlers where - -- Legacy closure access to env fields, kept so handler bodies (and - -- the helpers they call) don't all need an explicit `asks`. We could - -- replace this with capability-class projections at every call site, - -- but the let-binding is identical in LOC and easier to read. - dbManager = aeDbManager env - maxTreeDepth = aeMaxTreeDepth env - password = aePassword env - hostingConfig = aeHostingConfig env - classificationPresets = aeClassificationPresets env handlers = getActivityInfo :<|> getActivityFlows @@ -965,7 +1924,6 @@ lcaServer env = :<|> unloadMethodCollectionHandler :<|> DBHandlers.deleteMethodHandler :<|> DBHandlers.uploadMethodHandler - -- Flow synonyms :<|> DBHandlers.listRefData DBHandlers.FlowSynonyms :<|> DBHandlers.loadRefData DBHandlers.FlowSynonyms :<|> DBHandlers.unloadRefData DBHandlers.FlowSynonyms @@ -973,13 +1931,11 @@ lcaServer env = :<|> DBHandlers.uploadRefData DBHandlers.FlowSynonyms :<|> DBHandlers.getFlowSynonymGroupsHandler :<|> DBHandlers.downloadRefDataHandler DBHandlers.FlowSynonyms - -- Compartment mappings :<|> DBHandlers.listRefData DBHandlers.CompartmentMappings :<|> DBHandlers.loadRefData DBHandlers.CompartmentMappings :<|> DBHandlers.unloadRefData DBHandlers.CompartmentMappings :<|> DBHandlers.deleteRefData DBHandlers.CompartmentMappings :<|> DBHandlers.uploadRefData DBHandlers.CompartmentMappings - -- Units :<|> DBHandlers.listRefData DBHandlers.UnitDefs :<|> DBHandlers.loadRefData DBHandlers.UnitDefs :<|> DBHandlers.unloadRefData DBHandlers.UnitDefs @@ -992,958 +1948,6 @@ lcaServer env = :<|> getStats :<|> getClassificationPresets :<|> getOpenApiSpec - getOpenApiSpec :: AppM Value - getOpenApiSpec = return $ toJSON volcaOpenApi - - getVersion :: AppM Value - getVersion = - return $ - object - [ "version" .= Version.version - , "gitHash" .= Version.gitHash - , "gitTag" .= Version.gitTag - , "buildTarget" .= Version.buildTarget - ] - - getHosting :: AppM Value - getHosting = return $ case hostingConfig of - Just hc -> - object - [ "is_hosted" .= True - , "max_uploads" .= Config.hcMaxUploads hc - , "api_access" .= Config.hcApiAccess hc - , "upgrade_upload" .= Config.hcUpgradeUpload hc - , "upgrade_api" .= Config.hcUpgradeApi hc - , "upgrade_vm_size" .= Config.hcUpgradeVmSize hc - ] - Nothing -> - object - [ "is_hosted" .= False - , "max_uploads" .= (-1 :: Int) - , "api_access" .= True - , "upgrade_upload" .= ("" :: Text) - , "upgrade_api" .= ("" :: Text) - , "upgrade_vm_size" .= ("" :: Text) - ] - - getStats :: AppM Value - getStats = liftIO $ do - enabled <- GHC.Stats.getRTSStatsEnabled - if enabled - then do - stats <- GHC.Stats.getRTSStats - return $ - object - [ "memory_used_bytes" .= GHC.Stats.gcdetails_live_bytes (GHC.Stats.gc stats) - , "memory_allocated_bytes" .= GHC.Stats.allocated_bytes stats - , "gc_count" .= GHC.Stats.gcs stats - ] - else - return $ - object - ["error" .= ("RTS stats not enabled. Run with +RTS -T to enable." :: Text)] - - getClassificationPresets :: AppM [ClassificationPresetInfo] - getClassificationPresets = return $ map toInfo classificationPresets - where - toInfo p = - ClassificationPresetInfo - { cpiName = Config.cpName p - , cpiLabel = Config.cpLabel p - , cpiDescription = Config.cpDescription p - , cpiFilters = map (\e -> ClassificationEntryInfo (Config.ceSystem e) (Config.ceValue e) (Config.ceMode e)) (Config.cpFilters p) - } - - getLogsHandler :: Maybe Int -> AppM Value - getLogsHandler sinceMaybe = do - let since = fromMaybe 0 sinceMaybe - (nextIndex, logLines) <- liftIO $ getLogLines since - return $ - object - [ "lines" .= logLines - , "nextIndex" .= nextIndex - ] - - postAuth :: LoginRequest -> AppM (Headers '[Header "Set-Cookie" String] Value) - postAuth loginReq = - case password of - Nothing -> - -- No password configured, auth always succeeds - return $ noHeader $ object ["ok" .= True] - Just pwd -> - if T.unpack (lrCode loginReq) == pwd - then - let cookieValue = "volca_session=" ++ pwd ++ "; Path=/; HttpOnly; SameSite=Strict" - in return $ addHeader cookieValue $ object ["ok" .= True] - else - throwError err401{errBody = "{\"error\":\"invalid code\"}"} - - -- Core activity endpoint - streamlined data - getActivityInfo :: Text -> Text -> AppM ActivityInfo - getActivityInfo dbName processId = do - (db, _) <- requireDatabaseByName dbName - unitCfg <- liftIO $ getMergedUnitConfig dbManager - result <- either throwServiceError pure (Service.getActivityInfo unitCfg db processId) - case fromJSON result of - Success activityInfo -> return activityInfo - Error err -> throwError err500{errBody = BSL.fromStrict $ T.encodeUtf8 $ T.pack err} - - -- Activity flows sub-resource - getActivityFlows :: Text -> Text -> AppM [FlowSummary] - getActivityFlows dbName processId = do - (db, _) <- requireDatabaseByName dbName - withValidatedActivity db processId $ \activity -> - return $ Service.getActivityFlowSummaries db activity - - -- Activity inputs sub-resource - getActivityInputs :: Text -> Text -> AppM [ExchangeDetail] - getActivityInputs dbName processId = do - (db, _) <- requireDatabaseByName dbName - withValidatedActivity db processId $ \activity -> - return $ Service.getActivityInputDetails db activity - - -- Activity outputs sub-resource - getActivityOutputs :: Text -> Text -> AppM [ExchangeDetail] - getActivityOutputs dbName processId = do - (db, _) <- requireDatabaseByName dbName - withValidatedActivity db processId $ \activity -> - return $ Service.getActivityOutputDetails db activity - - -- Activity reference product sub-resource - getActivityReferenceProduct :: Text -> Text -> AppM FlowDetail - getActivityReferenceProduct dbName processId = do - (db, _) <- requireDatabaseByName dbName - withValidatedActivity db processId $ \activity -> do - case Service.getActivityReferenceProductDetail db activity of - Nothing -> throwError err404{errBody = "No reference product found"} - Just refProduct -> return refProduct - - -- Activity tree export for visualization (configurable depth) - getActivityTree :: Text -> Text -> AppM TreeExport - getActivityTree dbName processId = do - (db, _) <- requireDatabaseByName dbName - withValidatedActivity db processId $ \_activity -> do - -- Use CLI --tree-depth option for configurable depth - -- Default depth limit prevents DOS attacks via deep tree requests - -- Extract activity UUID from processId (format: activityUUID_productUUID) - let activityUuidText = case T.splitOn "_" processId of - (uuid : _) -> uuid - [] -> processId -- Fallback - case UUID.fromText activityUuidText of - Nothing -> throwError err400{errBody = "Invalid activity UUID format"} - Just activityUuid -> do - unitCfg <- liftIO $ getMergedUnitConfig dbManager - let loopAwareTree = buildLoopAwareTree unitCfg db activityUuid maxTreeDepth - return $ Service.convertToTreeExport db processId maxTreeDepth loopAwareTree - - -- Activity inventory calculation (full supply chain LCI). - -- Goes through the cross-DB back-substitution path so inventories from - -- dep DBs are merged into the returned flow map; metadata (flow names, - -- units) comes from the merged FlowDB/UnitDB snapshot. - activityInventoryCore :: Text -> Text -> Maybe SubstitutionRequest -> AppM InventoryExport - activityInventoryCore dbName processIdText mSub = do - (db, sharedSolver) <- requireDatabaseByName dbName - (processId, activity) <- resolveOrThrow db processIdText - sol <- crossDBSolutionFor dbName db sharedSolver processId mSub - (mFlows, mUnits) <- liftIO $ DM.getMergedFlowMetadata dbManager - pure $ Service.convertToInventoryExport db mFlows mUnits processId activity (SharedSolver.csInventory sol) - - getActivityInventory :: Text -> Text -> AppM InventoryExport - getActivityInventory dbName processIdText = activityInventoryCore dbName processIdText Nothing - - -- Activity graph endpoint for network visualization - getActivityGraph :: Text -> Text -> Maybe Double -> AppM GraphExport - getActivityGraph dbName processId maybeCutoff = do - (db, sharedSolver) <- requireDatabaseByName dbName - let cutoffPercent = fromMaybe 1.0 maybeCutoff -- Default to 1% cutoff - result <- liftIO $ Service.buildActivityGraph db sharedSolver processId cutoffPercent - either throwServiceError pure result - - -- Build the supply-chain filter shared by the GET and POST handlers. - buildSupplyChainFilter :: Maybe Text -> Maybe Int -> Maybe Double -> Maybe Int -> Maybe Int -> Maybe Text -> Maybe Text -> Maybe Text -> [Text] -> [Text] -> [Text] -> Maybe Text -> Maybe Text -> Service.SupplyChainFilter - buildSupplyChainFilter nameFilter limitParam minQuantity offsetParam maxDepthParam locationFilter productFilter presetParam classSystems classValues classModes sortParam orderParam = - let presetFilters = expandPreset classificationPresets presetParam - explicitFilters = - zipWith3 - (\s v m -> (s, v, m == "exact")) - classSystems - classValues - (classModes ++ repeat "contains") - classFilters = presetFilters ++ explicitFilters - in Service.SupplyChainFilter - { Service.scfCore = - Service.ActivityFilterCore - { Service.afcName = nameFilter - , Service.afcLocation = locationFilter - , Service.afcProduct = productFilter - , Service.afcClassifications = classFilters - , Service.afcLimit = limitParam - , Service.afcOffset = offsetParam - , Service.afcSort = sortParam - , Service.afcOrder = orderParam - } - , Service.scfMaxDepth = maxDepthParam - , Service.scfMinQuantity = minQuantity - } - - -- Activity supply chain endpoint (scaling vector based). 'Nothing' takes the - -- cached solve; 'Just' resolves substitutions through the cross-DB resolver. - activitySupplyChainCore :: Text -> Text -> Maybe Text -> Maybe Int -> Maybe Double -> Maybe Int -> Maybe Int -> Maybe Text -> Maybe Text -> Maybe Text -> [Text] -> [Text] -> [Text] -> Maybe Text -> Maybe Text -> Maybe Bool -> Maybe SubstitutionRequest -> AppM SupplyChainResponse - activitySupplyChainCore dbName processIdText nameFilter limitParam minQuantity offsetParam maxDepthParam locationFilter productFilter presetParam classSystems classValues classModes sortParam orderParam includeEdgesParam mSub = do - (db, sharedSolver) <- requireDatabaseByName dbName - let includeEdges = fromMaybe False includeEdgesParam - scf = - buildSupplyChainFilter - nameFilter - limitParam - minQuantity - offsetParam - maxDepthParam - locationFilter - productFilter - presetParam - classSystems - classValues - classModes - sortParam - orderParam - case mSub of - Nothing -> do - unitCfg <- liftIO $ DM.getMergedUnitConfig dbManager - result <- liftIO $ Service.getSupplyChain unitCfg (DM.mkDepSolverLookup dbManager) db dbName sharedSolver processIdText scf includeEdges - either throwServiceError pure result - Just subReq -> do - (processId, _) <- resolveOrThrow db processIdText - -- Use the cross-DB-aware substitution resolver so qualified PIDs in - -- subFrom/subTo are accepted; the virtual cross-DB links it returns - -- don't affect the root scaling vector (they drive dep-DB demand), - -- which is all the supply-chain navigation reads. - scalingResult <- - liftIO $ - Service.computeScalingVectorWithSubstitutionsCrossDB - (DM.mkDepSolverLookup dbManager) - db - dbName - sharedSolver - processId - (srSubstitutions subReq) - case scalingResult of - Left err -> throwServiceError err - Right (scalingVec, virtualLinks) -> do - unitCfg <- liftIO $ DM.getMergedUnitConfig dbManager - eResp <- - liftIO $ - Service.buildSupplyChainFromScalingVectorCrossDB - unitCfg - (DM.mkDepSolverLookup dbManager) - db - dbName - processId - scalingVec - virtualLinks - scf - includeEdges - either throwServiceError pure eResp - - getActivitySupplyChain :: Text -> Text -> Maybe Text -> Maybe Int -> Maybe Double -> Maybe Int -> Maybe Int -> Maybe Text -> Maybe Text -> Maybe Text -> [Text] -> [Text] -> [Text] -> Maybe Text -> Maybe Text -> Maybe Bool -> AppM SupplyChainResponse - getActivitySupplyChain dbName processIdText nameFilter limitParam minQuantity offsetParam maxDepthParam locationFilter productFilter presetParam classSystems classValues classModes sortParam orderParam includeEdgesParam = - activitySupplyChainCore dbName processIdText nameFilter limitParam minQuantity offsetParam maxDepthParam locationFilter productFilter presetParam classSystems classValues classModes sortParam orderParam includeEdgesParam Nothing - - -- Activity aggregate endpoint (generic SQL-group-by-style aggregation) - getActivityAggregate :: - Text -> - Text -> - Maybe Text -> -- scope - Maybe Bool -> -- is_input - Maybe Int -> -- max_depth - Maybe Text -> -- filter_name - Maybe Text -> -- filter_name_not - Maybe Text -> -- filter_unit - Maybe Text -> -- preset - [Text] -> -- filter_classification (repeatable: "System=Value[:exact]") - Maybe Text -> -- filter_target_name - Maybe Text -> -- filter_exchange_type ("technosphere" | "biosphere" | "waste") - Maybe Bool -> -- filter_is_reference - Maybe Text -> -- group_by - Maybe Text -> -- aggregate fn - AppM Aggregation - getActivityAggregate - dbName - processId - scopeParam - isInputParam - maxDepthParam - fnameParam - fnameNotParam - funitParam - presetParam - fclassParams - ftargetParam - fexchangeTypeParam - freferenceParam - groupByParam - aggregateParam = do - (db, sharedSolver) <- requireDatabaseByName dbName - -- Field-level validation via the Validation Applicative (accumulating). - -- A request with both an invalid `scope` and an invalid `aggregate` - -- now reports both errors at once, instead of just the first. - let parseScope = \case - Just "direct" -> V.Success Agg.ScopeDirect - Just "supply_chain" -> V.Success Agg.ScopeSupplyChain - Just "biosphere" -> V.Success Agg.ScopeBiosphere - _ -> V.failure "scope must be one of: direct | supply_chain | biosphere" - parseExType = \case - Nothing -> V.Success Nothing - Just "technosphere" -> V.Success (Just Agg.KindTechnosphere) - Just "biosphere" -> V.Success (Just Agg.KindBiosphere) - Just "waste" -> V.Success (Just Agg.KindWaste) - Just _ -> V.failure "filter_exchange_type must be one of: technosphere | biosphere | waste" - parseAgg = \case - Nothing -> V.Success Agg.AggSum - Just "sum_quantity" -> V.Success Agg.AggSum - Just "count" -> V.Success Agg.AggCount - Just "share" -> V.Success Agg.AggShare - Just other -> V.failure ("aggregate must be one of: sum_quantity | count | share (got " <> other <> ")") - (scope, exchangeType, aggFn) <- - case V.toEither $ (,,) <$> parseScope scopeParam <*> parseExType fexchangeTypeParam <*> parseAgg aggregateParam of - Left errs -> throwError err400{errBody = BSL.fromStrict (T.encodeUtf8 (T.intercalate "; " (NE.toList errs)))} - Right v -> pure v - -- Cross-check requires the parsed scope value, so it runs after the - -- Applicative phase. Validation is not a Monad, by design. - case (exchangeType, scope) of - (Just _, Agg.ScopeBiosphere) -> - throwError err400{errBody = "filter_exchange_type is redundant with scope=biosphere"} - (Just _, Agg.ScopeSupplyChain) -> - throwError err400{errBody = "filter_exchange_type is not supported with scope=supply_chain (all entries are technosphere)"} - _ -> return () - let presetFilters = expandPreset classificationPresets presetParam - explicitFilters = mapMaybe parseClassFilter fclassParams - params = - Agg.AggregateParams - { Agg.apScope = scope - , Agg.apIsInput = isInputParam - , Agg.apMaxDepth = maxDepthParam - , Agg.apFilterName = fnameParam - , Agg.apFilterNameNot = maybe [] (map T.strip . T.splitOn ",") fnameNotParam - , Agg.apFilterUnit = funitParam - , Agg.apFilterClassifications = presetFilters ++ explicitFilters - , Agg.apFilterTargetName = ftargetParam - , Agg.apFilterExchangeType = exchangeType - , Agg.apFilterIsReference = freferenceParam - , Agg.apGroupBy = groupByParam - , Agg.apAggregate = aggFn - } - unitCfg <- liftIO $ getMergedUnitConfig dbManager - (mFlows, mUnits) <- liftIO $ DM.getMergedFlowMetadata dbManager - result <- liftIO $ Agg.aggregate unitCfg mFlows mUnits db dbName sharedSolver (DM.mkDepSolverLookup dbManager) processId params - either throwServiceError pure result - where - -- Parse "System=Value[:exact]" into (system, value, isExact). - parseClassFilter :: Text -> Maybe (Text, Text, Bool) - parseClassFilter raw = - let (sys, rest) = T.breakOn "=" raw - in if T.null rest - then Nothing - else - let valAndMode = T.drop 1 rest - (val, mode) = T.breakOn ":" valAndMode - isExact = T.drop 1 mode == "exact" - in Just (T.strip sys, T.strip val, isExact) - - -- Activity LCIA endpoint (single method within a collection). The GET - -- variant honours a top-flows query param and logs the result; the POST - -- route carries neither (no top-flows param, no logging). - activityLCIACore :: Text -> Text -> Text -> Maybe Int -> Maybe SubstitutionRequest -> AppM LCIAResult - activityLCIACore dbName processIdText methodIdText topFlowsParam mSub = do - (db, sharedSolver) <- requireDatabaseByName dbName - method <- loadMethodByUUID methodIdText - (processId, activity) <- resolveOrThrow db processIdText - sol <- crossDBSolutionFor dbName db sharedSolver processId mSub - result <- liftIO $ computeCategoryResult dbManager dbName db sol activity (fromMaybe 5 topFlowsParam) Nothing method - when (isNothing mSub) $ liftIO $ logLCIAResult result method - pure result - - getActivityLCIA :: Text -> Text -> Text -> Text -> Maybe Int -> AppM LCIAResult - getActivityLCIA dbName processIdText _collectionName methodIdText topFlowsParam = - activityLCIACore dbName processIdText methodIdText topFlowsParam Nothing - - -- POST: LCIA with substitutions - postActivityLCIA :: Text -> Text -> Text -> Text -> SubstitutionRequest -> AppM LCIAResult - postActivityLCIA dbName processIdText _collectionName methodIdText subReq = - activityLCIACore dbName processIdText methodIdText Nothing (Just subReq) - - -- POST: sensitivity sweep (parallel rank-1 perturbations of A_ij) - -- - -- Perturbations are root-only by design (Sherman-Morrison rank-1 on - -- root's MUMPS factorization; 'Service.resolveRootOnly' rejects - -- "dbName::pid" forms). But scoring the perturbed root scaling must - -- still walk the cross-DB graph: dep-DB regional CFs are invisible - -- to a root-only 'applyBiosphereMatrix', and the propagation infra - -- ('SharedSolver.goWithDepsFromScalings') is already documented for - -- this exact use case ("caller supplies root scalings, e.g. after - -- a Sherman-Morrison update"). - -- - -- Cost: each perturbation now triggers one cross-DB back-substitution - -- per dep DB it actually reaches. MUMPS factorizations are cached, so - -- back-sub is O(n²) per dep DB, not full O(n³) factorization. - postActivitySensitivity :: Text -> Text -> Text -> Text -> SensitivityRequest -> AppM SensitivityResponse - postActivitySensitivity dbName processIdText _collectionName methodIdText senReq = do - (db, sharedSolver) <- requireDatabaseByName dbName - requireFullyLinked dbName db - method <- loadMethodByUUID methodIdText - (processId, activity) <- resolveOrThrow db processIdText - eRes <- liftIO $ Service.computeSensitivities db sharedSolver processId (srPerturbations senReq) - (baselineX, perResults) <- either throwServiceError pure eRes - unitCfg <- liftIO $ getMergedUnitConfig dbManager - let depLookup = DM.mkDepSolverLookup dbManager - scaleToSolution x = do - eSol <- - SharedSolver.goWithDepsFromScalings - unitCfg - depLookup - db - dbName - [] - [x] - 0 - pure $ case eSol of - Left err -> Left err - Right (sol : _) -> Right sol - Right [] -> Left "cross-DB propagation returned empty result" - eBaselineSol <- liftIO $ scaleToSolution baselineX - baselineSol <- - either - (\err -> throwError err422{errBody = BSL.fromStrict $ T.encodeUtf8 err}) - pure - eBaselineSol - baselineLcia <- - liftIO $ - computeCategoryResult dbManager dbName db baselineSol activity 5 Nothing method - perturbed <- - liftIO $ - mapConcurrently - (buildEntry db activity method baselineLcia scaleToSolution) - perResults - pure SensitivityResponse{srBaseline = baselineLcia, srPerturbed = perturbed} - where - buildEntry db activity method baselineLcia scaleToSolution (p, eitherX) = case eitherX of - Left err -> pure (PerturbedEntry p (Left err)) - Right x' -> do - eSol <- scaleToSolution x' - case eSol of - Left err -> pure (PerturbedEntry p (Left err)) - Right sol -> do - lcia <- computeCategoryResult dbManager dbName db sol activity 5 Nothing method - pure (PerturbedEntry p (Right (lcia, lrScore lcia - lrScore baselineLcia))) - - -- Batch LCIA endpoint (all methods in a collection). Thin alias over the - -- top-level activityLCIABatchH; preserves the Servant call sites. - activityLCIABatchCore :: Text -> Text -> Text -> Maybe SubstitutionRequest -> AppM LCIABatchResult - activityLCIABatchCore = activityLCIABatchH - - getActivityLCIABatch :: Text -> Text -> Text -> AppM LCIABatchResult - getActivityLCIABatch dbName processIdText collectionName = - activityLCIABatchCore dbName processIdText collectionName Nothing - - -- POST: Batch LCIA with substitutions - postActivityLCIABatch :: Text -> Text -> Text -> SubstitutionRequest -> AppM LCIABatchResult - postActivityLCIABatch dbName processIdText collectionName subReq = - activityLCIABatchCore dbName processIdText collectionName (Just subReq) - - -- POST: Inventory with substitutions - postActivityInventory :: Text -> Text -> SubstitutionRequest -> AppM InventoryExport - postActivityInventory dbName processIdText subReq = activityInventoryCore dbName processIdText (Just subReq) - - -- POST: Supply chain with substitutions - postActivitySupplyChain :: Text -> Text -> Maybe Text -> Maybe Int -> Maybe Double -> Maybe Int -> Maybe Int -> Maybe Text -> Maybe Text -> Maybe Text -> [Text] -> [Text] -> [Text] -> Maybe Text -> Maybe Text -> Maybe Bool -> SubstitutionRequest -> AppM SupplyChainResponse - postActivitySupplyChain dbName processIdText nameFilter limitParam minQuantity offsetParam maxDepthParam locationFilter productFilter presetParam classSystems classValues classModes sortParam orderParam includeEdgesParam subReq = - activitySupplyChainCore dbName processIdText nameFilter limitParam minQuantity offsetParam maxDepthParam locationFilter productFilter presetParam classSystems classValues classModes sortParam orderParam includeEdgesParam (Just subReq) - - -- Activity consumers endpoint (reverse supply chain) - getActivityConsumers :: Text -> Text -> Maybe Text -> Maybe Text -> Maybe Text -> Maybe Text -> [Text] -> [Text] -> [Text] -> Maybe Int -> Maybe Int -> Maybe Int -> Maybe Text -> Maybe Text -> Maybe Bool -> AppM ConsumersResponse - getActivityConsumers dbName processIdText nameFilter locationFilter productFilter presetParam classSystems classValues classModes limitParam offsetParam maxDepthParam sortParam orderParam includeEdgesParam = do - (db, _) <- requireDatabaseByName dbName - let presetFilters = expandPreset classificationPresets presetParam - explicitFilters = - zipWith3 - (\s v m -> (s, v, m == "exact")) - classSystems - classValues - (classModes ++ repeat "contains") - classFilters = presetFilters ++ explicitFilters - cnf = - Service.ConsumerFilter - { Service.cnfCore = - Service.ActivityFilterCore - { Service.afcName = nameFilter - , Service.afcLocation = locationFilter - , Service.afcProduct = productFilter - , Service.afcClassifications = classFilters - , Service.afcLimit = limitParam - , Service.afcOffset = offsetParam - , Service.afcSort = sortParam - , Service.afcOrder = orderParam - } - , Service.cnfMaxDepth = maxDepthParam - , Service.cnfIncludeEdges = fromMaybe False includeEdgesParam - } - either throwServiceError pure (Service.getConsumers db dbName processIdText cnf) - - -- Activity path-to endpoint (shortest supply chain path to first matching upstream activity) - getActivityPathTo :: Text -> Text -> Maybe Text -> AppM Value - getActivityPathTo dbName processIdText targetParam = do - (db, solver) <- requireDatabaseByName dbName - target <- - maybe - (throwError err400{errBody = "Missing required 'target' query parameter"}) - pure - targetParam - result <- liftIO $ Service.getPathTo db solver processIdText target - case result of - Left (Service.ActivityNotFound msg) -> - throwError err404{errBody = BSL.fromStrict $ T.encodeUtf8 msg} - Left (Service.InvalidProcessId msg) -> - throwError err400{errBody = BSL.fromStrict $ T.encodeUtf8 msg} - Left err -> - throwError err500{errBody = BSL.fromStrict $ T.encodeUtf8 $ T.pack $ show err} - Right val -> return val - - -- (resolveOrThrow, throwServiceError, logBatchCategory and loadCollection - -- now live at the top level — see the block above the `lcaServer` def. - -- Call sites in this `where` pass `dbManager` explicitly when needed.) - - -- Activity analysis endpoint (dispatches to registered analyzers) - getActivityAnalyze :: Text -> Text -> Text -> AppM Value - getActivityAnalyze dbName processIdText analyzerName = do - (db, sharedSolver) <- requireDatabaseByName dbName - case M.lookup analyzerName (prAnalyzers (dmPlugins dbManager)) of - Nothing -> throwError err404{errBody = "Analyzer not found: " <> BSL.fromStrict (T.encodeUtf8 analyzerName)} - Just analyzer -> do - case Service.resolveActivityAndProcessId db processIdText of - Left (Service.ActivityNotFound _) -> throwError err404{errBody = "Activity not found"} - Left (Service.InvalidProcessId _) -> throwError err400{errBody = "Invalid ProcessId format"} - Left err -> throwError err500{errBody = BSL.fromStrict $ T.encodeUtf8 $ T.pack $ show err} - Right (actProcessId, _) -> do - inventory <- inventoryWithDeps dbName db sharedSolver actProcessId - (mFlows, mUnits) <- liftIO $ DM.getMergedFlowMetadata dbManager - loadedMethods <- liftIO $ DM.getLoadedMethods dbManager - let methods = map snd loadedMethods - ctx = - AnalyzeContext - { acDatabase = db - , acInventory = inventory - , acMethods = methods - , acParameters = M.empty - , acTechFlowDB = M.empty - , acBioFlowDB = mFlows - , acUnitDB = mUnits - } - liftIO $ ahAnalyze analyzer ctx - - -- Contributing flows: top elementary flows by LCIA contribution for a specific method - getContributingFlows :: Text -> Text -> Text -> Text -> Maybe Int -> AppM ContributingFlowsResult - getContributingFlows dbName processIdText _collectionName methodIdText limitParam = - withActivityAndMethod dbName processIdText methodIdText $ \db sharedSolver actProcessId _ method -> do - let lim = fromMaybe 20 limitParam - unitCfg <- liftIO $ getMergedUnitConfig dbManager - (mFlows, mUnits) <- liftIO $ DM.getMergedFlowMetadata dbManager - inventory <- inventoryWithDeps dbName db sharedSolver actProcessId - tables <- liftIO $ DM.mapMethodToTablesCached dbManager dbName db method - let score = loScore (computeLCIAScoreFromTables unitCfg mUnits mFlows inventory tables) - (rawContribs, unknownUuids) = inventoryContributions unitCfg mUnits mFlows inventory tables - contribs = sortOn (\(_, _, c) -> negate (abs c)) rawContribs - topFlows = - [ FlowContributionEntry - { fcoFlowName = bfName f - , fcoContribution = c - , fcoSharePct = if score /= 0 then c / score * 100 else 0 - , fcoFlowId = UUID.toText (bfId f) - , fcoCategory = bfCompartmentName f - , fcoCompartment = bfCompartmentSub f - , fcoCfValue = cfVal - } - | (f, cfVal, c) <- take lim contribs - ] - liftIO $ - unless (null unknownUuids) $ - reportProgress Warning $ - "[contributing-flows " - <> T.unpack (methodName method) - <> "] " - <> show (length unknownUuids) - <> " inventory flow UUID(s) absent from merged FlowDB. Samples: " - <> show (take 3 unknownUuids) - return - ContributingFlowsResult - { cfrMethod = methodName method - , cfrUnit = methodUnit method - , cfrTotalScore = score - , cfrTopFlows = topFlows - } - - -- Contributing activities: top upstream activities by LCIA contribution for a specific method - getContributingActivities :: Text -> Text -> Text -> Text -> Maybe Int -> AppM ContributingActivitiesResult - getContributingActivities dbName processIdText _collectionName methodIdText limitParam = - withActivityAndMethod dbName processIdText methodIdText $ \db sharedSolver actProcessId _ method -> do - let lim = fromMaybe 10 limitParam - requireFullyLinked dbName db - unitCfg <- liftIO $ getMergedUnitConfig dbManager - (mFlows, mUnits) <- liftIO $ DM.getMergedFlowMetadata dbManager - tables <- liftIO $ DM.mapMethodToTablesCached dbManager dbName db method - -- Skip separate inventory compute: contributions sum equals the - -- score (same B·scaling·CF sum, just grouped per activity). - eContribs <- - liftIO $ - SharedSolver.crossDBProcessContributions - unitCfg - mUnits - mFlows - (DM.mkDepSolverLookup dbManager) - db - dbName - sharedSolver - actProcessId - tables - case eContribs of - Left err -> throwError err422{errBody = BSL.fromStrict $ T.encodeUtf8 err} - Right contributions -> do - let score = sum (M.elems contributions) - sorted = sortOn (\(_, c) -> negate (abs c)) (M.toList contributions) - top = take lim sorted - rows <- liftIO $ mapM (mkCrossDBContrib dbManager dbName mFlows mUnits score) top - return - ContributingActivitiesResult - { carMethod = methodName method - , carUnit = methodUnit method - , carTotalScore = score - , carActivities = rows - } - - -- Score every method in a set against one inventory in a single batched - -- pass. For fully non-regionalized sets (PEF) this is a stacked-broadcast - -- matvec — one walk over the inventory, m FMAs per non-zero entry — - -- instead of m separate inventory walks. Mixed/regio sets fall through - -- to the per-DB cross-DB regional sum inside the set-scoring function. - -- - -- The 'CrossDBSolution' carries the merged inventory and per-DB scaling - -- vectors collected during the inventory solve. Regional methods score - -- as a sum across all participating DBs (root + each dep DB reached at - -- request time); non-regional methods read the merged inventory only. - -- Flow detail endpoint - getFlowDetail :: Text -> Text -> AppM FlowDetail - getFlowDetail dbName flowIdText = do - (db, _) <- requireDatabaseByName dbName - withValidatedFlow db flowIdText $ \flow -> do - let fid = flowKindId flow - unitName' = flowKindUnitName (dbUnits db) flow - usageCount = Service.getFlowUsageCount db fid - return $ FlowDetail (apiFlowOfKind flow) unitName' usageCount - - -- Activities using a specific flow - getFlowActivities :: Text -> Text -> AppM [ActivitySummary] - getFlowActivities dbName flowIdText = do - (db, _) <- requireDatabaseByName dbName - withValidatedFlow db flowIdText $ \flow -> - return $ Service.getActivitiesUsingFlow db (flowKindId flow) - - -- List all available methods (from loaded collections) - getMethods :: AppM [MethodSummary] - getMethods = do - loadedMethods <- liftIO $ DM.getLoadedMethods dbManager - return - [ MethodSummary - { msmId = methodId m - , msmName = methodName m - , msmCategory = methodCategory m - , msmUnit = methodUnit m - , msmFactorCount = length (methodFactors m) - , msmCollection = collName - } - | (collName, m) <- loadedMethods - ] - - -- Get method details - getMethodDetail :: Text -> AppM MethodDetail - getMethodDetail methodIdText = do - method <- loadMethodByUUID methodIdText - return $ - MethodDetail - { mdId = methodId method - , mdName = methodName method - , mdDescription = methodDescription method - , mdUnit = methodUnit method - , mdCategory = methodCategory method - , mdMethodology = methodMethodology method - , mdFactorCount = length (methodFactors method) - } - - -- Get method characterization factors - getMethodFactors :: Text -> AppM [MethodFactorAPI] - getMethodFactors methodIdText = do - method <- loadMethodByUUID methodIdText - return $ map cfToAPI (methodFactors method) - - -- Get method flow mapping status - getMethodMapping :: Text -> Text -> AppM MappingStatus - getMethodMapping dbName methodIdText = do - (db, _) <- requireDatabaseByName dbName - method <- loadMethodByUUID methodIdText - mappings <- liftIO $ DM.mapMethodToFlowsCached dbManager dbName db method - let stats = computeMappingStats mappings - totalFactors = length mappings - coverage = - if totalFactors > 0 - then fromIntegral (totalFactors - msUnmatched stats) / fromIntegral totalFactors * 100 - else 0.0 - -- Get unmapped flows (limit to first 50 for API response) - unmappedFlows = - take - 50 - [ UnmappedFlowAPI - { ufaFlowRef = mcfFlowRef cf - , ufaFlowName = mcfFlowName cf - , ufaDirection = case mcfDirection cf of - MT.Input -> "Input" - MT.Output -> "Output" - } - | (cf, Nothing) <- mappings - ] - uniqueDbFlows = S.size $ S.fromList [bfId f | (_, Just (f, _)) <- mappings] - return - MappingStatus - { mstMethodId = methodId method - , mstMethodName = methodName method - , mstTotalFactors = msTotal stats - , mstMappedByUUID = msByUUID stats - , mstMappedByCAS = msByCAS stats - , mstMappedByName = msByName stats - , mstMappedBySynonym = msBySynonym stats - , mstUnmapped = msUnmatched stats - , mstCoverage = coverage - , mstDbBiosphereCount = fromIntegral (dbBiosphereCount db) - , mstUniqueDbFlowsMatched = uniqueDbFlows - , mstUnmappedFlows = unmappedFlows - } - - -- DB-flow-centric mapping: all biosphere flows with their CF assignments - getFlowCFMapping :: Text -> Text -> AppM FlowCFMapping - getFlowCFMapping dbName methodIdText = do - (db, _) <- requireDatabaseByName dbName - method <- loadMethodByUUID methodIdText - mappings <- liftIO $ DM.mapMethodToFlowsCached dbManager dbName db method - let - -- Build reverse index: DB flow UUID → (MethodCF, MatchStrategy) - reverseIndex = - M.fromList - [(bfId f, (cf, strat)) | (cf, Just (f, strat)) <- mappings] - -- Build entries for all biosphere flows - entries = map (buildFlowEntry db reverseIndex) (V.toList (dbBiosphereOrder db)) - matchedCount = length [() | e <- entries, isJust (fceCfValue e)] - return - FlowCFMapping - { fcmMethodName = methodName method - , fcmMethodUnit = methodUnit method - , fcmTotalFlows = fromIntegral (dbBiosphereCount db) - , fcmMatchedFlows = matchedCount - , fcmFlows = entries - } - - buildFlowEntry :: Database -> M.Map UUID (MethodCF, MatchStrategy) -> UUID -> FlowCFEntry - buildFlowEntry db reverseIndex uuid = - let mFlow = M.lookup uuid (dbBioFlows db) - mMatch = M.lookup uuid reverseIndex - in FlowCFEntry - { fceFlowId = uuid - , fceFlowName = maybe "" bfName mFlow - , fceFlowCategory = maybe "" bfCompartmentName mFlow - , fceCfValue = fmap (mcfValue . fst) mMatch - , fceCfFlowName = fmap (mcfFlowName . fst) mMatch - , fceMatchStrategy = fmap (strategyToText . snd) mMatch - } - - strategyToText :: MatchStrategy -> Text - strategyToText ByUUID = "uuid" - strategyToText ByCAS = "cas" - strategyToText ByName = "name" - strategyToText BySynonym = "synonym" - strategyToText ByFuzzy = "fuzzy" - strategyToText NoMatch = "none" - - -- Characterization: matched CFs for a method, filterable by flow name - getCharacterization :: Text -> Text -> Maybe Text -> Maybe Int -> AppM CharacterizationResult - getCharacterization dbName methodIdText flowFilter limitParam = do - (db, _) <- requireDatabaseByName dbName - method <- loadMethodByUUID methodIdText - let lim = fromMaybe 50 limitParam - queryLower = fmap T.toLower flowFilter - mappings <- liftIO $ DM.mapMethodToFlowsCached dbManager dbName db method - let matched = - [ (cf, f, strat) - | (cf, Just (f, strat)) <- mappings - , matchesQuery queryLower (mcfFlowName cf) (bfName f) - ] - sorted = sortOn (\(cf, _, _) -> negate (abs (mcfValue cf))) matched - top = take lim sorted - mkEntry (cf, f, strat) = - CharacterizationEntry - { cheMethodFlowName = mcfFlowName cf - , cheCfValue = mcfValue cf - , cheCfUnit = mcfUnit cf - , cheDirection = case mcfDirection cf of - MT.Input -> "Input" - MT.Output -> "Output" - , cheDbFlowName = bfName f - , cheFlowId = UUID.toText (bfId f) - , cheFlowUnit = getUnitNameForBioFlow (dbUnits db) f - , cheCategory = bfCompartmentName f - , cheCompartment = bfCompartmentSub f - , cheMatchStrategy = strategyToText strat - } - return - CharacterizationResult - { chrMethod = methodName method - , chrUnit = methodUnit method - , chrMatches = length matched - , chrShown = length top - , chrFactors = map mkEntry top - } - - matchesQuery :: Maybe Text -> Text -> Text -> Bool - matchesQuery Nothing _ _ = True - matchesQuery (Just q) cfName dbName = T.isInfixOf q (T.toLower cfName) || T.isInfixOf q (T.toLower dbName) - - -- Helper to load a method by UUID from the loaded collections - loadMethodByUUID :: Text -> AppM Method - loadMethodByUUID uuidText = do - loadedMethods <- liftIO $ DM.getLoadedMethods dbManager - let allMethods = map snd loadedMethods - case UUID.fromText uuidText of - Nothing -> throwError err400{errBody = "Invalid method UUID format"} - Just uuid -> - case filter (\m -> methodId m == uuid) allMethods of - (m : _) -> return m - [] -> throwError err404{errBody = "Method not found"} - - -- Resolve (database, shared solver, ProcessId, Activity, Method) from URL path params - -- and dispatch to the continuation. Maps the three Service errors to standard HTTP codes. - withActivityAndMethod :: - Text -> - Text -> - Text -> - (Database -> SharedSolver -> ProcessId -> Activity -> Method -> AppM a) -> - AppM a - withActivityAndMethod dbName processIdText methodIdText k = do - (db, sharedSolver) <- requireDatabaseByName dbName - method <- loadMethodByUUID methodIdText - case Service.resolveActivityAndProcessId db processIdText of - Left (Service.ActivityNotFound _) -> throwError err404{errBody = "Activity not found"} - Left (Service.InvalidProcessId _) -> throwError err400{errBody = "Invalid ProcessId format"} - Left err -> throwError err500{errBody = BSL.fromStrict $ T.encodeUtf8 $ T.pack $ show err} - Right (actProcessId, activity) -> k db sharedSolver actProcessId activity method - - -- Method collection handlers - getMethodCollections :: AppM MethodCollectionListResponse - getMethodCollections = do - statuses <- liftIO $ DM.listMethodCollections dbManager - return $ - MethodCollectionListResponse - [ MethodCollectionStatusAPI - { mcaName = mcsName s - , mcaDisplayName = mcsDisplayName s - , mcaDescription = mcsDescription s - , mcaStatus = case mcsStatus s of - DM.Loaded -> "loaded" - _ -> "unloaded" - , mcaIsUploaded = mcsIsUploaded s - , mcaPath = mcsPath s - , mcaMethodCount = mcsMethodCount s - , mcaFormat = Just (mcsFormat s) - } - | s <- statuses - ] - - loadMethodCollectionHandler :: Text -> AppM ActivateResponse - loadMethodCollectionHandler name = - simpleAction (DM.loadMethodCollection dbManager name) ("Loaded method: " <> name) - - unloadMethodCollectionHandler :: Text -> AppM ActivateResponse - unloadMethodCollectionHandler name = - simpleAction (DM.unloadMethodCollection dbManager name) ("Unloaded method: " <> name) - - -- Helper to convert MethodCF to API type - cfToAPI :: MethodCF -> MethodFactorAPI - cfToAPI cf = - MethodFactorAPI - { mfaFlowRef = mcfFlowRef cf - , mfaFlowName = mcfFlowName cf - , mfaDirection = case mcfDirection cf of - MT.Input -> "Input" - MT.Output -> "Output" - , mfaValue = mcfValue cf - } - - -- Search flows by name or synonym with optional language filtering and pagination - searchFlows :: Text -> Maybe Text -> Maybe Text -> Maybe Int -> Maybe Int -> Maybe Text -> Maybe Text -> AppM (SearchResults FlowSearchResult) - searchFlows dbName queryParam langParam limitParam offsetParam sortParam orderParam = do - (db, _) <- requireDatabaseByName dbName - case queryParam of - Nothing -> return (SearchResults [] 0 0 50 False 0.0) - Just query -> do - let ff = - Service.FlowFilter - { Service.ffQuery = query - , Service.ffLang = langParam - , Service.ffLimit = limitParam - , Service.ffOffset = offsetParam - , Service.ffSort = sortParam - , Service.ffOrder = orderParam - } - searchFlowsInternal db ff - - -- Search activities by specific fields with pagination and count - searchActivitiesWithCount :: Text -> Maybe Text -> Maybe Text -> Maybe Text -> Maybe Bool -> Maybe Text -> [Text] -> [Text] -> [Text] -> Maybe Int -> Maybe Int -> Maybe Text -> Maybe Text -> AppM (SearchResults ActivitySummary) - searchActivitiesWithCount dbName nameParam geoParam productParam exactParam presetParam classSystems classValues classModes limitParam offsetParam sortParam orderParam = do - (db, _) <- requireDatabaseByName dbName - -- Expand preset filters then merge with explicit classification params - let exactMatch = fromMaybe False exactParam - presetFilters = expandPreset classificationPresets presetParam - explicitFilters = - zipWith3 - (\s v m -> (s, v, m == "exact")) - classSystems - classValues - (classModes ++ repeat "contains") - classFilters = presetFilters ++ explicitFilters - let sf = - Service.SearchFilter - { Service.sfCore = - Service.ActivityFilterCore - { Service.afcName = nameParam - , Service.afcLocation = geoParam - , Service.afcProduct = productParam - , Service.afcClassifications = classFilters - , Service.afcLimit = limitParam - , Service.afcOffset = offsetParam - , Service.afcSort = sortParam - , Service.afcOrder = orderParam - } - , Service.sfExactMatch = exactMatch - } - result <- liftIO $ Service.searchActivities db sf - case result of - Left err -> throwError err500{errBody = BSL.fromStrict $ T.encodeUtf8 $ T.pack $ show err} - Right jsonValue -> case fromJSON jsonValue of - Success searchResults -> return searchResults - Error parseErr -> throwError err500{errBody = BSL.fromStrict $ T.encodeUtf8 $ T.pack parseErr} - - getClassifications :: Text -> AppM [ClassificationSystem] - getClassifications dbName = do - (db, _) <- requireDatabaseByName dbName - return $ Service.getClassifications db - - -- Batch impacts: thin alias over the top-level batchImpactsH. - postImpactsBatch :: Text -> Text -> Maybe Int -> BatchImpactsRequest -> AppM BatchImpactsResponse - postImpactsBatch = batchImpactsH {- | Evaluate every scoring set against the raw impact score map. Returns (setName → scoreName → value, setName → varName → ScoringIndicator). diff --git a/src/App/Env.hs b/src/App/Env.hs index 9cd1d3b6..e02d6ba6 100644 --- a/src/App/Env.hs +++ b/src/App/Env.hs @@ -1,42 +1,13 @@ {-# LANGUAGE DerivingStrategies #-} -{-# LANGUAGE FlexibleContexts #-} -{-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE GeneralizedNewtypeDeriving #-} -{-# LANGUAGE MultiParamTypeClasses #-} -{-# LANGUAGE RankNTypes #-} -{- | The 'AppM' capability monad and the read-only environment that every -HTTP handler closes over. - -The categorical structure: 'AppM' is a 'Reader' monad transformer over -Servant's 'Handler', so it lives in the Kleisli category of @Reader -AppEnv@ lifted into @Handler@'s Kleisli category. 'hoistServer' (used -in 'API.Routes.lcaServer') is the natural transformation -@forall a. AppM a -> Handler a@ that turns a @ServerT api AppM@ into a -@ServerT api Handler@ — Servant doesn't care about the monad, as long -as we can collapse it down to 'Handler' at the boundary. - -The 'Has*' typeclasses are narrow capability witnesses: a function with -@(MonadReader r m, HasDatabaseManager r, MonadIO m) => m a@ declares -exactly the slice of the environment it needs, without committing to -the concrete monad. This is the "Has-pattern" of mtl-style: capability -classes are *projections* out of the environment object. --} +-- | 'AppM' is @'ReaderT' 'AppEnv' 'Handler'@; 'runApp' is the @AppM ~> Handler@ +-- mapping passed to Servant's 'hoistServer'. module App.Env ( - -- * Environment AppEnv (..), mkAppEnv, - - -- * Monad AppM (..), runApp, - - -- * Capability classes (Has-pattern) - HasDatabaseManager (..), - HasMaxTreeDepth (..), - HasPassword (..), - HasHostingConfig (..), - HasClassificationPresets (..), ) where import Control.Monad.Except (MonadError) @@ -55,7 +26,6 @@ data AppEnv = AppEnv , aeClassificationPresets :: ![Config.ClassificationPreset] } --- | Smart constructor — keeps callers from positionally swapping fields. mkAppEnv :: DatabaseManager -> Int @@ -65,65 +35,8 @@ mkAppEnv -> AppEnv mkAppEnv = AppEnv -{- | Servant 'Handler' threaded with a read-only 'AppEnv'. Deriving the -@MonadReader@ / @MonadIO@ / @MonadError ServerError@ instances via -@GeneralizedNewtypeDeriving@ keeps the wrapper free at runtime: 'AppM' -is representationally a function @AppEnv -> IO (Either ServerError a)@ -under the hood, identical to 'Handler' modulo the @AppEnv@ argument. --} newtype AppM a = AppM {unAppM :: ReaderT AppEnv Handler a} deriving newtype (Functor, Applicative, Monad, MonadIO, MonadReader AppEnv, MonadError ServerError) -{- | Discharge an 'AppM' computation against a concrete environment, -producing a plain Servant 'Handler'. Use in 'API.Routes.lcaServer' as -@hoistServer lcaAPI (runApp env) handlers@. - -This is the natural transformation @AppM ~> Handler@ that Servant's -@hoistServer@ requires; it lifts the entire @ServerT api AppM@ into -@ServerT api Handler@ point-free. --} runApp :: AppEnv -> AppM a -> Handler a runApp env (AppM m) = runReaderT m env - --- --------------------------------------------------------------------------- --- Has-pattern: narrow capability witnesses --- --------------------------------------------------------------------------- - -{- | Witness that the environment exposes a 'DatabaseManager'. Handlers -that need DB access should constrain on @HasDatabaseManager r@ rather -than the concrete 'AppEnv', so the same code can run in tests with a -narrower env. --} -class HasDatabaseManager r where - getDatabaseManager :: r -> DatabaseManager - -instance HasDatabaseManager AppEnv where - getDatabaseManager = aeDbManager - --- | Max tree depth limit (anti-DoS guard for /tree and /graph endpoints). -class HasMaxTreeDepth r where - getMaxTreeDepth :: r -> Int - -instance HasMaxTreeDepth AppEnv where - getMaxTreeDepth = aeMaxTreeDepth - --- | Optional admin password gating @POST /auth@. -class HasPassword r where - getPassword :: r -> Maybe String - -instance HasPassword AppEnv where - getPassword = aePassword - --- | Hosting configuration consumed by @GET /hosting@. -class HasHostingConfig r where - getHostingConfig :: r -> Maybe Config.HostingConfig - -instance HasHostingConfig AppEnv where - getHostingConfig = aeHostingConfig - --- | Classification presets used in @/aggregate@ and @/supply-chain@ filters. -class HasClassificationPresets r where - getClassificationPresets :: r -> [Config.ClassificationPreset] - -instance HasClassificationPresets AppEnv where - getClassificationPresets = aeClassificationPresets diff --git a/src/CLI/Parser.hs b/src/CLI/Parser.hs index b41d6495..8b6093d3 100644 --- a/src/CLI/Parser.hs +++ b/src/CLI/Parser.hs @@ -11,13 +11,8 @@ import qualified Options.Applicative as OA import Version (buildTarget, gitHash, gitTag, version) -- --------------------------------------------------------------------------- --- Option-builder helpers --- --- optparse-applicative is the textbook /Free Applicative/ over a primitive --- @Parser@: composition with @\<*\>@ keeps the static structure exposed --- (which is why @--help@ generation works), and the helpers below factor --- the four shapes that dominated this file before — Text option, Int --- option, switch, positional Text arg — into one line each. +-- Option-builder helpers: collapses the @long/short/metavar/help@ boilerplate +-- around the four shapes (Text/Int option, Text/String positional arg). -- --------------------------------------------------------------------------- -- | @strOption@ with @long/metavar/help@, optionally a short alias. diff --git a/src/Data/Validation.hs b/src/Data/Validation.hs index 43efd733..bec7eea7 100644 --- a/src/Data/Validation.hs +++ b/src/Data/Validation.hs @@ -1,12 +1,6 @@ -{- | An Applicative-only validation type that accumulates errors via the -'Semigroup' on @e@ — the canonical example, in Milewski's -/Category Theory for Programmers/, of an Applicative that is /not/ a -Monad. A lawful 'Monad' instance for 'Validation' would have to -short-circuit on the first 'Failure' (to make @>>=@ associative w.r.t. -the underlying 'Either' behaviour); that would erase the accumulation -that motivates the type, so we stop at 'Applicative'. - -Use: +{- | Applicative-only validation that accumulates errors via the 'Semigroup' +on @e@. No 'Monad' instance: the @ap = (\<*\>)@ law would force '<*>' to +short-circuit, destroying accumulation. @ validateAll @@ -14,8 +8,7 @@ Use: validateAll a b = (,) \<$\> validateA a \<*\> validateB b @ -When both @validateA@ and @validateB@ fail, the resulting 'Failure' -carries /both/ messages, not just the first. +When both arms fail, the resulting 'Failure' carries both messages. -} module Data.Validation ( Validation (..), diff --git a/src/Database/Loader.hs b/src/Database/Loader.hs index 819cf06e..a3f74d9e 100644 --- a/src/Database/Loader.hs +++ b/src/Database/Loader.hs @@ -45,7 +45,6 @@ module Database.Loader ( fixActivityLinksWithCrossDB, findAllCrossDBLinks, CrossDBLinkingStats (..), - mempty, crossDBLinksCount, unresolvedCount, crossDBBySource, @@ -232,14 +231,9 @@ data UnlinkedExchange = UnlinkedExchange } deriving (Eq, Ord, Show) -{- | Summary of unlinked exchanges grouped by consumer activity. - -Categorically the product of four monoids: -'M.unionWith (++)' on the activity map, and addition on each 'Int' -counter. We hand-write the instance because bare 'Int' has no canonical -'Monoid' (Sum vs Product is ambiguous); wrapping the fields as 'Sum Int' -would have leaked through every constructor and accessor. --} +-- | Summary of unlinked exchanges grouped by consumer activity. +-- 'Monoid' is hand-written: bare 'Int' has no canonical instance, and using +-- 'Sum Int' would force every reader to unwrap. data UnlinkedSummary = UnlinkedSummary { usActivities :: !(M.Map T.Text [UnlinkedExchange]) -- consumer name → list of unlinked exchanges , usTotalLinks :: !Int diff --git a/src/Service.hs b/src/Service.hs index 3f51c653..de004c89 100644 --- a/src/Service.hs +++ b/src/Service.hs @@ -341,10 +341,7 @@ getActivityInventoryWithSharedSolver validators sharedSolver db processIdText = let inventoryExport = convertToInventoryExport db (dbBioFlows db) (dbUnits db) processId activity inventory return $ Right inventoryExport --- | Simple stats tracking for tree processing --- | Tree-traversal counters (total nodes / loop nodes / leaf nodes). The --- 'Semigroup' / 'Monoid' instance is the product of three Sum-Int monoids, --- hand-written to keep the bare 'Int' constructor positions ergonomic. +-- | Tree-traversal counters (total nodes / loop nodes / leaf nodes). data TreeStats = TreeStats Int Int Int -- total, loops, leaves instance Semigroup TreeStats where diff --git a/src/Types.hs b/src/Types.hs index 8b3bcde6..a99ea514 100644 --- a/src/Types.hs +++ b/src/Types.hs @@ -1116,12 +1116,10 @@ data CrossDBLinkingStats = CrossDBLinkingStats } deriving (Generic, NFData, Store) -{- | Product of monoids, componentwise: lists concat, the count\/blocker -map unions (counts summed, first blocker wins as a tiebreaker), the set -unions, the @Int@ counter sums. Hand-written rather than @via Generically@ -because bare @Int@ has no canonical 'Monoid' (Sum vs Product is ambiguous) -and the @(Int, LinkBlocker)@ map value mixes a 'Monoid' with a non-'Monoid'. --} +-- | Field-wise '<>'. On unresolved-product collision counts are summed +-- and the first 'LinkBlocker' wins (tiebreaker). Hand-written: bare 'Int' +-- has no canonical 'Monoid', and the @(Int, LinkBlocker)@ map value is +-- not itself a 'Monoid'. instance Semigroup CrossDBLinkingStats where s1 <> s2 = CrossDBLinkingStats From 666613d024efb46392dd01103cbb30ac12bd446a Mon Sep 17 00:00:00 2001 From: Christophe Combelles Date: Tue, 26 May 2026 23:32:09 +0200 Subject: [PATCH 22/43] refactor(mapping): MappingStats Monoid, foldMap stats, Alternative cascade MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two patterns at the heart of the LCIA mapping kernel that the previous code spelled out the long way. 1. MappingStats becomes a Monoid (field-wise sum, all-zero identity). computeMappingStats was seven independent walks over the mapping list (one `length mappings` + five `length . filter ((== Just s) . ...)` + one `length . filter (isNothing . snd)`). It is now one `foldMap` pass with an exhaustive case on MatchStrategy. - 7×|mappings| traversals → 1×|mappings|. - The strategy switch is exhaustive: adding a new `MatchStrategy` constructor is now a compile error in `tally` until it gets a row. The old `(== Just strategy) . fmap snd . snd` form silently under-counted any new variant. - MappingStats now composes via `<>` and `mconcat`, useful for any future cross-method or cross-DB aggregation. 2. lookupCascadeCF was a hand-unrolled UUID → exact → fallback ladder nested four `case Maybe` levels deep. With Maybe's Alternative the cascade IS the algorithm: M.lookup fid (mtUuidCF tables) <|> (M.lookup fid flowDB >>= byName) where `byName` ends with M.lookup (name, baseMed, normSub) (mtExactCF tables) <|> M.lookup (name, baseMed) (mtFallbackCF tables) No behaviour change. Five fewer indentation levels in the body, and the priority order reads top-to-bottom on the page. Tests: 1052/1052 hspec green, openapi.json byte-identical (md5 matches). --- src/Method/Mapping.hs | 84 +++++++++++++++++++++++++------------------ 1 file changed, 49 insertions(+), 35 deletions(-) diff --git a/src/Method/Mapping.hs b/src/Method/Mapping.hs index 026e6f2f..e14c6662 100644 --- a/src/Method/Mapping.hs +++ b/src/Method/Mapping.hs @@ -67,6 +67,7 @@ module Method.Mapping ( computeMappingStats, ) where +import Control.Applicative ((<|>)) import Control.DeepSeq (NFData) import Control.Monad.ST (runST) import Data.Aeson (ToJSON) @@ -114,7 +115,9 @@ data MatchStrategy NoMatch deriving (Eq, Show) --- | Statistics about mapping results +-- | Per-strategy mapping counters. Forms a 'Monoid' (field-wise sum, all-zero +-- identity) so per-batch stats compose with '<>' and 'computeMappingStats' +-- is a single 'foldMap' pass over the mapping list. data MappingStats = MappingStats { msTotal :: !Int -- ^ Total CFs in method @@ -133,6 +136,20 @@ data MappingStats = MappingStats } deriving (Eq, Show) +instance Semigroup MappingStats where + a <> b = + MappingStats + (msTotal a + msTotal b) + (msByUUID a + msByUUID b) + (msByCAS a + msByCAS b) + (msByName a + msByName b) + (msBySynonym a + msBySynonym b) + (msByFuzzy a + msByFuzzy b) + (msUnmatched a + msUnmatched b) + +instance Monoid MappingStats where + mempty = MappingStats 0 0 0 0 0 0 0 + -- | Build a MapContext from a Database (convenience for callers) buildMapContext :: Database -> MapContext buildMapContext db = @@ -252,20 +269,20 @@ pickByCompartment (f : fs) (Just comp) = Just $ | med `T.isInfixOf` cat = True | otherwise = False --- | Compute statistics about mapping results +-- | Per-strategy counts of mapping results in one pass. +-- Each 'MatchStrategy' must be named below — adding a new variant is a +-- compile error here until it gets a row. computeMappingStats :: [(MethodCF, Maybe (BiosphereFlow, MatchStrategy))] -> MappingStats -computeMappingStats mappings = - MappingStats - { msTotal = length mappings - , msByUUID = count ByUUID - , msByCAS = count ByCAS - , msByName = count ByName - , msBySynonym = count BySynonym - , msByFuzzy = count ByFuzzy - , msUnmatched = length $ filter (isNothing . snd) mappings - } +computeMappingStats = foldMap (tally . fmap snd . snd) where - count strategy = length $ filter ((== Just strategy) . fmap snd . snd) mappings + one = mempty{msTotal = 1} + tally Nothing = one{msUnmatched = 1} + tally (Just ByUUID) = one{msByUUID = 1} + tally (Just ByCAS) = one{msByCAS = 1} + tally (Just ByName) = one{msByName = 1} + tally (Just BySynonym) = one{msBySynonym = 1} + tally (Just ByFuzzy) = one{msByFuzzy = 1} + tally (Just NoMatch) = one{msUnmatched = 1} {- | Precomputed CF lookup tables for one (database, method) pair. Building these from raw mappings is O(n log n) over thousands of CFs, so they @@ -1088,28 +1105,25 @@ against ILCD-style bare media without requiring an explicit (medium, sub) pair for every combination. -} lookupCascadeCF :: MethodTables -> BioFlowDB -> UUID -> Maybe (Double, Text) -lookupCascadeCF tables flowDB fid = case M.lookup fid (mtUuidCF tables) of - Just cfv -> Just cfv - Nothing -> case M.lookup fid flowDB of - Nothing -> Nothing - Just flow -> - let name = normalizeName (bfName flow) - rawCategory = T.toLower (VT.bfCompartmentName flow) - (rawMed, rawSubFromCat) = case T.breakOn "/" rawCategory of - (m, rest) - | T.null rest -> (m, T.empty) - | otherwise -> (m, T.drop 1 rest) - rawSub = - let s = T.toLower (fromMaybe T.empty (VT.bfCompartmentSub flow)) - in if T.null s then rawSubFromCat else s - Compartment normMedRaw normSub _ = - normalizeCompartment (mtCompartmentMap tables) (Compartment rawMed rawSub T.empty) - baseMed = normalizeMedium normMedRaw - subcomp = normSub - exact = M.lookup (name, baseMed, subcomp) (mtExactCF tables) - in case exact of - Just _ -> exact - Nothing -> M.lookup (name, baseMed) (mtFallbackCF tables) +lookupCascadeCF tables flowDB fid = + M.lookup fid (mtUuidCF tables) + <|> (M.lookup fid flowDB >>= byName) + where + byName flow = + let name = normalizeName (bfName flow) + rawCategory = T.toLower (VT.bfCompartmentName flow) + (rawMed, rawSubFromCat) = case T.breakOn "/" rawCategory of + (m, rest) + | T.null rest -> (m, T.empty) + | otherwise -> (m, T.drop 1 rest) + rawSub = + let s = T.toLower (fromMaybe T.empty (VT.bfCompartmentSub flow)) + in if T.null s then rawSubFromCat else s + Compartment normMedRaw normSub _ = + normalizeCompartment (mtCompartmentMap tables) (Compartment rawMed rawSub T.empty) + baseMed = normalizeMedium normMedRaw + in M.lookup (name, baseMed, normSub) (mtExactCF tables) + <|> M.lookup (name, baseMed) (mtFallbackCF tables) -- | Normalize medium names between method CFs and database flows. normalizeMedium :: Text -> Text From 047f5fe844ec47ceb12f278714c73889ee27a0cf Mon Sep 17 00:00:00 2001 From: Christophe Combelles Date: Wed, 27 May 2026 11:30:01 +0200 Subject: [PATCH 23/43] refactor(database): extract Database.MatrixBuild from buildDatabaseWithMatrices MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The function was a ~220-line do mixing UUID interning, two matrix builders, the product index, and the final Database assembly. Every pure phase moves into a new Database.MatrixBuild; the IO entrypoint becomes a thin log → call pure helpers → assemble record sequence (-186/+27 in Database.hs). Patterns line up with the ones 666613d named for the mapping kernel: - findProducer is Alternative on Maybe — the cascade IS the algorithm: exchangeProcessLinkId ex <|> (exchangeActivityLinkId ex >>= \a -> M.lookup (a, exchangeFlowId ex) lkp) Replaces a four-level nested case Maybe ladder. - buildTechTriples uses traverse over (Either Text), which short-circuits on the first unit-conversion error. Tuple-Monoid + fold accumulates (triples, warnings) without partitioning Lefts and Rights by hand. - perActivity flattens (activity, exchange) iteration once. Tech and bio each carried a near-identical "fetch activity / key / normFactor / fold exchanges" wrapper; they now share it. Drops the dead _activity arg from the bio builder along the way. - Misc: safeDenom names the divide-by-zero guard used by both matrices; the double convertUnit call collapses into one match on (needsConversion, convertUnit ...); S.toAscList replaces sort . S.toList . S.fromList. Behaviour identical. Tests: 1052/1052 hspec green; the matrix log lines (activity count, technosphere / biosphere non-zero entries) match the pre-refactor numbers on every fixture. --- src/Database.hs | 212 ++++---------------------------- src/Database/MatrixBuild.hs | 234 ++++++++++++++++++++++++++++++++++++ volca.cabal | 1 + 3 files changed, 261 insertions(+), 186 deletions(-) create mode 100644 src/Database/MatrixBuild.hs diff --git a/src/Database.hs b/src/Database.hs index ff0d55d1..cb8abb71 100644 --- a/src/Database.hs +++ b/src/Database.hs @@ -4,23 +4,20 @@ module Database where -import Data.Either (lefts, rights) -import Data.Int (Int32) import qualified Data.IntSet as IS -import Data.List (sort) import qualified Data.Map as M import qualified Data.Set as S import Data.Text (Text) import qualified Data.Text as T -import qualified Data.UUID as UUID import qualified Data.Vector as V import qualified Data.Vector.Unboxed as VU +import Database.MatrixBuild import Progress import qualified Search.BM25.Types as BM25T import qualified Search.Fuzzy as Fuzzy import qualified Search.Normalize as Normalize import Types -import UnitConversion (UnitConfig, convertUnit, normalizeUnit) +import UnitConversion (UnitConfig) {- | Build complete database with pre-computed sparse matrices @@ -45,204 +42,47 @@ Matrix Construction: buildDatabaseWithMatrices :: UnitConfig -> M.Map (UUID, UUID) Activity -> TechFlowDB -> BioFlowDB -> WasteFlowDB -> UnitDB -> IO (Either Text Database) buildDatabaseWithMatrices unitConfig activityMap techFlowDB bioFlowDB wasteFlowDB unitDB = do reportMatrixOperation "Building database with pre-computed sparse matrices" + let !tables = buildInterningTables activityMap + !supplierRefUnits = buildSupplierRefUnits unitDB (itActivities tables) + !indexes = buildIndexesWithProcessIds (itActivities tables) (itProcessIdTable tables) + activityCount = itActivityCount tables - -- Step 1: Build UUID interning tables from Map keys - let activityKeys = M.keys activityMap - sortedKeys = sort activityKeys -- Ensure deterministic ordering - - -- Build forward lookup: ProcessId (Int32) -> (UUID, UUID) - dbProcessIdTable = V.fromList sortedKeys - - -- Build reverse lookup: (UUID, UUID) -> ProcessId (Int32) - dbProcessIdLookup = M.fromList $ zip sortedKeys [0 ..] - - -- Build activity UUID index: UUID -> ProcessId (for O(1) lookups) - dbActivityUUIDIndex = M.fromList [(actUUID, pid) | (pid, (actUUID, _)) <- zip [0 ..] sortedKeys] - - -- Build activity products index: UUID -> [ProcessId] (for multi-product activities) - dbActivityProductsIndex = M.fromListWith (++) [(actUUID, [pid]) | (pid, (actUUID, _)) <- zip [0 ..] sortedKeys] - - -- Build activity-product lookup for correct multi-output handling - -- Maps (activityUUID, productFlowUUID) -> ProcessId - -- This ensures exchanges link to the correct product in multi-output activities - activityProductLookup = M.fromList [((actUUID, prodUUID), pid) | (pid, (actUUID, prodUUID)) <- zip [0 ..] sortedKeys] - - -- Convert Map to Vector indexed by ProcessId - dbActivities = V.fromList [activityMap M.! key | key <- sortedKeys] - - -- Build indexes (now using Vector) - indexes = buildIndexesWithProcessIds dbActivities dbProcessIdTable - - -- Build supplier reference unit lookup: ProcessId -> unit name of reference product - -- Used to convert exchange amounts to the supplier's unit for correct A-matrix coefficients - supplierRefUnits = - V.map - ( \act -> - let refExs = [ex | ex <- exchanges act, exchangeIsReference ex, not (exchangeIsInput ex)] - in case refExs of - (ex : _) -> getUnitNameForExchange unitDB ex - [] -> "" - ) - dbActivities - - -- Build activity index for matrix construction - reportMatrixOperation "Building activity indexes" - let activityCount = fromIntegral (V.length dbActivities) :: Int32 - - -- Note: ProcessId is already the matrix index (identity mapping removed for performance) reportMatrixOperation ("Activity index built: " ++ show activityCount ++ " activities") - - -- Build technosphere sparse triplets reportMatrixOperation "Building technosphere matrix triplets" - let buildTechTriple normalizationFactor j consumerActivity _consumerPid ex - -- Biosphere exchanges live in the B matrix, not A. - -- WasteExchanges share the A matrix with technosphere flows: the - -- underlying calculation is identical to a product link. Orphan - -- waste outputs (no activityLinkId) naturally drop out below when - -- producerIdx is Nothing — same as orphan tech inputs. - | isBiosphereExchange ex = Right ([], []) - | exchangeIsReference ex = Right ([], []) -- reference product is on the diagonal - | otherwise = - let producerResult = case exchangeProcessLinkId ex of - Just pid -> (Just pid, []) - Nothing -> case exchangeActivityLinkId ex of - Just actUUID -> - case M.lookup (actUUID, exchangeFlowId ex) activityProductLookup of - Just pid -> (Just pid, []) - Nothing -> - -- Only warn if exchange has non-zero amount (zero-amount are placeholders) - let warning = - [ "Missing activity-product pair referenced by exchange:\n" - ++ " Activity UUID: " - ++ T.unpack (UUID.toText actUUID) - ++ "\n" - ++ " Product UUID: " - ++ T.unpack (UUID.toText (exchangeFlowId ex)) - ++ "\n" - ++ " Consumer: " - ++ T.unpack (activityName consumerActivity) - ++ "\n" - ++ " Expected file: " - ++ T.unpack (UUID.toText actUUID) - ++ "_" - ++ T.unpack (UUID.toText (exchangeFlowId ex)) - ++ ".spold\n" - ++ " This exchange will be skipped." - | abs (exchangeAmount ex) > 1e-15 - ] - in (Nothing, warning) - Nothing -> (Nothing, []) - (producerPid, warnings) = producerResult - producerIdx = - producerPid >>= \pid -> - if pid >= 0 && fromIntegral pid < activityCount - then Just $ fromIntegral pid - else Nothing - in case producerIdx of - Just idx -> - let rawValue = exchangeAmount ex - exchangeUnit = getUnitNameForExchange unitDB ex - supplierUnit = supplierRefUnits V.! fromIntegral idx - needsConversion = - normalizeUnit exchangeUnit /= normalizeUnit supplierUnit - && not (T.null exchangeUnit) - && not (T.null supplierUnit) - in case (needsConversion, convertUnit unitConfig exchangeUnit supplierUnit rawValue) of - (True, Nothing) -> - Left $ - "Unknown unit conversion: \"" - <> exchangeUnit - <> "\" \8594 \"" - <> supplierUnit - <> "\" in " - <> activityName consumerActivity - <> " \8212 add these units to [[units]] CSV" - _ -> - let convertedValue = case (needsConversion, convertUnit unitConfig exchangeUnit supplierUnit rawValue) of - (True, Just v) -> v - _ -> rawValue - denom = - if normalizationFactor > 1e-15 - then normalizationFactor - else 1.0 - sign = if exchangeIsInput ex then 1 else -1 - value = sign * convertedValue / denom - in Right ([SparseTriple idx j value | convertedValue /= 0, idx /= j], warnings) - Nothing -> Right ([], warnings) - - buildActivityTriplets (j, consumerPid) = - let consumerActivity = dbActivities V.! fromIntegral consumerPid - consumerKey = dbProcessIdTable V.! fromIntegral consumerPid - normalizationFactor = activityNormFactor consumerActivity consumerKey - buildNormalizedTechTriple = buildTechTriple normalizationFactor j consumerActivity consumerPid - results = map buildNormalizedTechTriple (exchanges consumerActivity) - in -- Short-circuit on first Left (unit conversion error) - case lefts results of - (err : _) -> Left err - [] -> let rs = rights results in Right (concatMap fst rs, concatMap snd rs) - - -- Collect results, failing on first unit conversion error - let activityRange = [(fromIntegral j, j) | j <- [0 .. fromIntegral activityCount - 1 :: ProcessId]] - activityResults = map buildActivityTriplets activityRange - case lefts activityResults of - (err : _) -> return $ Left err - [] -> do - let allResults = rights activityResults - !techTriples = VU.fromList $ concatMap fst allResults - techWarnings = concatMap snd allResults - - -- Emit warnings in IO context + case buildTechTriples unitConfig unitDB tables supplierRefUnits of + Left err -> pure (Left err) + Right (techTriples, techWarnings) -> do mapM_ (reportProgress Warning) techWarnings - reportMatrixOperation ("Technosphere matrix: " ++ show (VU.length techTriples) ++ " non-zero entries") - -- Build biosphere sparse triplets reportMatrixOperation "Building biosphere matrix triplets" - let bioFlowUUIDs = - V.fromList $ - sort $ - S.toList $ - S.fromList - [exchangeFlowId ex | pid <- [0 .. fromIntegral activityCount - 1 :: Int], let act = dbActivities V.! pid, ex <- exchanges act, isBiosphereExchange ex] - bioFlowCount = fromIntegral $ V.length bioFlowUUIDs :: Int32 - bioFlowIndex = M.fromList $ zip (V.toList bioFlowUUIDs) [0 ..] - - !bioTriples = - let buildBioTriple normalizationFactor j _activity ex - | not (isBiosphereExchange ex) = [] - | otherwise = - case M.lookup (exchangeFlowId ex) bioFlowIndex of - Just i -> - let rawValue = exchangeAmount ex - denom = if normalizationFactor > 1e-15 then normalizationFactor else 1.0 - value = rawValue / denom - in [SparseTriple i j value | rawValue /= 0] - Nothing -> [] - - buildActivityBioTriplets (j, pid) = - let activity = dbActivities V.! fromIntegral pid - activityKey = dbProcessIdTable V.! fromIntegral pid - normalizationFactor = activityNormFactor activity activityKey - in concatMap (buildBioTriple normalizationFactor j activity) (exchanges activity) - in VU.fromList $ concatMap buildActivityBioTriplets activityRange + let !bioFlowUUIDs = collectBioFlowOrder (itActivities tables) + !bioTriples = buildBioTriples bioFlowUUIDs tables + bioFlowCount = fromIntegral (V.length bioFlowUUIDs) reportMatrixOperation ("Biosphere matrix: " ++ show (VU.length bioTriples) ++ " non-zero entries") reportMatrixOperation "Database with matrices built successfully" - reportMatrixOperation ("Final matrix stats: " ++ show (VU.length techTriples) ++ " tech entries, " ++ show (VU.length bioTriples) ++ " bio entries") + reportMatrixOperation + ( "Final matrix stats: " + ++ show (VU.length techTriples) + ++ " tech entries, " + ++ show (VU.length bioTriples) + ++ " bio entries" + ) reportMatrixOperation "Building product index" - let !productIndex = buildProductIndex dbActivities dbProcessIdTable techFlowDB + let !productIndex = buildProductIndex (itActivities tables) (itProcessIdTable tables) techFlowDB reportMatrixOperation ("Product index: " ++ show (M.size (piByUUID productIndex)) ++ " products indexed") - return $ + pure $ Right Database - { dbProcessIdTable = dbProcessIdTable - , dbProcessIdLookup = dbProcessIdLookup - , dbActivityUUIDIndex = dbActivityUUIDIndex - , dbActivityProductsIndex = dbActivityProductsIndex + { dbProcessIdTable = itProcessIdTable tables + , dbProcessIdLookup = itProcessIdLookup tables + , dbActivityUUIDIndex = itActivityUUIDIndex tables + , dbActivityProductsIndex = itActivityProductsIndex tables , dbProductIndex = productIndex - , dbActivities = dbActivities + , dbActivities = itActivities tables , dbTechFlows = techFlowDB , dbBioFlows = bioFlowDB , dbWasteFlows = wasteFlowDB diff --git a/src/Database/MatrixBuild.hs b/src/Database/MatrixBuild.hs new file mode 100644 index 00000000..d1f9e0ef --- /dev/null +++ b/src/Database/MatrixBuild.hs @@ -0,0 +1,234 @@ +{-# LANGUAGE OverloadedStrings #-} + +{- | Pure builders for the sparse matrices and UUID interning tables that back +'Database'. The IO entrypoint in "Database" composes these helpers and adds +progress reporting; all numerical work lives here. + +Sign and normalization conventions are documented on +'buildDatabaseWithMatrices'. +-} +module Database.MatrixBuild ( + InterningTables (..), + buildInterningTables, + buildSupplierRefUnits, + collectBioFlowOrder, + buildTechTriples, + buildBioTriples, +) where + +import Control.Applicative ((<|>)) +import Data.Foldable (fold) +import Data.Int (Int32) +import Data.List (sort) +import qualified Data.Map as M +import qualified Data.Set as S +import Data.Text (Text) +import qualified Data.Text as T +import qualified Data.UUID as UUID +import qualified Data.Vector as V +import qualified Data.Vector.Unboxed as VU +import Types +import UnitConversion (UnitConfig, convertUnit, normalizeUnit) + +{- | Per-process lookup tables built once from the sorted activity-key list. + +All fields share a single 'zip' [0..] sortedKeys' traversal, so row order +and ProcessId ↔ (UUID, UUID) consistency hold by construction. +-} +data InterningTables = InterningTables + { itProcessIdTable :: !(V.Vector (UUID, UUID)) + , itProcessIdLookup :: !(M.Map (UUID, UUID) ProcessId) + , itActivityUUIDIndex :: !(M.Map UUID ProcessId) + , itActivityProductsIndex :: !(M.Map UUID [ProcessId]) + , itActivities :: !(V.Vector Activity) + , itActivityCount :: !Int32 + } + +buildInterningTables :: M.Map (UUID, UUID) Activity -> InterningTables +buildInterningTables activityMap = + InterningTables + { itProcessIdTable = V.fromList sortedKeys + , itProcessIdLookup = M.fromList [(k, pid) | (pid, k) <- indexedKeys] + , itActivityUUIDIndex = M.fromList [(actUUID, pid) | (pid, (actUUID, _)) <- indexedKeys] + , itActivityProductsIndex = M.fromListWith (++) [(actUUID, [pid]) | (pid, (actUUID, _)) <- indexedKeys] + , itActivities = V.fromList [activityMap M.! k | k <- sortedKeys] + , itActivityCount = fromIntegral (length sortedKeys) + } + where + sortedKeys = sort (M.keys activityMap) + indexedKeys = zip [0 ..] sortedKeys + +{- | Reference-product output unit for each activity (empty when the activity +has no produced reference exchange — same fallback as the previous inline +expression). +-} +buildSupplierRefUnits :: UnitDB -> V.Vector Activity -> V.Vector Text +buildSupplierRefUnits unitDB = V.map refUnit + where + refUnit act = + case [ex | ex <- exchanges act, exchangeIsReference ex, not (exchangeIsInput ex)] of + (ex : _) -> getUnitNameForExchange unitDB ex + [] -> "" + +-- | Ascending vector of every biosphere flow UUID present in the activity set. +collectBioFlowOrder :: V.Vector Activity -> V.Vector UUID +collectBioFlowOrder activities = + V.fromList . S.toAscList . S.fromList $ + [ exchangeFlowId ex + | act <- V.toList activities + , ex <- exchanges act + , isBiosphereExchange ex + ] + +{- | Divide-by-zero guard for normalization factors. Activities with no +reference output normalize by 1.0 instead of a near-zero denominator. +-} +safeDenom :: Double -> Double +safeDenom f = if f > 1e-15 then f else 1.0 + +{- | Producer cascade for technosphere exchanges: prefer the resolved process +link, else look up the (activityUUID, flowUUID) pair. This is the +@Alternative@ on @Maybe@ — the @Maybe@ analogue of @firstNonEmpty@ in +"Database.CrossLinking". +-} +findProducer :: M.Map (UUID, UUID) ProcessId -> Exchange -> Maybe ProcessId +findProducer lkp ex = + exchangeProcessLinkId ex + <|> (exchangeActivityLinkId ex >>= \actUUID -> M.lookup (actUUID, exchangeFlowId ex) lkp) + +{- | Warning text for an exchange whose declared producer cannot be located. +Zero-amount placeholder exchanges produce no warning. +-} +missingActivityWarning :: Activity -> Exchange -> UUID -> [String] +missingActivityWarning consumer ex actUUID + | abs (exchangeAmount ex) <= 1e-15 = [] + | otherwise = + [ "Missing activity-product pair referenced by exchange:\n" + ++ " Activity UUID: " + ++ T.unpack (UUID.toText actUUID) + ++ "\n" + ++ " Product UUID: " + ++ T.unpack (UUID.toText (exchangeFlowId ex)) + ++ "\n" + ++ " Consumer: " + ++ T.unpack (activityName consumer) + ++ "\n" + ++ " Expected file: " + ++ T.unpack (UUID.toText actUUID) + ++ "_" + ++ T.unpack (UUID.toText (exchangeFlowId ex)) + ++ ".spold\n" + ++ " This exchange will be skipped." + ] + +unitConversionError :: Activity -> Text -> Text -> Text +unitConversionError consumer fromU toU = + "Unknown unit conversion: \"" + <> fromU + <> "\" \8594 \"" + <> toU + <> "\" in " + <> activityName consumer + <> " \8212 add these units to [[units]] CSV" + +{- | Flatten the activity set into a stream of @(normFactor, j, activity, ex)@ +tuples. Encapsulates the shared "for each activity j: get the activity, +get its key, get its norm factor, fold its exchanges" boilerplate. +-} +perActivity :: InterningTables -> [(Double, ProcessId, Activity, Exchange)] +perActivity tables = + [ (normFactor, j, act, ex) + | j <- [0 .. itActivityCount tables - 1] + , let act = itActivities tables V.! fromIntegral j + , let key = itProcessIdTable tables V.! fromIntegral j + , let normFactor = activityNormFactor act key + , ex <- exchanges act + ] + +{- | Technosphere sparse triplets + skipped-link warnings. + +Short-circuits on the first unit-conversion error. Accumulates triplets +and warnings via the tuple Monoid, mirroring the @foldMap@ stats pattern +used in "Method.Mapping". +-} +buildTechTriples :: + UnitConfig -> + UnitDB -> + InterningTables -> + V.Vector Text -> + Either Text (VU.Vector SparseTriple, [String]) +buildTechTriples unitConfig unitDB tables supplierRefUnits = + fmap pack (traverse step (perActivity tables)) + where + lkp = itProcessIdLookup tables + actCount = itActivityCount tables + step (normFactor, j, act, ex) = + techTriple unitConfig unitDB lkp supplierRefUnits actCount normFactor j act ex + pack rs = let (ts, ws) = fold rs in (VU.fromList ts, ws) + +techTriple :: + UnitConfig -> + UnitDB -> + M.Map (UUID, UUID) ProcessId -> + V.Vector Text -> + Int32 -> + Double -> + ProcessId -> + Activity -> + Exchange -> + Either Text ([SparseTriple], [String]) +techTriple unitConfig unitDB lkp supplierRefUnits actCount normFactor j consumer ex + -- Biosphere flows live in B; reference products sit on the diagonal of (I-A). + -- WasteExchanges share A: same product-link calculation as a technosphere flow. + -- Orphan waste outputs (no activityLinkId) drop out below when findProducer is Nothing. + | isBiosphereExchange ex = Right mempty + | exchangeIsReference ex = Right mempty + | otherwise = + case validProducerIdx of + Nothing -> Right ([], producerWarnings) + Just idx -> emitTriple idx + where + producerPid = findProducer lkp ex + validProducerIdx = do + pid <- producerPid + if pid >= 0 && pid < actCount then Just pid else Nothing + -- Only warn when the activity-link lookup itself failed (Just actUUID + no pid). + -- A missing processLink with no activityLink is a true orphan, not a data gap. + producerWarnings = case (producerPid, exchangeActivityLinkId ex) of + (Nothing, Just actUUID) -> missingActivityWarning consumer ex actUUID + _ -> [] + + emitTriple idx = + let raw = exchangeAmount ex + exchUnit = getUnitNameForExchange unitDB ex + suppUnit = supplierRefUnits V.! fromIntegral idx + needsConversion = + normalizeUnit exchUnit /= normalizeUnit suppUnit + && not (T.null exchUnit) + && not (T.null suppUnit) + in case (needsConversion, convertUnit unitConfig exchUnit suppUnit raw) of + (True, Nothing) -> Left (unitConversionError consumer exchUnit suppUnit) + (True, Just v) -> Right (triplesFor idx v, []) + (False, _) -> Right (triplesFor idx raw, []) + + triplesFor idx v = + let sign = if exchangeIsInput ex then 1 else -1 + value = sign * v / safeDenom normFactor + in [SparseTriple idx j value | v /= 0, idx /= j] + +{- | Biosphere sparse triplets. No unit conversion or producer cascade — each +biosphere exchange maps directly to its row via 'collectBioFlowOrder'. +-} +buildBioTriples :: V.Vector UUID -> InterningTables -> VU.Vector SparseTriple +buildBioTriples bioOrder tables = + VU.fromList $ concatMap step (perActivity tables) + where + bioIndex = M.fromList $ zip (V.toList bioOrder) [0 ..] + step (normFactor, j, _act, ex) + | not (isBiosphereExchange ex) = [] + | otherwise = case M.lookup (exchangeFlowId ex) bioIndex of + Nothing -> [] + Just i -> + let raw = exchangeAmount ex + value = raw / safeDenom normFactor + in [SparseTriple i j value | raw /= 0] diff --git a/volca.cabal b/volca.cabal index a5269356..b57034a5 100644 --- a/volca.cabal +++ b/volca.cabal @@ -39,6 +39,7 @@ library , Database.Manager , Database.Loader , Database.CrossLinking + , Database.MatrixBuild , Database.Upload , Database.UploadedDatabase , SynonymDB From 4b751e3a53e86820221982c552d99854c7939cfc Mon Sep 17 00:00:00 2001 From: Christophe Combelles Date: Wed, 27 May 2026 16:47:36 +0200 Subject: [PATCH 24/43] refactor(service): split applySubstitutionsAt planning from effect MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The 4-case classifier in applySubstitutionsAt dispatched on a tuple of booleans (fromDb == thisDbName, toDb == thisDbName) and interleaved matrix-perturbation arithmetic with IO calls into perturbA. The cases were unnamed and the rank-1 update was reconstructed inline in each branch. Introduce three substitution-domain ADTs: * Endpoint = Here ProcessId UUIDs | Elsewhere DepRef — replaces the tuple-of-bools dispatch with named, exhaustive constructors. * DepRef — the (dbName, db, pid, uuids) bundle the cross-DB helpers consume; replaces today's anonymous 4-tuple. * RankOneUpdate — the planned (consumerPid, perturbation, extras) value that planUpdate produces and applyRankOne consumes. planUpdate is now a pure four-clause Either, one clause per case, with findTechCoefficient / findStaticCrossDBLink lifted into requireTech / requireStatic helpers that surface the existing 422-mapped MatrixError. applyRankOne shrinks to a one-line mapM over perturbA. resolveRef becomes resolveEndpoint, returning the named Endpoint. No behaviour change: substitution + cross-DB specs still pass (44 examples). Helpers remain in the where-block; foldM/ExceptT cleanup and lifting to top-level follow in the next commits. --- src/Service.hs | 242 +++++++++++++++++++++++++++---------------------- 1 file changed, 136 insertions(+), 106 deletions(-) diff --git a/src/Service.hs b/src/Service.hs index de004c89..36719c13 100644 --- a/src/Service.hs +++ b/src/Service.hs @@ -2283,6 +2283,38 @@ computeScalingVectorWithSubstitutionsCrossDB depLookup db rootDbName solver pid (d : _) -> Just d [] -> Nothing +{- | A substitution endpoint resolved against the loaded databases. +'Here' means the activity lives in @thisDb@ (no cross-DB plumbing +needed); 'Elsewhere' carries the dep-DB descriptor required to look up +static cross-DB links and synthesise virtual ones. + +The ADT replaces a @(Bool, Bool)@ dispatch on @(fromDb == thisDbName, +toDb == thisDbName)@: each constructor names what the boolean meant. +-} +data Endpoint + = Here !ProcessId !(UUID, UUID) + | Elsewhere !DepRef + +-- | An endpoint that lives in a dependency database. +data DepRef = DepRef + { drDbName :: !Text + , drDb :: !Database + , drPid :: !ProcessId + , drUUIDs :: !(UUID, UUID) + } + +{- | A planned rank-1 perturbation of one consumer column plus any virtual +cross-DB links the substitution introduces or cancels. Computed purely +from resolved endpoints by 'planUpdate'; consumed effectfully by +'applyRankOne'. Separating the two keeps the four substitution cases +out of the IO layer and makes them straightforward to test in isolation. +-} +data RankOneUpdate = RankOneUpdate + { ruConsumerPid :: !ProcessId + , ruPerturb :: ![(Int, Double)] + , ruExtras :: ![CrossDBLink] + } + {- | Apply all substitutions whose consumer lives in @thisDbName@ to the given scaling vectors. Substitutions whose consumer lives elsewhere are skipped at this level — they'll match at the DB where their consumer @@ -2353,130 +2385,128 @@ applySubstitutionsAt depLookup thisDb thisDbObj rootDb solver scalings allSubs = let (_, cPidText) = parseSubRef rootDb (subConsumer sub) case resolveActivityAndProcessId thisDb cPidText of Left e -> pure (Left e) - Right (consumerPid, _) -> do + Right (cPid, _) -> do let (fromDb, fromPidText) = parseSubRef rootDb (subFrom sub) (toDb, toPidText) = parseSubRef rootDb (subTo sub) - eFrom <- resolveRef fromDb fromPidText - eTo <- resolveRef toDb toPidText + eFrom <- resolveEndpoint fromDb fromPidText + eTo <- resolveEndpoint toDb toPidText case (eFrom, eTo) of (Left e, _) -> pure (Left e) (_, Left e) -> pure (Left e) - (Right fromRef, Right toRef) -> - classify mFact xs consumerPid sub fromRef toRef - - classify mFact xs consumerPid sub fromRef toRef = - let (fromDb, fromPid, fromUUIDs, _) = fromRef - (toDb, toPid, toUUIDs, toDbObj) = toRef - in case (fromDb == thisDbName, toDb == thisDbName) of - (True, True) -> - case findTechCoefficient thisDb consumerPid fromPid of - Nothing -> noTechLinkErr sub consumerPid - Just aNorm -> - applyRankOne - mFact - xs - consumerPid - [ (fromIntegral fromPid, aNorm) - , (fromIntegral toPid, -aNorm) - ] - [] - (True, False) -> - -- Case B: drop this-DB oldSup, route demand to other-DB newSup. - case findTechCoefficient thisDb consumerPid fromPid of - Nothing -> noTechLinkErr sub consumerPid - Just aNorm -> - let aRaw = aNorm * activityNormalizationFactor thisDb consumerPid - newLk = mkVirtualLink thisDb consumerPid toDbObj toDb toUUIDs toPid aRaw - in applyRankOne - mFact - xs - consumerPid - [(fromIntegral fromPid, aNorm)] - [newLk] - (False, True) -> - -- Case C: cancel existing cross-DB link, pull new this-DB supplier. - case findStaticCrossDBLink thisDb consumerPid fromDb fromUUIDs of - Nothing -> noStaticLinkErr sub consumerPid fromDb fromPid - Just staticLk -> - let aRaw = cdlCoefficient staticLk - aNorm = aRaw / activityNormalizationFactor thisDb consumerPid - cancel = staticLk{cdlCoefficient = -aRaw} - in applyRankOne - mFact - xs - consumerPid - [(fromIntegral toPid, -aNorm)] - [cancel] - (False, False) -> - -- Case D: re-route demand between two other DBs; this-DB x unchanged. - case findStaticCrossDBLink thisDb consumerPid fromDb fromUUIDs of - Nothing -> noStaticLinkErr sub consumerPid fromDb fromPid - Just staticLk -> - let aRaw = cdlCoefficient staticLk - cancel = staticLk{cdlCoefficient = -aRaw} - newLk = mkVirtualLink thisDb consumerPid toDbObj toDb toUUIDs toPid aRaw - in applyRankOne mFact xs consumerPid [] [cancel, newLk] - - applyRankOne mFact xs consumerPid perturb extra = do + (Right fromEp, Right toEp) -> + case planUpdate sub cPid fromEp toEp of + Left e -> pure (Left e) + Right upd -> applyRankOne mFact xs upd + + planUpdate :: + Substitution -> + ProcessId -> + Endpoint -> + Endpoint -> + Either ServiceError RankOneUpdate + -- Case A: both suppliers in this DB. Symmetric rank-1 on the consumer column. + planUpdate sub cPid (Here fromPid _) (Here toPid _) = do + a <- requireTech sub cPid fromPid + Right $ + RankOneUpdate + cPid + [(fromIntegral fromPid, a), (fromIntegral toPid, -a)] + [] + -- Case B: drop this-DB oldSup, route demand to other-DB newSup. + -- aRaw = aNorm * normFactor (the cross-DB link stores *raw* coefficients). + planUpdate sub cPid (Here fromPid _) (Elsewhere toRef) = do + a <- requireTech sub cPid fromPid + let aRaw = a * activityNormalizationFactor thisDb cPid + newLk = virtualLinkTo cPid toRef aRaw + Right $ RankOneUpdate cPid [(fromIntegral fromPid, a)] [newLk] + -- Case C: cancel existing cross-DB link, pull new this-DB supplier. + planUpdate sub cPid (Elsewhere fromRef) (Here toPid _) = do + s <- requireStatic sub cPid fromRef + let aRaw = cdlCoefficient s + aNorm = aRaw / activityNormalizationFactor thisDb cPid + cancel = s{cdlCoefficient = -aRaw} + Right $ RankOneUpdate cPid [(fromIntegral toPid, -aNorm)] [cancel] + -- Case D: re-route demand between two other DBs; this-DB x unchanged. + -- Unlike Case B, the new-link coefficient is the *raw* static value, + -- not aNorm*normFactor — we're forwarding what the cancelled link carried. + planUpdate sub cPid (Elsewhere fromRef) (Elsewhere toRef) = do + s <- requireStatic sub cPid fromRef + let aRaw = cdlCoefficient s + cancel = s{cdlCoefficient = -aRaw} + newLk = virtualLinkTo cPid toRef aRaw + Right $ RankOneUpdate cPid [] [cancel, newLk] + + virtualLinkTo cPid toRef = + mkVirtualLink thisDb cPid (drDb toRef) (drDbName toRef) (drUUIDs toRef) (drPid toRef) + + requireTech sub cPid fromPid = + maybe (Left $ noTechLink sub cPid) Right $ + findTechCoefficient thisDb cPid fromPid + + requireStatic sub cPid fromRef = + maybe (Left $ noStaticLink sub cPid (drDbName fromRef) (drPid fromRef)) Right $ + findStaticCrossDBLink thisDb cPid (drDbName fromRef) (drUUIDs fromRef) + + applyRankOne mFact xs upd = do -- Apply the same rank-1 update to each of the K vectors. z depends -- only on u (not x); a future optimization can compute z once. results <- mapM - (\x -> perturbA thisDb mFact x (fromIntegral consumerPid) perturb) + (\x -> perturbA thisDb mFact x (fromIntegral (ruConsumerPid upd)) (ruPerturb upd)) xs pure $ case sequence results of Left msg -> Left (MatrixError msg) - Right xs' -> Right (xs', extra) - - noTechLinkErr sub consumerPid = - pure $ - Left $ - MatrixError $ - "No technosphere link from " - <> processIdToText thisDb consumerPid - <> " to supplier " - <> subFrom sub - - noStaticLinkErr sub consumerPid fromDb fromPid = - pure $ - Left $ - MatrixError $ - "no cross-DB link from " - <> processIdToText thisDb consumerPid - <> " to " - <> fromDb - <> "::" - <> T.pack (show fromPid) - <> " (requested by substitution " - <> subFrom sub - <> " -> " - <> subTo sub - <> ")" - - resolveRef :: Text -> Text -> IO (Either ServiceError (Text, ProcessId, (UUID, UUID), Database)) - resolveRef refDb pidText - | refDb == thisDbName = case resolveActivityAndProcessId thisDb pidText of - Left e -> pure (Left e) - Right (p, _) -> - let uuids = dbProcessIdTable thisDb V.! fromIntegral p - in pure (Right (refDb, p, uuids, thisDb)) + Right xs' -> Right (xs', ruExtras upd) + + noTechLink sub cPid = + MatrixError $ + "No technosphere link from " + <> processIdToText thisDb cPid + <> " to supplier " + <> subFrom sub + + noStaticLink sub cPid fromDb fromPid = + MatrixError $ + "no cross-DB link from " + <> processIdToText thisDb cPid + <> " to " + <> fromDb + <> "::" + <> T.pack (show fromPid) + <> " (requested by substitution " + <> subFrom sub + <> " -> " + <> subTo sub + <> ")" + + resolveEndpoint :: Text -> Text -> IO (Either ServiceError Endpoint) + resolveEndpoint refDb pidText + | refDb == thisDbName = + pure $ case resolveActivityAndProcessId thisDb pidText of + Left e -> Left e + Right (p, _) -> + Right $ Here p (dbProcessIdTable thisDb V.! fromIntegral p) | otherwise = do mPair <- depLookup refDb - case mPair of + pure $ case mPair of Nothing -> - pure $ - Left $ - MatrixError $ - "substitution references unloaded database: " <> refDb + Left $ + MatrixError $ + "substitution references unloaded database: " <> refDb Just (depDb, _) -> case resolveActivityAndProcessId depDb pidText of Left _ -> - pure $ - Left $ - MatrixError $ - "substitution PID not found in " <> refDb <> ": " <> pidText + Left $ + MatrixError $ + "substitution PID not found in " <> refDb <> ": " <> pidText Right (p, _) -> - let uuids = dbProcessIdTable depDb V.! fromIntegral p - in pure (Right (refDb, p, uuids, depDb)) + Right $ + Elsewhere + DepRef + { drDbName = refDb + , drDb = depDb + , drPid = p + , drUUIDs = dbProcessIdTable depDb V.! fromIntegral p + } {- | Build a synthesized 'CrossDBLink' for a what-if substitution targeting a dep-DB supplier. Mirrors the fields a real (load-time) link would have so From 5aa7630616da15467433ddf6a75622fb6174f752 Mon Sep 17 00:00:00 2001 From: Christophe Combelles Date: Wed, 27 May 2026 16:50:02 +0200 Subject: [PATCH 25/43] refactor(service): drive applySubstitutionsAt with foldM over ExceptT Replace the hand-written applyAll/applySub recursion (manual Left propagation, threaded accumulators) with foldM in ExceptT ServiceError IO. One step function now sequences the three Either/IO operations (resolve consumer, resolve from/to endpoints, plan the rank-1 update, apply it) using monadic bind, so the four-case (Left e, _) ladder collapses to natural short-circuit on the first failure. Resolution order (from before to) and the call-once placement of getFactorization are preserved verbatim; both were called out in the plan as behaviour-sensitive. --- src/Service.hs | 47 ++++++++++++++++++++--------------------------- 1 file changed, 20 insertions(+), 27 deletions(-) diff --git a/src/Service.hs b/src/Service.hs index 36719c13..ebd8ab6c 100644 --- a/src/Service.hs +++ b/src/Service.hs @@ -8,6 +8,8 @@ import API.Types (ActivityForAPI (..), ActivityInfo (..), ActivityLinks (..), Ac import CLI.Types (DebugMatricesOptions (..)) import Control.Concurrent.Async (mapConcurrently) import Control.Exception (SomeException, try) +import Control.Monad (foldM) +import Control.Monad.Trans.Except (ExceptT (..), runExceptT) import Data.Aeson (Value, object, toJSON, (.=)) import Data.Either (fromRight, lefts, rights) import Data.Int (Int32) @@ -2352,13 +2354,12 @@ applySubstitutionsAt :: -- | full sub list (filtered to consumer==this) [Substitution] -> IO (Either ServiceError ([U.Vector Double], [CrossDBLink])) -applySubstitutionsAt depLookup thisDb thisDbObj rootDb solver scalings allSubs = do - let localSubs = filter consumerLivesHere allSubs - case localSubs of +applySubstitutionsAt depLookup thisDb thisDbObj rootDb solver scalings allSubs = + case filter consumerLivesHere allSubs of [] -> pure $ Right (scalings, []) - _ -> do + localSubs -> do mFact <- getFactorization solver - applyAll mFact scalings [] localSubs + runExceptT $ foldM (step mFact) (scalings, []) localSubs where thisDbName = unThisDb thisDbObj @@ -2374,29 +2375,21 @@ applySubstitutionsAt depLookup thisDb thisDbObj rootDb solver scalings allSubs = let (cDb, _) = parseSubRef rootDb (subConsumer sub) in cDb == thisDbName - applyAll _ xs links [] = pure $ Right (xs, links) - applyAll mFact xs links (sub : rest) = do - res <- applySub mFact xs sub - case res of - Left e -> pure (Left e) - Right (xs', extraLks) -> applyAll mFact xs' (links ++ extraLks) rest - - applySub mFact xs sub = do + -- Resolve, plan, and apply one substitution; thread the K scalings + -- and the accumulated virtual links. 'from' is resolved before 'to' + -- so a failing 'from' wins when both refs are unresolvable. + step mFact (xs, links) sub = do let (_, cPidText) = parseSubRef rootDb (subConsumer sub) - case resolveActivityAndProcessId thisDb cPidText of - Left e -> pure (Left e) - Right (cPid, _) -> do - let (fromDb, fromPidText) = parseSubRef rootDb (subFrom sub) - (toDb, toPidText) = parseSubRef rootDb (subTo sub) - eFrom <- resolveEndpoint fromDb fromPidText - eTo <- resolveEndpoint toDb toPidText - case (eFrom, eTo) of - (Left e, _) -> pure (Left e) - (_, Left e) -> pure (Left e) - (Right fromEp, Right toEp) -> - case planUpdate sub cPid fromEp toEp of - Left e -> pure (Left e) - Right upd -> applyRankOne mFact xs upd + (fromDb, fromPidText) = parseSubRef rootDb (subFrom sub) + (toDb, toPidText) = parseSubRef rootDb (subTo sub) + (cPid, _) <- hoistEither $ resolveActivityAndProcessId thisDb cPidText + fromEp <- ExceptT $ resolveEndpoint fromDb fromPidText + toEp <- ExceptT $ resolveEndpoint toDb toPidText + upd <- hoistEither $ planUpdate sub cPid fromEp toEp + (xs', extra) <- ExceptT $ applyRankOne mFact xs upd + pure (xs', links ++ extra) + + hoistEither = ExceptT . pure planUpdate :: Substitution -> From 92b885a0495669ed38f29b80fc45949d86e1af6f Mon Sep 17 00:00:00 2001 From: Christophe Combelles Date: Wed, 27 May 2026 23:06:42 +0200 Subject: [PATCH 26/43] refactor(service): collapse convertExchangeWithUnit via LambdaCase + Alternative MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The 91-line where-clause inside convertActivityForAPI re-matched the Exchange variant six times (three parallel flow lookups, one target resolver, one flow-name merger) and threaded a correlated (Maybe, Maybe, Maybe) triple that could drift apart in principle. Refactor: - TargetRef record replaces the correlated triple — either all three fields are present or none. Project to the wire-format Maybes at the boundary only. - resolveTarget / resolveFlow each do one \case over Exchange, eliminating the per-side empty-Nothing repetitions. - Three named resolvers (byActivityUUID / byProductFlow / byCrossDBLink) return Maybe TargetRef; the SimaPro fallback chain is now <|>. - Two duplicated 5-line projections (Activity -> triple, CrossDBLink -> triple) extracted as activityToTarget / crossDBLinkToTarget. - crossDBLinkMap lifted out of the where to module-level buildCrossDBLinkMap. Semantics preserved: technosphere broken-link (linkId set but unresolvable) returns Nothing rather than falling through to the product-flow path, matching the original guard ladder. Verified: cabal build clean, cabal test 1107 / 0 failures. --- src/Service.hs | 230 ++++++++++++++++++++++++++----------------------- 1 file changed, 124 insertions(+), 106 deletions(-) diff --git a/src/Service.hs b/src/Service.hs index 1be5fe68..2bd50ee0 100644 --- a/src/Service.hs +++ b/src/Service.hs @@ -1,4 +1,5 @@ {-# LANGUAGE BangPatterns #-} +{-# LANGUAGE LambdaCase #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE TypeApplications #-} @@ -6,6 +7,7 @@ module Service where import API.Types (ActivityForAPI (..), ActivityInfo (..), ActivityLinks (..), ActivityMetadata (..), ActivityStats (..), ActivitySummary (..), ApiFlow (..), ClassificationSystem (..), ConsumerResult (..), ConsumersResponse (..), CutoffWasteFlow (..), EdgeType (..), ExchangeDetail (..), ExchangeWithUnit (..), ExportNode (..), FlowDetail (..), FlowInfo (..), FlowRole (..), FlowSearchResult (..), FlowSummary (..), GraphEdge (..), GraphExport (..), GraphNode (..), InventoryExport (..), InventoryFlowDetail (..), InventoryMetadata (..), InventoryStatistics (..), NodeType (..), Perturbation (..), RootDb (..), SearchResults (..), Substitution (..), SupplyChainEdge (..), SupplyChainEntry (..), SupplyChainResponse (..), ThisDb (..), TreeEdge (..), TreeExport (..), TreeMetadata (..), parseSubRef, unresolvedFlowName) import CLI.Types (DebugMatricesOptions (..)) +import Control.Applicative ((<|>)) import Control.Concurrent.Async (mapConcurrently) import Control.Exception (SomeException, try) import Control.Monad (foldM) @@ -1082,6 +1084,7 @@ convertActivityForAPI unitCfg db processId activity = Just (activityUUID, _) -> getAllProductsForActivity db activityUUID Nothing -> [] (refProdName, refProdAmount, refProdUnit) = getReferenceProductInfo (dbTechFlows db) (dbUnits db) activity + linkMap = buildCrossDBLinkMap db processId in ActivityForAPI { pfaProcessId = processIdToText db processId , pfaName = activityName activity @@ -1094,115 +1097,130 @@ convertActivityForAPI unitCfg db processId activity = , pfaReferenceProductAmount = if T.null refProdName then Nothing else Just refProdAmount , pfaReferenceProductUnit = if T.null refProdName then Nothing else Just refProdUnit , pfaAllProducts = allProducts - , pfaExchanges = map convertExchangeWithUnit (exchanges activity) + , pfaExchanges = map (toExchangeWithUnit unitCfg db linkMap) (exchanges activity) , pfaNativeType = activityNativeType activity } - where - -- Cross-DB link lookup keyed by 'cdlConsumerFlowId' (the consumer-side flow - -- UUID). UUIDs are unique across flow kinds, so a tech "X" link and a waste - -- "X" link on the same activity cannot collide here. - crossDBLinkMap :: M.Map UUID CrossDBLink - crossDBLinkMap = case processIdToUUIDs db processId of - Just (actUUID, _) -> - M.fromList - [ (cdlConsumerFlowId link, link) - | link <- dbCrossDBLinks db - , cdlConsumerActUUID link == actUUID - , cdlConsumerFlowId link /= UUID.nil - ] - Nothing -> M.empty - convertExchangeWithUnit exchange = - let - -- Look up the flow in the appropriate side (tech vs bio vs waste) based on exchange variant. - techFlowInfo = case exchange of - TechnosphereExchange{techFlowId = fid} -> M.lookup fid (dbTechFlows db) - BiosphereExchange{} -> Nothing - WasteExchange{} -> Nothing - bioFlowInfo = case exchange of - BiosphereExchange{bioFlowId = fid} -> M.lookup fid (dbBioFlows db) - TechnosphereExchange{} -> Nothing - WasteExchange{} -> Nothing - wasteFlowInfo = case exchange of - WasteExchange{waFlowId = fid} -> M.lookup fid (dbWasteFlows db) - TechnosphereExchange{} -> Nothing - BiosphereExchange{} -> Nothing - (targetActivityName, targetActivityLocation, targetProcessId) = case exchange of - TechnosphereExchange{techFlowId = fId, techRole = role, techActivityLinkId = linkId} - | (role == Input || role == ReferenceInput) && linkId /= UUID.nil -> - case findActivityByActivityUUID db linkId of - Just targetActivity -> - let maybeProcessId = findProcessIdByActivityUUID db linkId - processIdText = fmap (processIdToText db) maybeProcessId - in (Just (activityName targetActivity), Just (activityLocation targetActivity), processIdText) - Nothing -> (Nothing, Nothing, Nothing) - | role == Input || role == ReferenceInput -> - -- SimaPro path: linkId is nil, resolve by product flow UUID - case findProcessIdByProductFlowWithFallback unitCfg db fId of - Just pid - | Just act <- getActivity db pid -> - (Just (activityName act), Just (activityLocation act), Just (processIdToText db pid)) - _ -> - case M.lookup fId crossDBLinkMap of - Just link -> - let crossPid = - cdlSourceDatabase link - <> "::" - <> UUID.toText (cdlSupplierActUUID link) - <> "_" - <> UUID.toText (cdlSupplierProdUUID link) - in (Just (cdlFlowName link), Just (cdlLocation link), Just crossPid) - Nothing -> (Nothing, Nothing, Nothing) - | otherwise -> (Nothing, Nothing, Nothing) - BiosphereExchange{} -> (Nothing, Nothing, Nothing) - -- A waste exchange consumed by a treatment activity links the - -- generator to that treatment, same shape as a technosphere - -- Input. Orphan waste outputs that the exact-match cross-DB - -- linker resolved show up via 'crossDBLinkMap' keyed on the - -- waste flow name. The remaining orphans stay unresolved. - WasteExchange{waActivityLinkId = linkId, waIsInput = True} - | linkId /= UUID.nil -> - case findActivityByActivityUUID db linkId of - Just targetActivity -> - let maybeProcessId = findProcessIdByActivityUUID db linkId - processIdText = fmap (processIdToText db) maybeProcessId - in (Just (activityName targetActivity), Just (activityLocation targetActivity), processIdText) - Nothing -> (Nothing, Nothing, Nothing) - WasteExchange{waActivityLinkId = linkId, waIsInput = False, waFlowId = fid} - | linkId == UUID.nil -> - case M.lookup fid crossDBLinkMap of - Just link -> - let crossPid = - cdlSourceDatabase link - <> "::" - <> UUID.toText (cdlSupplierActUUID link) - <> "_" - <> UUID.toText (cdlSupplierProdUUID link) - in (Just (cdlFlowName link), Just (cdlLocation link), Just crossPid) - Nothing -> (Nothing, Nothing, Nothing) - WasteExchange{} -> (Nothing, Nothing, Nothing) - -- When the exchange's flow UUID resolves on none of the three sides - -- we surface the raw UUID rather than a generic "unknown" — that - -- way the consumer can tell which flow failed to resolve and the - -- gap is debuggable instead of silently misnamed. - flowNameTxt = case (techFlowInfo, bioFlowInfo, wasteFlowInfo) of - (Just tf, _, _) -> tfName tf - (_, Just bf, _) -> bfName bf - (_, _, Just wf) -> wfName wf - _ -> " UUID.toText (exchangeFlowId exchange) <> ">" - compartment = bfCompartment =<< bioFlowInfo - in - ExchangeWithUnit - { ewuExchange = exchange - , ewuUnitName = getUnitNameForExchange (dbUnits db) exchange - , ewuFlowName = flowNameTxt - , ewuCompartment = compartment - , ewuTargetActivity = targetActivityName - , ewuTargetLocation = targetActivityLocation - , ewuTargetProcessId = targetProcessId - , ewuExComment = exchangeComment exchange - , ewuPedigree = exchangePedigree exchange - } +{- | Resolved target activity for a technosphere or waste exchange. Either all +three fields are present (Just TargetRef) or none (Nothing) — the formerly +correlated triple of Maybes can no longer drift apart. +-} +data TargetRef = TargetRef + { trName :: !Text + , trLocation :: !Text + , trProcessId :: !Text + } + +activityToTarget :: Database -> ProcessId -> Activity -> TargetRef +activityToTarget db pid act = + TargetRef (activityName act) (activityLocation act) (processIdToText db pid) + +crossDBLinkToTarget :: CrossDBLink -> TargetRef +crossDBLinkToTarget link = + TargetRef + (cdlFlowName link) + (cdlLocation link) + ( cdlSourceDatabase link + <> "::" + <> UUID.toText (cdlSupplierActUUID link) + <> "_" + <> UUID.toText (cdlSupplierProdUUID link) + ) + +-- | EcoSpold path: resolve a target by activity UUID. +resolveByActivityUUID :: Database -> UUID -> Maybe TargetRef +resolveByActivityUUID db linkId + | linkId == UUID.nil = Nothing + | otherwise = do + pid <- findProcessIdByActivityUUID db linkId + act <- getActivity db pid + pure (activityToTarget db pid act) + +-- | SimaPro path: resolve a target by product flow UUID. +resolveByProductFlow :: UnitConfig -> Database -> UUID -> Maybe TargetRef +resolveByProductFlow cfg db fId = do + pid <- findProcessIdByProductFlowWithFallback cfg db fId + act <- getActivity db pid + pure (activityToTarget db pid act) + +-- | Cross-database link resolution (orphan waste outputs, missing tech links). +resolveByCrossDBLink :: M.Map UUID CrossDBLink -> UUID -> Maybe TargetRef +resolveByCrossDBLink links fId = crossDBLinkToTarget <$> M.lookup fId links + +{- | Resolve the target activity (if any) for one exchange. Technosphere broken +links (linkId set but unresolvable) do NOT fall through to the product-flow +path — that matches the original behaviour. Use '<|>' to chain fallbacks only +where the original code did. +-} +resolveTarget + :: UnitConfig + -> Database + -> M.Map UUID CrossDBLink + -> Exchange + -> Maybe TargetRef +resolveTarget cfg db links = \case + TechnosphereExchange{techRole = role, techActivityLinkId = lid, techFlowId = fid} + | role /= Input && role /= ReferenceInput -> Nothing + | lid /= UUID.nil -> resolveByActivityUUID db lid + | otherwise -> resolveByProductFlow cfg db fid <|> resolveByCrossDBLink links fid + BiosphereExchange{} -> Nothing + WasteExchange{waIsInput = True, waActivityLinkId = lid} + | lid /= UUID.nil -> resolveByActivityUUID db lid + | otherwise -> Nothing + WasteExchange{waIsInput = False, waActivityLinkId = lid, waFlowId = fid} + | lid == UUID.nil -> resolveByCrossDBLink links fid + | otherwise -> Nothing + +{- | Flow name + (biosphere-only) compartment. Each variant has exactly one +flow side by construction, so no Maybe-merge is needed downstream. +-} +resolveFlow :: Database -> Exchange -> Maybe (Text, Maybe Compartment) +resolveFlow db = \case + TechnosphereExchange{techFlowId = fid} -> + (\tf -> (tfName tf, Nothing)) <$> M.lookup fid (dbTechFlows db) + BiosphereExchange{bioFlowId = fid} -> + (\bf -> (bfName bf, bfCompartment bf)) <$> M.lookup fid (dbBioFlows db) + WasteExchange{waFlowId = fid} -> + (\wf -> (wfName wf, Nothing)) <$> M.lookup fid (dbWasteFlows db) + +{- | Build the cross-DB link map for one activity, keyed by consumer flow UUID. +UUIDs are unique across flow kinds, so a tech and a waste link on the same +activity cannot collide here. +-} +buildCrossDBLinkMap :: Database -> ProcessId -> M.Map UUID CrossDBLink +buildCrossDBLinkMap db pid = case processIdToUUIDs db pid of + Just (actUUID, _) -> + M.fromList + [ (cdlConsumerFlowId link, link) + | link <- dbCrossDBLinks db + , cdlConsumerActUUID link == actUUID + , cdlConsumerFlowId link /= UUID.nil + ] + Nothing -> M.empty + +toExchangeWithUnit + :: UnitConfig + -> Database + -> M.Map UUID CrossDBLink + -> Exchange + -> ExchangeWithUnit +toExchangeWithUnit cfg db links exchange = + -- Surface the raw UUID when the flow does not resolve — a clear failure + -- the consumer can debug, not a silent "unknown". + let unresolvedName = " UUID.toText (exchangeFlowId exchange) <> ">" + (flowName, compartment) = fromMaybe (unresolvedName, Nothing) (resolveFlow db exchange) + target = resolveTarget cfg db links exchange + in ExchangeWithUnit + { ewuExchange = exchange + , ewuUnitName = getUnitNameForExchange (dbUnits db) exchange + , ewuFlowName = flowName + , ewuCompartment = compartment + , ewuTargetActivity = trName <$> target + , ewuTargetLocation = trLocation <$> target + , ewuTargetProcessId = trProcessId <$> target + , ewuExComment = exchangeComment exchange + , ewuPedigree = exchangePedigree exchange + } {- | Get reference product name from activity exchanges. Reference products are always technosphere. From 2812e08a1204ff24c436ff430813f1bf2589b832 Mon Sep 17 00:00:00 2001 From: Christophe Combelles Date: Wed, 27 May 2026 23:23:51 +0200 Subject: [PATCH 27/43] refactor(service): unify getActivityExchangeDetails via FlowKind dispatch The where-clause repeated the three-arm Exchange/flow-lookup ladder and re-built the cross-DB link map by hand, mirroring the same shape as convertActivityForAPI's old where-clause. Lift the variant dispatch into a single 'FlowKind'-returning lookup: - Types.hs: lookupExchangeFlow :: Database -> Exchange -> Maybe FlowKind plus flowKindCompartment :: FlowKind -> Maybe Compartment (alongside the existing flowKind* projections). One arm per variant, exhaustive. - Service.hs: getActivityExchangeDetails is now four lines (filter, map); toExchangeDetail composes lookupExchangeFlow + apiFlowOfKind + flowKindUnitName instead of three parallel case arms. Cross-DB link resolution reuses the shared buildCrossDBLinkMap. crossDBLinkToSummary + resolveTargetSummary mirror the TargetRef helpers from the previous commit, with '<|>' for the getTargetActivity / cross-DB fallback chain. - unresolvedUnit lifted to module-level constant (no longer recomputed per call). - resolveFlow (introduced last commit) also collapses via lookupExchangeFlow + flowKindCompartment, so the two refactors converge on the same primitive. Semantics preserved: biosphere exchanges still return Nothing target; the cross-DB fallback fires only when getTargetActivity fails. Verified: cabal build clean, cabal test 1107 / 0 failures. --- src/Service.hs | 185 ++++++++++++++++++++----------------------------- src/Types.hs | 19 +++++ 2 files changed, 96 insertions(+), 108 deletions(-) diff --git a/src/Service.hs b/src/Service.hs index 2bd50ee0..6e106f22 100644 --- a/src/Service.hs +++ b/src/Service.hs @@ -5,7 +5,7 @@ module Service where -import API.Types (ActivityForAPI (..), ActivityInfo (..), ActivityLinks (..), ActivityMetadata (..), ActivityStats (..), ActivitySummary (..), ApiFlow (..), ClassificationSystem (..), ConsumerResult (..), ConsumersResponse (..), CutoffWasteFlow (..), EdgeType (..), ExchangeDetail (..), ExchangeWithUnit (..), ExportNode (..), FlowDetail (..), FlowInfo (..), FlowRole (..), FlowSearchResult (..), FlowSummary (..), GraphEdge (..), GraphExport (..), GraphNode (..), InventoryExport (..), InventoryFlowDetail (..), InventoryMetadata (..), InventoryStatistics (..), NodeType (..), Perturbation (..), RootDb (..), SearchResults (..), Substitution (..), SupplyChainEdge (..), SupplyChainEntry (..), SupplyChainResponse (..), ThisDb (..), TreeEdge (..), TreeExport (..), TreeMetadata (..), parseSubRef, unresolvedFlowName) +import API.Types (ActivityForAPI (..), ActivityInfo (..), ActivityLinks (..), ActivityMetadata (..), ActivityStats (..), ActivitySummary (..), ApiFlow (..), ClassificationSystem (..), ConsumerResult (..), ConsumersResponse (..), CutoffWasteFlow (..), EdgeType (..), ExchangeDetail (..), ExchangeWithUnit (..), ExportNode (..), FlowDetail (..), FlowInfo (..), FlowRole (..), FlowSearchResult (..), FlowSummary (..), GraphEdge (..), GraphExport (..), GraphNode (..), InventoryExport (..), InventoryFlowDetail (..), InventoryMetadata (..), InventoryStatistics (..), NodeType (..), Perturbation (..), RootDb (..), SearchResults (..), Substitution (..), SupplyChainEdge (..), SupplyChainEntry (..), SupplyChainResponse (..), ThisDb (..), TreeEdge (..), TreeExport (..), TreeMetadata (..), apiFlowOfKind, parseSubRef, unresolvedFlowName) import CLI.Types (DebugMatricesOptions (..)) import Control.Applicative ((<|>)) import Control.Concurrent.Async (mapConcurrently) @@ -1175,13 +1175,9 @@ resolveTarget cfg db links = \case flow side by construction, so no Maybe-merge is needed downstream. -} resolveFlow :: Database -> Exchange -> Maybe (Text, Maybe Compartment) -resolveFlow db = \case - TechnosphereExchange{techFlowId = fid} -> - (\tf -> (tfName tf, Nothing)) <$> M.lookup fid (dbTechFlows db) - BiosphereExchange{bioFlowId = fid} -> - (\bf -> (bfName bf, bfCompartment bf)) <$> M.lookup fid (dbBioFlows db) - WasteExchange{waFlowId = fid} -> - (\wf -> (wfName wf, Nothing)) <$> M.lookup fid (dbWasteFlows db) +resolveFlow db exchange = do + fk <- lookupExchangeFlow db exchange + pure (flowKindName fk, flowKindCompartment fk) {- | Build the cross-DB link map for one activity, keyed by consumer flow UUID. UUIDs are unique across flow kinds, so a tech and a waste link on the same @@ -1323,109 +1319,82 @@ getActivitiesUsingFlow db flowUUID = , Just processId <- [findProcessIdForActivity db proc] ] -{- | Helper function to get detailed exchanges with filtering. Resolves -cross-DB technosphere inputs (SimaPro pattern: activityLinkId is nil, -the supplier lives in a dep DB via 'dbCrossDBLinks') by synthesizing an -'ActivitySummary' with a qualified pid @"dbName::actUUID_prodUUID"@ — -same convention the @/activity/{pid}@ endpoint uses. +{- | Sentinel returned only when an exchange's unit UUID failed to resolve. +The exchange unit-name field already surfaces the same gap via +'getUnitNameForExchange', so consumers see the missing unit in both the +structured 'Unit' and the unit-name string. +-} +unresolvedUnit :: Unit +unresolvedUnit = Unit{unitId = UUID.nil, unitName = "", unitSymbol = "", unitComment = ""} + +{- | 'ActivitySummary' form of a cross-DB link target. Mirrors +'crossDBLinkToTarget' but produces the richer wire shape consumed by the +exchange-details endpoint. +-} +crossDBLinkToSummary :: CrossDBLink -> ActivitySummary +crossDBLinkToSummary link = + ActivitySummary + { prsProcessId = + cdlSourceDatabase link + <> "::" + <> UUID.toText (cdlSupplierActUUID link) + <> "_" + <> UUID.toText (cdlSupplierProdUUID link) + , prsName = cdlFlowName link + , prsLocation = cdlLocation link + , prsProduct = cdlFlowName link + , prsProductAmount = 1.0 + , prsProductUnit = cdlExchangeUnit link + , prsAllocationPercent = Nothing + , prsAllocationFormula = Nothing + , prsNativeType = Nothing + } + +{- | Resolve an exchange's target as 'ActivitySummary', falling back to the +cross-DB link map for unresolved technosphere/waste links. Biosphere flows +have no target by definition. +-} +resolveTargetSummary + :: Database + -> M.Map UUID CrossDBLink + -> Exchange + -> Maybe ActivitySummary +resolveTargetSummary db links exchange = case exchange of + BiosphereExchange{} -> Nothing + TechnosphereExchange{} -> resolved + WasteExchange{} -> resolved + where + resolved = + getTargetActivity db exchange + <|> (crossDBLinkToSummary <$> M.lookup (exchangeFlowId exchange) links) + +{- | Detailed exchanges with filtering. Resolves cross-DB technosphere inputs +(SimaPro pattern: @activityLinkId@ is nil, the supplier lives in a dep DB +via 'dbCrossDBLinks') by synthesizing an 'ActivitySummary' with a qualified +pid @"dbName::actUUID_prodUUID"@ — same convention the @/activity/{pid}@ +endpoint uses. + +A missing flow row used to drop the exchange entirely. We now surface an +unresolved-flow entry instead, so the returned list always has one element +per matching exchange and the gap is reportable. -} getActivityExchangeDetails :: Database -> Activity -> (Exchange -> Bool) -> [ExchangeDetail] getActivityExchangeDetails db activity filterFn = - map mkDetail (filter filterFn (exchanges activity)) - where - -- A missing flow or unit row used to drop the exchange entirely. We now - -- surface an unresolved-flow entry instead, so the returned list always - -- has one element per matching exchange and the gap is reportable. - mkDetail exchange = - let unitForExchange = M.findWithDefault unresolvedUnit (exchangeUnitId exchange) (dbUnits db) - exUnitName = getUnitNameForExchange (dbUnits db) exchange - fid = exchangeFlowId exchange - unresolved flow = ExchangeDetail exchange flow "" unitForExchange exUnitName Nothing - in case exchange of - TechnosphereExchange{} -> case M.lookup fid (dbTechFlows db) of - Just flow -> - ExchangeDetail - exchange - (ApiTechFlow flow) - (getUnitNameForTechFlow (dbUnits db) flow) - unitForExchange - exUnitName - (resolveCrossDBTarget exchange) - Nothing -> unresolved (ApiUnresolvedFlow fid) - BiosphereExchange{} -> case M.lookup fid (dbBioFlows db) of - Just flow -> - ExchangeDetail - exchange - (ApiBioFlow flow) - (getUnitNameForBioFlow (dbUnits db) flow) - unitForExchange - exUnitName - Nothing -- Biosphere flows have no target activity - Nothing -> unresolved (ApiUnresolvedFlow fid) - WasteExchange{} -> case M.lookup fid (dbWasteFlows db) of - Just flow -> - ExchangeDetail - exchange - (ApiWasteFlow flow) - (getUnitNameForWasteFlow (dbUnits db) flow) - unitForExchange - exUnitName - (resolveCrossDBTarget exchange) - Nothing -> unresolved (ApiUnresolvedFlow fid) - - -- Sentinel returned only when the exchange's unit UUID itself failed - -- to resolve. The exchange unit name field already surfaces the same - -- gap via 'getUnitNameForExchange', so consumers see the missing unit - -- in both the structured Unit and the exUnitName string. - unresolvedUnit = Unit{unitId = UUID.nil, unitName = "", unitSymbol = "", unitComment = ""} - - -- Cross-DB link lookup keyed by the consumer-side flow UUID - -- ('cdlConsumerFlowId'). Built once per activity; O(log n) per-exchange - -- resolution. Keying by UUID — rather than normalized flow name — avoids - -- the collision where a tech "X" link and a waste "X" link on the same - -- activity would overwrite each other in a name-keyed map. - crossLinkByFlow :: M.Map UUID CrossDBLink - crossLinkByFlow = case findProcessIdForActivity db activity >>= processIdToUUIDs db of - Just (actUUID, _) -> - M.fromList - [ (cdlConsumerFlowId link, link) - | link <- dbCrossDBLinks db - , cdlConsumerActUUID link == actUUID - , cdlConsumerFlowId link /= UUID.nil - ] - Nothing -> M.empty - - -- Resolves a target activity for a technosphere or waste exchange. Orphan - -- waste outputs resolved by the exact-match cross-DB linker share the same - -- 'crossLinkByFlow' path as technosphere inputs; both are keyed on the - -- consumer-side flow UUID. - resolveCrossDBTarget exchange = - case getTargetActivity db exchange of - Just s -> Just s - Nothing -> crossDBTarget (exchangeFlowId exchange) - - crossDBTarget consumerFlowId = - case M.lookup consumerFlowId crossLinkByFlow of - Nothing -> Nothing - Just link -> - let qualifiedPid = - cdlSourceDatabase link - <> "::" - <> UUID.toText (cdlSupplierActUUID link) - <> "_" - <> UUID.toText (cdlSupplierProdUUID link) - in Just - ActivitySummary - { prsProcessId = qualifiedPid - , prsName = cdlFlowName link - , prsLocation = cdlLocation link - , prsProduct = cdlFlowName link - , prsProductAmount = 1.0 - , prsProductUnit = cdlExchangeUnit link - , prsAllocationPercent = Nothing - , prsAllocationFormula = Nothing - , prsNativeType = Nothing - } + let linkMap = case findProcessIdForActivity db activity of + Just pid -> buildCrossDBLinkMap db pid + Nothing -> M.empty + in map (toExchangeDetail db linkMap) (filter filterFn (exchanges activity)) + +toExchangeDetail :: Database -> M.Map UUID CrossDBLink -> Exchange -> ExchangeDetail +toExchangeDetail db links exchange = + let unitForExchange = M.findWithDefault unresolvedUnit (exchangeUnitId exchange) (dbUnits db) + exUnitName = getUnitNameForExchange (dbUnits db) exchange + target = resolveTargetSummary db links exchange + in case lookupExchangeFlow db exchange of + Just fk -> + ExchangeDetail exchange (apiFlowOfKind fk) (flowKindUnitName (dbUnits db) fk) unitForExchange exUnitName target + Nothing -> + ExchangeDetail exchange (ApiUnresolvedFlow (exchangeFlowId exchange)) "" unitForExchange exUnitName Nothing -- | Get detailed input exchanges getActivityInputDetails :: Database -> Activity -> [ExchangeDetail] diff --git a/src/Types.hs b/src/Types.hs index 29acbc0f..6f2602d2 100644 --- a/src/Types.hs +++ b/src/Types.hs @@ -546,6 +546,12 @@ flowKindUnitName udb (TechKind f) = getUnitNameForTechFlow udb f flowKindUnitName udb (BioKind f) = getUnitNameForBioFlow udb f flowKindUnitName udb (WasteKind f) = getUnitNameForWasteFlow udb f +-- | Biosphere compartment, if any. Tech/waste flows carry no compartment. +flowKindCompartment :: FlowKind -> Maybe Compartment +flowKindCompartment (TechKind _) = Nothing +flowKindCompartment (BioKind f) = bfCompartment f +flowKindCompartment (WasteKind _) = Nothing + -- | Unit database (deduplicated) type UnitDB = M.Map UUID Unit @@ -877,6 +883,19 @@ findProcessIdByProductFlow :: Database -> UUID -> Maybe ProcessId findProcessIdByProductFlow db flowUUID = M.lookup flowUUID (piByUUID $ dbProductIndex db) +{- | Look up an exchange's flow on the appropriate side. Each exchange variant +has exactly one flow side by construction (tech, bio, or waste), so the +result is a single 'FlowKind' or 'Nothing' when the UUID is absent from the +database. +-} +lookupExchangeFlow :: Database -> Exchange -> Maybe FlowKind +lookupExchangeFlow db TechnosphereExchange{techFlowId = fid} = + TechKind <$> M.lookup fid (dbTechFlows db) +lookupExchangeFlow db BiosphereExchange{bioFlowId = fid} = + BioKind <$> M.lookup fid (dbBioFlows db) +lookupExchangeFlow db WasteExchange{waFlowId = fid} = + WasteKind <$> M.lookup fid (dbWasteFlows db) + {- | Search products by name (for future product search feature) Returns all ProcessIds that produce products matching the given name -} From 0cc8387e81bd4605f221329db8620620b8e8d778 Mon Sep 17 00:00:00 2001 From: Christophe Combelles Date: Wed, 27 May 2026 23:29:52 +0200 Subject: [PATCH 28/43] refactor(service): factor graph builders into small named helpers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three graph-building functions in src/Service.hs each carried a deep where-clause or do-block. The shapes were similar (per-variant node construction, accumulator folds, biosphere-at-root special-casing) but duplicated. Extract small helpers so each function reads top-to-bottom. extractBiosphereNodesAndEdges: - mkBiosphereExportNode: pure ExportNode constructor - mkBiosphereTreeEdge: pure TreeEdge constructor with direction - maxBiosphereFlows: lift the magic 50 to a named top-level constant - Inner fold step is now four lines extractNodesAndEdges: - mkActivityExportNode: shared between TreeLeaf and TreeNode (the two constructors that wrap an Activity) - mkLoopExportNode: TreeLoop-specific - mkTechnosphereTreeEdge: pure constructor for child edges - withRootBiosphere: collapses the 'if depth == 0 then bio else id' block that appeared in both TreeLeaf and TreeNode arms - Each arm is now ~5 lines buildActivityGraph: - selectSignificantActivities: pure helper, no longer inlined; also drops a partial '!!' in favour of a comprehension-guard - isInputLinkTo: exhaustive predicate, replaces nested case-of inside L.find - mkGraphEdgeFromTriple: uses Maybe-do to short-circuit when either endpoint is below cutoff (avoids computing flow/unit fields that will be thrown away) - mkGraphNode: drops the 'error \"Invalid ProcessId in graph\"' partial function in favour of a sentinel node (Vector !? + fallback) — matches the "no silent errors, no runtime crashes" rules - Body shrinks from ~115 lines of inlined steps to ~13 lines of pipeline assembly - Switches the edge list-comp + 'Just e <- edges' filter to mapMaybe Verified: cabal build clean, cabal test 1107 / 0 failures. --- src/Service.hs | 484 ++++++++++++++++++++++++------------------------- 1 file changed, 239 insertions(+), 245 deletions(-) diff --git a/src/Service.hs b/src/Service.hs index 6e106f22..a9253db1 100644 --- a/src/Service.hs +++ b/src/Service.hs @@ -20,7 +20,7 @@ import qualified Data.IntSet as IS import qualified Data.List as L import qualified Data.List.NonEmpty as NE import qualified Data.Map as M -import Data.Maybe (catMaybes, fromMaybe, isJust) +import Data.Maybe (catMaybes, fromMaybe, isJust, mapMaybe) import Data.Sequence (Seq (..), (|>)) import qualified Data.Set as S import Data.Text (Text) @@ -463,146 +463,154 @@ extractCompartment category = then "soil" else "other" --- | Extract biosphere exchanges from an activity and create nodes and edges +{- | Cap on biosphere flows shown per activity. System processes can declare +hundreds; we keep the top-N by |amount| to keep graphs renderable. +-} +maxBiosphereFlows :: Int +maxBiosphereFlows = 50 + +-- | ExportNode for a single biosphere flow attached to a parent activity. +mkBiosphereExportNode :: UnitDB -> BiosphereFlow -> Text -> Int -> Bool -> ExportNode +mkBiosphereExportNode units flow parentPid depth isEmission = + let compartmentTxt = bfCompartmentName flow + in ExportNode + { enId = UUID.toText (bfId flow) + , enName = bfName flow + , enDescription = [compartmentTxt] + , enLocation = "" + , enUnit = getUnitNameForBioFlow units flow + , enNodeType = if isEmission then BiosphereEmissionNode else BiosphereResourceNode + , enDepth = depth + , enLoopTarget = Nothing + , enParentId = Just parentPid + , enChildrenCount = 0 + , enCompartment = Just compartmentTxt + } + +-- | Edge linking an activity to a biosphere flow. Direction depends on whether +-- the exchange is an emission (activity -> flow) or a resource (flow -> activity). +mkBiosphereTreeEdge :: UnitDB -> BiosphereFlow -> Text -> Bool -> Exchange -> TreeEdge +mkBiosphereTreeEdge units flow activityPid isEmission ex = + let flowIdText = UUID.toText (bfId flow) + (edgeFrom, edgeTo, edgeType) = + if isEmission + then (activityPid, flowIdText, BiosphereEmissionEdge) + else (flowIdText, activityPid, BiosphereResourceEdge) + in TreeEdge + { teFrom = edgeFrom + , teTo = edgeTo + , teFlow = FlowInfo (bfId flow) (bfName flow) (bfCompartmentName flow) + , teQuantity = exchangeAmount ex + , teUnit = getUnitNameForBioFlow units flow + , teEdgeType = edgeType + } + +-- | Extract biosphere exchanges from an activity and create nodes and edges. extractBiosphereNodesAndEdges :: Database -> Activity -> Text -> Int -> M.Map Text ExportNode -> [TreeEdge] -> (M.Map Text ExportNode, [TreeEdge]) extractBiosphereNodesAndEdges db activity activityProcessId depth nodeAcc edgeAcc = - let allBiosphereExchanges = [ex | ex <- exchanges activity, isBiosphereExchange ex] - -- Limit to top 50 most significant flows to prevent performance issues with system processes - maxBiosphereFlows = 50 - biosphereExchanges = - take maxBiosphereFlows $ - L.sortBy - (\a b -> compare (abs (exchangeAmount b)) (abs (exchangeAmount a))) - allBiosphereExchanges - processBiosphere ex (nodeAcc', edgeAcc') = - case M.lookup (exchangeFlowId ex) (dbBioFlows db) of - Nothing -> (nodeAcc', edgeAcc') - Just flow -> - let flowIdText = UUID.toText (bfId flow) - isEmission = not (exchangeIsInput ex) -- False = emission, True = resource - nodeType = if isEmission then BiosphereEmissionNode else BiosphereResourceNode - compartmentTxt = bfCompartmentName flow - biosphereNode = - ExportNode - { enId = flowIdText - , enName = bfName flow - , enDescription = [compartmentTxt] - , enLocation = "" - , enUnit = getUnitNameForBioFlow (dbUnits db) flow - , enNodeType = nodeType - , enDepth = depth - , enLoopTarget = Nothing - , enParentId = Just activityProcessId - , enChildrenCount = 0 - , enCompartment = Just compartmentTxt - } - nodeAcc'' = M.insert flowIdText biosphereNode nodeAcc' - (edgeFrom, edgeTo, edgeType) = - if isEmission - then (activityProcessId, flowIdText, BiosphereEmissionEdge) - else (flowIdText, activityProcessId, BiosphereResourceEdge) - edge = - TreeEdge - { teFrom = edgeFrom - , teTo = edgeTo - , teFlow = FlowInfo (bfId flow) (bfName flow) compartmentTxt - , teQuantity = exchangeAmount ex - , teUnit = getUnitNameForBioFlow (dbUnits db) flow - , teEdgeType = edgeType - } - edgeAcc'' = edge : edgeAcc' - in (nodeAcc'', edgeAcc'') - in foldr processBiosphere (nodeAcc, edgeAcc) biosphereExchanges - --- | Extract nodes and edges from LoopAwareTree + foldr step (nodeAcc, edgeAcc) topBiosphereExchanges + where + units = dbUnits db + topBiosphereExchanges = + take maxBiosphereFlows $ + L.sortBy (\a b -> compare (abs (exchangeAmount b)) (abs (exchangeAmount a))) $ + filter isBiosphereExchange (exchanges activity) + step ex acc@(nodes, edges) = case M.lookup (exchangeFlowId ex) (dbBioFlows db) of + Nothing -> acc + Just flow -> + let isEmission = not (exchangeIsInput ex) + node = mkBiosphereExportNode units flow activityProcessId depth isEmission + edge = mkBiosphereTreeEdge units flow activityProcessId isEmission ex + in (M.insert (UUID.toText (bfId flow)) node nodes, edge : edges) + +-- | ExportNode for an activity-bearing tree node (TreeLeaf or TreeNode). +mkActivityExportNode :: Database -> Activity -> Text -> Int -> Maybe Text -> ExportNode +mkActivityExportNode db activity nodeId depth parentId = + ExportNode + { enId = nodeId + , enName = activityName activity + , enDescription = activityDescription activity + , enLocation = activityLocation activity + , enUnit = activityUnit activity + , enNodeType = ActivityNode + , enDepth = depth + , enLoopTarget = Nothing + , enParentId = parentId + , enChildrenCount = countPotentialChildren db activity + , enCompartment = Nothing + } + +-- | ExportNode for a TreeLoop. Looks up the referenced activity for its real +-- unit and location; falls back to "N/A" sentinels when the referent is missing. +mkLoopExportNode :: Database -> UUID -> Text -> Text -> Int -> Maybe Text -> ExportNode +mkLoopExportNode db uuid nodeId name loopDepth parentId = + let (actualLocation, actualUnit) = case findActivityByActivityUUID db uuid of + Just act -> (activityLocation act, activityUnit act) + Nothing -> ("N/A", "N/A") + in ExportNode + { enId = nodeId + , enName = name + , enDescription = ["Loop reference"] + , enLocation = actualLocation + , enUnit = actualUnit + , enNodeType = LoopNode + , enDepth = loopDepth + , enLoopTarget = Just (UUID.toText uuid) + , enParentId = parentId + , enChildrenCount = 0 + , enCompartment = Nothing + } + +{- | Attach biosphere nodes/edges to the accumulator when we're at the root of +the tree (depth == 0). Below the root we leave the accumulator untouched to +keep the graph readable. +-} +withRootBiosphere + :: Database + -> Activity + -> Text + -> Int + -> (M.Map Text ExportNode, [TreeEdge]) + -> (M.Map Text ExportNode, [TreeEdge]) +withRootBiosphere db activity pid depth acc@(nodes, edges) + | depth == 0 = extractBiosphereNodesAndEdges db activity pid depth nodes edges + | otherwise = acc + +-- | Technosphere edge from the current node to a child subtree. +mkTechnosphereTreeEdge :: UnitDB -> Text -> Text -> Double -> TechnosphereFlow -> TreeEdge +mkTechnosphereTreeEdge units fromPid toPid quantity flow = + TreeEdge + { teFrom = fromPid + , teTo = toPid + , teFlow = FlowInfo (tfId flow) (tfName flow) "" + , teQuantity = quantity + , teUnit = getUnitNameForTechFlow units flow + , teEdgeType = TechnosphereEdge + } + +-- | Extract nodes and edges from a 'LoopAwareTree'. extractNodesAndEdges :: Database -> LoopAwareTree -> Int -> Maybe Text -> M.Map Text ExportNode -> [TreeEdge] -> (M.Map Text ExportNode, [TreeEdge], TreeStats) extractNodesAndEdges db tree depth parentId nodeAcc edgeAcc = case tree of TreeLeaf activity -> - let childrenCount = countPotentialChildren db activity - processIdText = getTreeNodeId db tree - node = - ExportNode - { enId = processIdText -- Now ProcessId format - , enName = activityName activity - , enDescription = activityDescription activity - , enLocation = activityLocation activity - , enUnit = activityUnit activity - , enNodeType = ActivityNode - , enDepth = depth - , enLoopTarget = Nothing - , enParentId = parentId - , enChildrenCount = childrenCount - , enCompartment = Nothing - } - nodes' = M.insert processIdText node nodeAcc -- Use ProcessId as key - -- Add biosphere nodes and edges only for depth 0 (root level) - (nodes'', edges') = - if depth == 0 - then extractBiosphereNodesAndEdges db activity processIdText depth nodes' edgeAcc - else (nodes', edgeAcc) + let nodeId = getTreeNodeId db tree + nodes' = M.insert nodeId (mkActivityExportNode db activity nodeId depth parentId) nodeAcc + (nodes'', edges') = withRootBiosphere db activity nodeId depth (nodes', edgeAcc) in (nodes'', edges', TreeStats 1 0 1) TreeLoop uuid name loopDepth -> - let nodeId = getTreeNodeId db tree -- Use ProcessId format for consistency - uuidText = UUID.toText uuid -- Keep bare UUID for loopTarget - -- Look up the actual activity to get real unit and location - maybeActivity = findActivityByActivityUUID db uuid - (actualLocation, actualUnit) = case maybeActivity of - Just activity -> (activityLocation activity, activityUnit activity) - Nothing -> ("N/A", "N/A") -- Fallback only if activity not found - node = - ExportNode - { enId = nodeId -- Now uses ProcessId format - , enName = name - , enDescription = ["Loop reference"] - , enLocation = actualLocation - , enUnit = actualUnit - , enNodeType = LoopNode - , enDepth = loopDepth - , enLoopTarget = Just uuidText - , enParentId = parentId - , enChildrenCount = 0 -- Loops don't expand - , enCompartment = Nothing - } - nodes' = M.insert nodeId node nodeAcc -- Store with ProcessId format key + let nodeId = getTreeNodeId db tree + nodes' = M.insert nodeId (mkLoopExportNode db uuid nodeId name loopDepth parentId) nodeAcc in (nodes', edgeAcc, TreeStats 1 1 0) TreeNode activity children -> - let childrenCount = countPotentialChildren db activity - currentProcessId = getTreeNodeId db tree - parentNode = - ExportNode - { enId = currentProcessId -- Now ProcessId format - , enName = activityName activity - , enDescription = activityDescription activity - , enLocation = activityLocation activity - , enUnit = activityUnit activity - , enNodeType = ActivityNode - , enDepth = depth - , enLoopTarget = Nothing - , enParentId = parentId - , enChildrenCount = childrenCount - , enCompartment = Nothing - } - nodes' = M.insert currentProcessId parentNode nodeAcc -- Use ProcessId as key - processChild (quantity, flow, subtree) (nodeAcc', edgeAcc', statsAcc) = - let (childNodes, childEdges, childStats') = extractNodesAndEdges db subtree (depth + 1) (Just currentProcessId) nodeAcc' edgeAcc' - edge = - TreeEdge - { teFrom = currentProcessId - , teTo = getTreeNodeId db subtree - , teFlow = FlowInfo (tfId flow) (tfName flow) "" - , teQuantity = quantity - , teUnit = getUnitNameForTechFlow (dbUnits db) flow - , teEdgeType = TechnosphereEdge - } - newStats = statsAcc <> childStats' - in (childNodes, edge : childEdges, newStats) - (finalNodes, finalEdges, combinedStats) = foldr processChild (nodes', edgeAcc, TreeStats 1 0 0) children - -- Add biosphere nodes and edges only for depth 0 (root level) - (finalNodesWithBio, finalEdgesWithBio) = - if depth == 0 - then extractBiosphereNodesAndEdges db activity currentProcessId depth finalNodes finalEdges - else (finalNodes, finalEdges) - in (finalNodesWithBio, finalEdgesWithBio, combinedStats) + let nodeId = getTreeNodeId db tree + nodes' = M.insert nodeId (mkActivityExportNode db activity nodeId depth parentId) nodeAcc + (childNodes, childEdges, childStats) = foldr (processChild nodeId) (nodes', edgeAcc, TreeStats 1 0 0) children + (finalNodes, finalEdges) = withRootBiosphere db activity nodeId depth (childNodes, childEdges) + in (finalNodes, finalEdges, childStats) + where + processChild parentPid (quantity, flow, subtree) (nodes, edges, stats) = + let (n', e', s') = extractNodesAndEdges db subtree (depth + 1) (Just parentPid) nodes edges + edge = mkTechnosphereTreeEdge (dbUnits db) parentPid (getTreeNodeId db subtree) quantity flow + in (n', edge : e', stats <> s') -- | Convert LoopAwareTree to TreeExport format for JSON serialization convertToTreeExport :: Database -> Text -> Int -> LoopAwareTree -> TreeExport @@ -665,126 +673,112 @@ filterTreeExport pat export = meta = (teTree export){tmTotalNodes = M.size filteredNodes} in export{teTree = meta, teNodes = filteredNodes, teEdges = filteredEdges} -{- | Build activity network graph from factorized matrix column -Uses efficient sparse matrix operations to extract connections +{- | Activities whose absolute cumulative value clears the threshold, plus the +root activity (which we always surface, even if it falls below). Out-of-bounds +roots become a zero-valued entry rather than crashing. +-} +selectSignificantActivities :: Double -> ProcessId -> [Double] -> [(ProcessId, Double)] +selectSignificantActivities threshold rootPid supplyList = + let kept = + [ (fromIntegral idx :: ProcessId, val) + | (idx, val) <- zip [(0 :: Int) ..] supplyList + , abs val > threshold || idx == fromIntegral rootPid + ] + rootInBounds = fromIntegral rootPid < length supplyList + in if rootInBounds then kept else (rootPid, 0.0) : kept + +{- | True iff the exchange is a technosphere @Input@ whose link points at the +given target activity. Waste exchanges aren't traversed by the graph builder +(they don't form upstream tech edges). +-} +isInputLinkTo :: UUID -> Exchange -> Bool +isInputLinkTo targetUUID ex@TechnosphereExchange{techRole = Input} = + exchangeActivityLinkId ex == Just targetUUID +isInputLinkTo _ TechnosphereExchange{} = False +isInputLinkTo _ BiosphereExchange{} = False +isInputLinkTo _ WasteExchange{} = False + +{- | Build one 'GraphEdge' from a sparse technosphere triple. Returns 'Nothing' +when either endpoint is outside the projected subgraph (i.e. below cutoff) or +the triple itself is zero. When the supplier flow can't be resolved we still +emit the edge — with sentinel name/unit — so the gap is debuggable instead of +silently dropped. +-} +mkGraphEdgeFromTriple + :: Database + -> V.Vector Activity + -> UnitDB + -> TechFlowDB + -> M.Map ProcessId Int + -> SparseTriple + -> Maybe GraphEdge +mkGraphEdgeFromTriple db activities units flows nodeIdMap (SparseTriple row col value) + | value == 0.0 = Nothing + | otherwise = do + let sourcePid = fromIntegral row :: ProcessId + targetPid = fromIntegral col :: ProcessId + src <- M.lookup sourcePid nodeIdMap + tgt <- M.lookup targetPid nodeIdMap + let matchingExchange = do + srcAct <- activities V.!? fromIntegral row + (targetUUID, _) <- processIdToUUIDs db targetPid + L.find (isInputLinkTo targetUUID) (exchanges srcAct) + flowInfo = matchingExchange >>= \ex -> M.lookup (exchangeFlowId ex) flows + uName = maybe "" (getUnitNameForTechFlow units) flowInfo + flowName = case (flowInfo, matchingExchange) of + (Just f, _) -> tfName f + (Nothing, Just ex) -> unresolvedFlowName (exchangeFlowId ex) + (Nothing, Nothing) -> "" + pure $ GraphEdge src tgt (realToFrac value) uName flowName + +{- | 'GraphNode' for one significant activity. Out-of-bounds 'ProcessId's get a +sentinel node rather than crashing — preserves the project's "no silent +errors, no silent successes" stance. +-} +mkGraphNode :: Database -> V.Vector Activity -> Int -> (ProcessId, Double) -> GraphNode +mkGraphNode db activities nodeId (pid, cumulativeVal) = + let processIdText = processIdToText db pid + in case activities V.!? fromIntegral pid of + Just activity -> + GraphNode + { gnNodeId = nodeId + , gnLabel = activityName activity + , gnValue = cumulativeVal + , gnUnit = activityUnit activity + , gnProcessId = processIdText + , gnLocation = activityLocation activity + } + Nothing -> + GraphNode + { gnNodeId = nodeId + , gnLabel = " processIdText <> ">" + , gnValue = cumulativeVal + , gnUnit = "" + , gnProcessId = processIdText + , gnLocation = "" + } + +{- | Build activity network graph from factorized matrix column. +Uses efficient sparse matrix operations to extract connections. -} buildActivityGraph :: Database -> SharedSolver -> Text -> Double -> IO (Either ServiceError GraphExport) -buildActivityGraph db sharedSolver queryText cutoffPercent = do +buildActivityGraph db sharedSolver queryText cutoffPercent = case resolveActivityAndProcessId db queryText of - Left err -> return $ Left err + Left err -> pure (Left err) Right (processId, _activity) -> do - -- Step 1: Get factorized column (cumulative amounts) by solving - let activityIndex = dbActivityIndex db - demandVec = buildDemandVectorFromIndex activityIndex processId - - -- Solve to get cumulative amounts (lazy factorization on first call) - supplyVec <- solveWithSharedSolver sharedSolver demandVec + supplyVec <- solveWithSharedSolver sharedSolver (buildDemandVectorFromIndex (dbActivityIndex db) processId) let supplyList = toList supplyVec - totalSupply = sum [abs val | val <- supplyList] - threshold = totalSupply * (cutoffPercent / 100.0) - - -- Step 2: Filter by cutoff to get significant activities - -- Build list of (ProcessId, cumulative value) for activities above threshold - -- Always include the root activity (processId) even if below threshold - let allSignificantActivities = - [ (fromIntegral idx :: ProcessId, val) - | (idx, val) <- zip [(0 :: Int) ..] supplyList - , abs val > threshold - ] - -- Ensure root activity is always included - significantActivities = - if processId `elem` map fst allSignificantActivities - then allSignificantActivities - else - let rootValue = - if fromIntegral processId < length supplyList - then supplyList !! fromIntegral processId - else 0.0 - in (processId, rootValue) : allSignificantActivities - - -- Step 3: Build node ID mapping (ProcessId -> Int) for frontend efficiency - let nodeIdMap = M.fromList [(pid, idx) | (idx, (pid, _)) <- zip [0 ..] significantActivities] - - -- Step 4: Extract direct connections from technosphere matrix - -- For each significant activity, find edges in dbTechnosphereTriples - let techTriples = dbTechnosphereTriples db + threshold = sum (map abs supplyList) * (cutoffPercent / 100.0) + significantActivities = selectSignificantActivities threshold processId supplyList + nodeIdMap = M.fromList [(pid, idx) | (idx, (pid, _)) <- zip [0 ..] significantActivities] activities = dbActivities db - units = dbUnits db - flows = dbTechFlows db - - -- Build edges: iterate through sparse triplets edges = - [ let sourceNodeId = M.lookup (fromIntegral row :: ProcessId) nodeIdMap - targetNodeId = M.lookup (fromIntegral col :: ProcessId) nodeIdMap - sourceActivity = - if fromIntegral row < V.length activities - then Just $ activities V.! fromIntegral row - else Nothing - targetProcessId = fromIntegral col :: ProcessId - -- Get target activity UUID from process ID table - targetActivityUUID = case processIdToUUIDs db targetProcessId of - Just (actUUID, _prodUUID) -> Just actUUID - Nothing -> Nothing - -- Find the technosphere-input exchange that points to - -- this target activity, so we can recover both the - -- flow UUID and (if known) its name + unit. - matchingExchange = do - srcAct <- sourceActivity - targetUUID <- targetActivityUUID - L.find - ( \ex -> case ex of - TechnosphereExchange{techRole = Input} -> - exchangeActivityLinkId ex == Just targetUUID - TechnosphereExchange{} -> False - BiosphereExchange{} -> False - -- Waste exchanges aren't traversed by the graph builder - -- (they don't form upstream tech edges). - WasteExchange{} -> False - ) - (exchanges srcAct) - flowInfo = matchingExchange >>= \ex -> M.lookup (exchangeFlowId ex) flows - -- When the supplier flow isn't in the tech map, the - -- exchange UUID is the only debug handle we have. - -- Surface it via the same sentinel shape as - -- ExchangeWithUnit / ApiUnresolvedFlow so the graph - -- doesn't silently mask a broken link. - uName = maybe "" (getUnitNameForTechFlow units) flowInfo - flowNameText = case (flowInfo, matchingExchange) of - (Just f, _) -> tfName f - (Nothing, Just ex) -> unresolvedFlowName (exchangeFlowId ex) - (Nothing, Nothing) -> "" - in case (sourceNodeId, targetNodeId) of - (Just src, Just tgt) -> - Just $ GraphEdge src tgt (realToFrac value) uName flowNameText - _ -> Nothing - | SparseTriple row col value <- U.toList techTriples - , value /= 0.0 - ] - - validEdges = [e | Just e <- edges] - - -- Step 5: Build nodes - let nodes = - [ let activity = - if fromIntegral pid < V.length activities - then activities V.! fromIntegral pid - else error $ "Invalid ProcessId in graph: " ++ show pid - processIdText = processIdToText db pid - in GraphNode - { gnNodeId = nodeId - , gnLabel = activityName activity - , gnValue = cumulativeVal - , gnUnit = activityUnit activity - , gnProcessId = processIdText - , gnLocation = activityLocation activity - } - | (nodeId, (pid, cumulativeVal)) <- zip [0 ..] significantActivities - ] - - -- Step 6: Build unit groups for normalization - let unitGroups = buildUnitGroups [gnUnit n | n <- nodes] - - return $ Right $ GraphExport nodes validEdges unitGroups + mapMaybe + (mkGraphEdgeFromTriple db activities (dbUnits db) (dbTechFlows db) nodeIdMap) + (U.toList (dbTechnosphereTriples db)) + nodes = zipWith (mkGraphNode db activities) [0 ..] significantActivities + unitGroups = buildUnitGroups (map gnUnit nodes) + pure $ Right $ GraphExport nodes edges unitGroups -- | Classify units into groups for edge width normalization buildUnitGroups :: [Text] -> M.Map Text Text From dcf6e25234ce4d58957b391d9c3855fab3a09bd4 Mon Sep 17 00:00:00 2001 From: Christophe Combelles Date: Wed, 27 May 2026 23:34:22 +0200 Subject: [PATCH 29/43] refactor(main): factor server bootstrap into single-purpose helpers runServerWithConfig was a 70-line staircase doing eleven things; createServerApp wrapped a 50-line inner closure that mixed routing, static-file serving and SPA fallback. Each step now has a name. runServerWithConfig (~15 lines now): - applyLoadOverride: pure, applies --load to the config - logLoadedDatabases: collapses the if/M.null branch - resolvePassword: 3-tier CLI -> config -> env fallback - logServerStartup: desktop-mode vs human-banner split - setupIdleTimeout: allocates refs and (optionally) forks the watchdog - wrapWithMiddleware: idleTracking . shutdown . (maybe authMiddleware) createServerApp (~10 lines now): - swaggerHtml: lifted to top-level constant - spaStaticSettings: the wai-app-static config - serveStripped / serveSpaIndex: the two static handlers, each now a standalone Application instead of let-bindings inside a closure - dispatchRequest: the path-based routing if/guards, parameterised on pre-built mcp + Servant apps so the inner closure shrinks to two lines - logRequest: per-request stdout log line Behaviour preserved (cabal test 1107 / 0 failures). Each new helper has a doc comment and is independently understandable. --- app/Main.hs | 286 ++++++++++++++++++++++++++++++---------------------- 1 file changed, 166 insertions(+), 120 deletions(-) diff --git a/app/Main.hs b/app/Main.hs index 72e25d25..be95a1d0 100644 --- a/app/Main.hs +++ b/app/Main.hs @@ -48,7 +48,7 @@ import Data.String (fromString) import Network.HTTP.Types (status200) import Network.HTTP.Types.Header (hCacheControl, hContentType, hPragma) import Network.Wai (Application, Request (..), Response, ResponseReceived, mapResponseHeaders, pathInfo, rawPathInfo, rawQueryString, requestHeaders, requestMethod, responseLBS, responseStream) -import Network.Wai.Application.Static (defaultWebAppSettings, ssIndices, staticApp) +import Network.Wai.Application.Static (StaticSettings, defaultWebAppSettings, ssIndices, staticApp) import Network.Wai.Handler.Warp (defaultSettings, runSettings, setPort, setTimeout) import Servant (serve) import WaiAppStatic.Types (MaxAge (..), ssMaxAge, unsafeToPiece) @@ -133,76 +133,97 @@ runStopWithoutConfig cliConfig = do rc <- resolveRemoteConfig (globalOptions cliConfig) Nothing executeRemoteCommand mgr rc (globalOptions cliConfig) Stop --- | Run server with multi-database configuration file -runServerWithConfig :: CLIConfig -> ServerOptions -> FilePath -> IO () -runServerWithConfig cliConfig serverOpts cfgFile = do - config <- loadConfigOrDie cfgFile - - -- Apply --load override if specified - let effectiveConfig = case serverLoadDbs serverOpts of - Nothing -> config - Just dbNames -> config{cfgDatabases = map (overrideLoad dbNames) (cfgDatabases config)} +-- | Apply the --load override (if any) to the in-memory config. +applyLoadOverride :: ServerOptions -> Config -> Config +applyLoadOverride serverOpts config = case serverLoadDbs serverOpts of + Nothing -> config + Just dbNames -> config{cfgDatabases = map (overrideLoad dbNames) (cfgDatabases config)} - -- Initialize DatabaseManager (pre-loads databases with load=true) - reportProgress Info "Initializing database manager..." - dbManager <- initDatabaseManager effectiveConfig (noCache (globalOptions cliConfig)) (Just cfgFile) - - -- Log database status (allow starting with no databases for BYOL mode) +-- | Log loaded databases (allows starting with none for BYOL mode). +logLoadedDatabases :: DatabaseManager -> IO () +logLoadedDatabases dbManager = do loadedDbs <- readTVarIO (dmLoadedDbs dbManager) - if M.null loadedDbs - then reportProgress Info "No databases loaded - upload or load one via the web interface" - else reportProgress Info $ "Loaded databases: " ++ intercalate ", " (map T.unpack (M.keys loadedDbs)) - - let port = fromMaybe (scPort (cfgServer effectiveConfig)) (serverPort serverOpts) - - -- Initialize matrix solver (no-op for MUMPS, kept for API compatibility) - initializeSolverForServer + reportProgress Info $ + if M.null loadedDbs + then "No databases loaded - upload or load one via the web interface" + else "Loaded databases: " ++ intercalate ", " (map T.unpack (M.keys loadedDbs)) - -- Get password from CLI, config, or env var - password <- case CLI.Types.serverPassword (globalOptions cliConfig) of - Just pwd -> return (Just pwd) - Nothing -> case scPassword (cfgServer effectiveConfig) of - Just pwd -> return (Just $ T.unpack pwd) - Nothing -> lookupEnv "VOLCA_PASSWORD" - - -- Determine static directory (--static-dir or default "web/dist") - let staticDir = fromMaybe "web/dist" (serverStaticDir serverOpts) - desktopMode = serverDesktopMode serverOpts - - -- In desktop mode, print machine-readable port for launcher, then minimal logging - if desktopMode - then do - putStrLn $ "VOLCA_PORT=" ++ show port - hFlush stdout - else do - reportProgress Info $ "Starting API server on port " ++ show port - reportProgress Info $ "Tree depth: " ++ show (serverTreeDepth serverOpts) - case password of - Just _ -> reportProgress Info "Authentication: ENABLED" - Nothing -> reportProgress Info "Authentication: DISABLED (use --password or VOLCA_PASSWORD to enable)" - reportProgress Info $ "Web interface available at: http://localhost:" ++ show port ++ "/" - - -- Idle timeout: track last request time, watchdog activated on demand via API +{- | Resolve the admin password from CLI flag, config file, or env var, in that +order. Returns 'Nothing' when authentication is disabled (no source set). +-} +resolvePassword :: GlobalOptions -> ServerConfig -> IO (Maybe String) +resolvePassword globalOpts serverCfg = case CLI.Types.serverPassword globalOpts of + Just pwd -> pure (Just pwd) + Nothing -> case scPassword serverCfg of + Just pwd -> pure (Just (T.unpack pwd)) + Nothing -> lookupEnv "VOLCA_PASSWORD" + +{- | In desktop mode, print a machine-readable port line for the launcher +to capture and stay quiet. Otherwise emit the human-facing startup banner. +-} +logServerStartup :: ServerOptions -> Int -> Maybe String -> IO () +logServerStartup serverOpts port password + | serverDesktopMode serverOpts = do + putStrLn ("VOLCA_PORT=" ++ show port) + hFlush stdout + | otherwise = do + reportProgress Info ("Starting API server on port " ++ show port) + reportProgress Info ("Tree depth: " ++ show (serverTreeDepth serverOpts)) + reportProgress Info $ case password of + Just _ -> "Authentication: ENABLED" + Nothing -> "Authentication: DISABLED (use --password or VOLCA_PASSWORD to enable)" + reportProgress Info ("Web interface available at: http://localhost:" ++ show port ++ "/") + +{- | Allocate the idle-tracking refs and fork the watchdog when +@--idle-timeout@ is positive. The refs are returned for both the +tracking and the shutdown middleware. +-} +setupIdleTimeout :: ServerOptions -> IO (IORef UTCTime, IORef Bool) +setupIdleTimeout serverOpts = do lastRequestRef <- newIORef =<< getCurrentTime idleActiveRef <- newIORef False - - -- If --idle-timeout is set, activate immediately (for scripts) let idleTimeout = serverIdleTimeout serverOpts when (idleTimeout > 0) $ do - reportProgress Info $ "Idle timeout: " ++ show idleTimeout ++ "s" + reportProgress Info ("Idle timeout: " ++ show idleTimeout ++ "s") writeIORef idleActiveRef True - _ <- forkIO $ idleWatchdog lastRequestRef idleActiveRef idleTimeout + _ <- forkIO (idleWatchdog lastRequestRef idleActiveRef idleTimeout) pure () + pure (lastRequestRef, idleActiveRef) - -- Create app with DatabaseManager - API handlers fetch current DB dynamically - baseApp <- Main.createServerApp dbManager (serverTreeDepth serverOpts) staticDir desktopMode password (cfgHosting effectiveConfig) (cfgClassificationPresets effectiveConfig) - let appWithIdleAndShutdown = +-- | Stack idle-tracking, shutdown-endpoint and (optionally) auth middleware. +wrapWithMiddleware :: Maybe String -> IORef UTCTime -> IORef Bool -> Application -> Application +wrapWithMiddleware password lastRequestRef idleActiveRef baseApp = + let withIdleAndShutdown = idleTrackingMiddleware lastRequestRef $ shutdownEndpoint lastRequestRef idleActiveRef baseApp - finalApp = case password of - Just pwd -> authMiddleware (C8.pack pwd) appWithIdleAndShutdown - Nothing -> appWithIdleAndShutdown - settings = setTimeout 600 $ setPort port defaultSettings + in case password of + Just pwd -> authMiddleware (C8.pack pwd) withIdleAndShutdown + Nothing -> withIdleAndShutdown + +-- | Run server with multi-database configuration file +runServerWithConfig :: CLIConfig -> ServerOptions -> FilePath -> IO () +runServerWithConfig cliConfig serverOpts cfgFile = do + config <- applyLoadOverride serverOpts <$> loadConfigOrDie cfgFile + reportProgress Info "Initializing database manager..." + dbManager <- initDatabaseManager config (noCache (globalOptions cliConfig)) (Just cfgFile) + logLoadedDatabases dbManager + initializeSolverForServer + let port = fromMaybe (scPort (cfgServer config)) (serverPort serverOpts) + staticDir = fromMaybe "web/dist" (serverStaticDir serverOpts) + password <- resolvePassword (globalOptions cliConfig) (cfgServer config) + logServerStartup serverOpts port password + (lastRequestRef, idleActiveRef) <- setupIdleTimeout serverOpts + baseApp <- + createServerApp + dbManager + (serverTreeDepth serverOpts) + staticDir + (serverDesktopMode serverOpts) + password + (cfgHosting config) + (cfgClassificationPresets config) + let finalApp = wrapWithMiddleware password lastRequestRef idleActiveRef baseApp + settings = setTimeout 600 (setPort port defaultSettings) runSettings settings finalApp {- | Run config load-only mode (load all databases from config and exit) @@ -226,7 +247,87 @@ overrideLoad :: [T.Text] -> DatabaseConfig -> DatabaseConfig overrideLoad dbNames dbConfig = dbConfig{dcLoad = dcName dbConfig `elem` dbNames} --- | Create a Wai application with DatabaseManager +{- | Swagger-UI shell that pulls the OpenAPI spec from our @/api/v1/openapi.json@ +endpoint. Served verbatim from @/api/v1/docs@; constant per build. +-} +swaggerHtml :: BSL.ByteString +swaggerHtml = + "volca API\ + \\ + \\ + \\ + \
\ + \\ + \\ + \" + +{- | Serve the Elm SPA bundle from @staticDir@, with the SPA's @index.html@ as +the directory index and no @max-age@ caching headers. +-} +spaStaticSettings :: FilePath -> StaticSettings +spaStaticSettings staticDir = + (defaultWebAppSettings staticDir) + { ssIndices = [unsafeToPiece (T.pack "index.html")] + , ssMaxAge = NoMaxAge + } + +{- | Serve files under @/static/@ by stripping the prefix and delegating +to wai-app-static. +-} +serveStripped :: StaticSettings -> Application +serveStripped settings req respond = + let strippedPath = BS.drop 7 (rawPathInfo req) + newPathInfo = case pathInfo req of + (segment : rest) | segment == T.pack "static" -> rest + other -> other + staticReq = req{rawPathInfo = strippedPath, pathInfo = newPathInfo} + in staticApp settings staticReq respond + +{- | Serve the SPA shell (@index.html@) for any non-API path, with cache-busting +headers so the browser always re-fetches the latest bundle. +-} +serveSpaIndex :: StaticSettings -> Application +serveSpaIndex settings req respond = + let indexReq = req{rawPathInfo = C8.pack "/", pathInfo = []} + noCacheRespond res = + respond $ + mapResponseHeaders + ( \hs -> + (hCacheControl, C8.pack "no-cache, no-store, must-revalidate") + : (hPragma, C8.pack "no-cache") + : hs + ) + res + in staticApp settings indexReq noCacheRespond + +{- | Path-based request dispatcher. The fixed endpoints (@/mcp@, +@/api/v1/{openapi.json,licenses,docs,logs/stream}@) match exactly; anything +under @/api/@ goes through Servant; @/static/@ serves bundled assets; the +catch-all hands back the SPA so client-side routing can handle the URL. +-} +dispatchRequest :: FilePath -> Application -> Application -> Application +dispatchRequest staticDir mcp apiApp req respond = + let path = rawPathInfo req + settings = spaStaticSettings staticDir + in if + | path == "/mcp" -> mcp req respond + | path == "/api/v1/openapi.json" -> + respond $ responseLBS status200 [(hContentType, "application/json")] (encode volcaOpenApi) + | path == "/api/v1/licenses" -> respond licensesResponse + | path == "/api/v1/docs" -> + respond $ responseLBS status200 [(hContentType, "text/html; charset=utf-8")] swaggerHtml + | path == "/api/v1/logs/stream" -> handleLogStream req respond + | C8.pack "/api/" `BS.isPrefixOf` path -> apiApp req respond + | C8.pack "/static/" `BS.isPrefixOf` path -> serveStripped settings req respond + | otherwise -> serveSpaIndex settings req respond + +-- | Per-request log line written to stdout (suppressed in desktop mode). +logRequest :: Request -> IO () +logRequest req = do + putStrLn $ C8.unpack (requestMethod req) ++ " " ++ C8.unpack (rawPathInfo req <> rawQueryString req) + hFlush stdout + +-- | Create a Wai application with DatabaseManager. createServerApp :: DatabaseManager -> Int -> FilePath -> Bool -> Maybe String -> Maybe HostingConfig -> [ClassificationPreset] -> IO Application createServerApp dbManager maxTreeDepth staticDir desktopMode password hostingConfig filterPresets = do -- The MCP @web_url@ deep links point at Elm SPA routes served from @@ -236,66 +337,11 @@ createServerApp dbManager maxTreeDepth staticDir desktopMode password hostingCon unless (desktopMode || hasFrontend) $ reportProgress Info "Frontend not bundled — MCP responses will omit 'web_url'" mcp <- mcpApp dbManager filterPresets hasFrontend - let openApiJson = encode volcaOpenApi - swaggerHtml = - "volca API\ - \\ - \\ - \\ - \
\ - \\ - \\ - \" - return $ \req respond -> do - let path = rawPathInfo req - qs = rawQueryString req - fullUrl = path <> qs - - -- Simple request logging (suppress in desktop mode) - unless desktopMode $ do - putStrLn $ C8.unpack (requestMethod req) ++ " " ++ C8.unpack fullUrl - hFlush stdout - - -- Route requests based on path prefix. - let staticSettings = - (defaultWebAppSettings staticDir) - { ssIndices = [unsafeToPiece (T.pack "index.html")] - , ssMaxAge = NoMaxAge - } - - serveStripped = - let strippedPath = BS.drop 7 path - newPathInfo = case pathInfo req of - (segment : rest) | segment == T.pack "static" -> rest - other -> other - staticReq = req{rawPathInfo = strippedPath, pathInfo = newPathInfo} - in staticApp staticSettings staticReq respond - - serveSpaIndex = - let indexReq = req{rawPathInfo = C8.pack "/", pathInfo = []} - noCacheRespond res = - respond $ - mapResponseHeaders - ( \hs -> - (hCacheControl, C8.pack "no-cache, no-store, must-revalidate") - : (hPragma, C8.pack "no-cache") - : hs - ) - res - in staticApp staticSettings indexReq noCacheRespond - - if - | path == "/mcp" -> mcp req respond - | path == "/api/v1/openapi.json" -> - respond $ responseLBS status200 [(hContentType, "application/json")] openApiJson - | path == "/api/v1/licenses" -> respond licensesResponse - | path == "/api/v1/docs" -> - respond $ responseLBS status200 [(hContentType, "text/html; charset=utf-8")] swaggerHtml - | path == "/api/v1/logs/stream" -> handleLogStream req respond - | C8.pack "/api/" `BS.isPrefixOf` path -> - serve lcaAPI (lcaServer (mkAppEnv dbManager maxTreeDepth password hostingConfig filterPresets)) req respond - | C8.pack "/static/" `BS.isPrefixOf` path -> serveStripped - | otherwise -> serveSpaIndex + let env = mkAppEnv dbManager maxTreeDepth password hostingConfig filterPresets + apiApp = serve lcaAPI (lcaServer env) + pure $ \req respond -> do + unless desktopMode (logRequest req) + dispatchRequest staticDir mcp apiApp req respond -- | SSE endpoint for real-time log streaming handleLogStream :: Request -> (Response -> IO ResponseReceived) -> IO ResponseReceived From 095f2605a524c05b4cb35e55ace341c169c12d6d Mon Sep 17 00:00:00 2001 From: Christophe Combelles Date: Wed, 27 May 2026 23:39:20 +0200 Subject: [PATCH 30/43] refactor(service): dedupe ActivitySummary builders, slim searchActivities MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The (ProcessId, Activity) -> ActivitySummary projection was inlined in five places — search results, supply-chain entries, the products index, the technosphere navigation helper, and the per-flow consumer list — each running the same getReferenceProductInfo + activity-allocation + native-type plumbing. Extract once, replace at every call site that uses 'dbUnits db' as the unit DB. New helpers: - mkActivitySummary :: Database -> ProcessId -> Activity -> ActivitySummary - unknownActivitySummary :: Database -> ProcessId -> ActivitySummary (surfaces a typed placeholder when the products index points at a dropped Activity, instead of silently weaving Nothings through the record) searchActivities body shrunk from ~55 to ~16 lines: - tryBm25Retrieve uses 'do' + 'guard' over Maybe instead of nested case-with-guards, expressing the four predicates as a flat chain - activityRowComparator extracted to a named top-level (the inline case-of needed two-level matching to compare per-tuple) - paginateSearchResults wraps the offset / limit / hasMore arithmetic plus the per-row projection, returning the wire-format SearchResults directly — the per-call boilerplate disappears getActivitiesUsingFlow, getTargetActivity, getAllProductsForActivity each collapse to a couple of lines. Verified: cabal build clean, cabal test 1107 / 0 failures. --- src/Service.hs | 193 ++++++++++++++++++++++++------------------------- 1 file changed, 93 insertions(+), 100 deletions(-) diff --git a/src/Service.hs b/src/Service.hs index a9253db1..fa2e8496 100644 --- a/src/Service.hs +++ b/src/Service.hs @@ -10,7 +10,7 @@ import CLI.Types (DebugMatricesOptions (..)) import Control.Applicative ((<|>)) import Control.Concurrent.Async (mapConcurrently) import Control.Exception (SomeException, try) -import Control.Monad (foldM) +import Control.Monad (foldM, guard) import Control.Monad.Trans.Except (ExceptT (..), runExceptT) import Data.Aeson (Value, object, toJSON, (.=)) import Data.Either (fromRight, lefts, rights) @@ -904,67 +904,59 @@ nameFilterSet db mq = do q <- mq if T.null (T.strip q) then Nothing else bm25MatchingPids db q +{- | BM25 retrieval applies only when the user provided a non-empty name +query, didn't request exact matching, and didn't pick an explicit sort +column. Returns 'Nothing' otherwise so the caller falls back to lex-sorted +field matching. +-} +tryBm25Retrieve :: Database -> SearchFilter -> Maybe [(ProcessId, Activity)] +tryBm25Retrieve db (SearchFilter core exactMatch) = do + q <- afcName core + guard (not exactMatch) + guard (afcSort core /= Just "name" && afcSort core /= Just "location") + guard (not (T.null (T.strip q))) + bm25Retrieve db q + +{- | Lex-comparator for activity rows. Defaults to name; 'Just "location"' +picks the location key. +-} +activityRowComparator :: Maybe Text -> (ProcessId, Activity) -> (ProcessId, Activity) -> Ordering +activityRowComparator (Just "location") (_, a) (_, b) = compare (activityLocation a) (activityLocation b) +activityRowComparator _ (_, a) (_, b) = compare (activityName a) (activityName b) + +{- | Apply pagination (offset / limit, defaulting limit to 20) and emit a +'SearchResults' wrapping the projected page. Pure modulo the supplied +@searchTimeMs@. +-} +paginateSearchResults :: Maybe Int -> Maybe Int -> Double -> ((ProcessId, Activity) -> a) -> [(ProcessId, Activity)] -> SearchResults a +paginateSearchResults offsetParam limitParam searchTimeMs project xs = + let offset = maybe 0 (max 0) offsetParam + limit = fromMaybe 20 limitParam + total = length xs + page = map project (take limit (drop offset xs)) + hasMore = offset + limit < total + in SearchResults page total offset limit hasMore searchTimeMs + {- | Search activities (returns same format as API). The exact-match toggle is carried on 'SearchFilter' itself, so there is no separate positional flag. -} searchActivities :: Database -> SearchFilter -> IO (Either ServiceError Value) -searchActivities db (SearchFilter core exactMatch) = do - let nameParam = afcName core - geoParam = afcLocation core - productParam = afcProduct core - classFilters = afcClassifications core - limitParam = afcLimit core - offsetParam = afcOffset core - sortParam = afcSort core - orderParam = afcOrder core +searchActivities db sFilter@(SearchFilter core exactMatch) = do startTime <- getCurrentTime - let isDesc = orderParam == Just "desc" - explicitSort = sortParam == Just "name" || sortParam == Just "location" - -- BM25 retrieval applies only when the user provided a non-empty name - -- query, didn't request exact matching, and didn't pick a sort column. - bm25Retrieved = case nameParam of - Just q - | not exactMatch - , not explicitSort - , not (T.null (T.strip q)) -> - bm25Retrieve db q - _ -> Nothing - actCmp = case sortParam of - Just "location" -> \(_, a) (_, b) -> compare (activityLocation a) (activityLocation b) - _ -> \(_, a) (_, b) -> compare (activityName a) (activityName b) - allResults = case bm25Retrieved of + let allResults = case tryBm25Retrieve db sFilter of Just ranked -> -- BM25 path: ranked candidates → structured filters → preserve score order. - applyStructuredFilters db geoParam productParam classFilters False ranked + applyStructuredFilters db (afcLocation core) (afcProduct core) (afcClassifications core) False ranked Nothing -> -- Non-BM25 path: AND-of-tokens name filter + lex sort. - let rawResults = findActivitiesByFields db nameParam geoParam productParam classFilters exactMatch - in L.sortBy (if isDesc then flip actCmp else actCmp) rawResults - offset = maybe 0 (max 0) offsetParam - limit = fromMaybe 20 limitParam - total = length allResults - pagedResults = take limit $ drop offset allResults - hasMore = offset + limit < total - activityResults = - map - ( \(processId, activity) -> - let (prodName, prodAmount, prodUnit) = getReferenceProductInfo (dbTechFlows db) (dbUnits db) activity - in ActivitySummary - { prsProcessId = processIdToText db processId - , prsName = activityName activity - , prsLocation = activityLocation activity - , prsProduct = prodName - , prsProductAmount = prodAmount - , prsProductUnit = prodUnit - , prsAllocationPercent = activityAllocationPercent activity - , prsAllocationFormula = activityAllocationFormula activity - , prsNativeType = activityNativeType activity - } - ) - pagedResults + let cmp = activityRowComparator (afcSort core) + ordered = if afcOrder core == Just "desc" then flip cmp else cmp + raw = findActivitiesByFields db (afcName core) (afcLocation core) (afcProduct core) (afcClassifications core) exactMatch + in L.sortBy ordered raw endTime <- getCurrentTime let searchTimeMs = realToFrac (diffUTCTime endTime startTime) * 1000 :: Double - return $ Right $ toJSON $ SearchResults activityResults total offset limit hasMore searchTimeMs + results = paginateSearchResults (afcOffset core) (afcLimit core) searchTimeMs (uncurry (mkActivitySummary db)) allResults + pure $ Right $ toJSON results -- | List all classification systems and their distinct values for a database getClassifications :: Database -> [ClassificationSystem] @@ -1232,49 +1224,62 @@ getReferenceProductInfo flows units activity = in (name, amount, uName) [] -> ("", 1.0, "") --- | Get all products (ProcessIds) for an activity UUID using the products index +{- | Build an 'ActivitySummary' from a (ProcessId, Activity) pair. Encapsulates +the reference-product + allocation + native-type projection shared by +search results, supply-chain entries, inventory metadata, and exchange-target +navigation. Uses @dbUnits db@ for the unit DB — callers needing a merged +cross-DB unit DB build the record by hand. +-} +mkActivitySummary :: Database -> ProcessId -> Activity -> ActivitySummary +mkActivitySummary db processId activity = + let (prodName, prodAmount, prodUnit) = getReferenceProductInfo (dbTechFlows db) (dbUnits db) activity + in ActivitySummary + { prsProcessId = processIdToText db processId + , prsName = activityName activity + , prsLocation = activityLocation activity + , prsProduct = prodName + , prsProductAmount = prodAmount + , prsProductUnit = prodUnit + , prsAllocationPercent = activityAllocationPercent activity + , prsAllocationFormula = activityAllocationFormula activity + , prsNativeType = activityNativeType activity + } + +{- | Placeholder summary surfaced when the products index points at a +ProcessId that no longer resolves to an Activity. Carries the raw pid so the +consumer can debug, rather than silently dropping the entry. +-} +unknownActivitySummary :: Database -> ProcessId -> ActivitySummary +unknownActivitySummary db pid = + ActivitySummary + { prsProcessId = processIdToText db pid + , prsName = "Unknown" + , prsLocation = "" + , prsProduct = "Unknown" + , prsProductAmount = 1.0 + , prsProductUnit = "" + , prsAllocationPercent = Nothing + , prsAllocationFormula = Nothing + , prsNativeType = Nothing + } + +-- | Get all products (ProcessIds) for an activity UUID using the products index. getAllProductsForActivity :: Database -> UUID -> [ActivitySummary] getAllProductsForActivity db activityUUID = case M.lookup activityUUID (dbActivityProductsIndex db) of + Nothing -> [] Just processIds -> - [ let mAct = findActivityByProcessId db pid - (prodName, prodAmount, prodUnit) = case mAct of - Just a -> getReferenceProductInfo (dbTechFlows db) (dbUnits db) a - Nothing -> ("Unknown", 1.0, "") - in ActivitySummary - { prsProcessId = processIdToText db pid - , prsName = maybe "Unknown" activityName mAct - , prsLocation = maybe "" activityLocation mAct - , prsProduct = prodName - , prsProductAmount = prodAmount - , prsProductUnit = prodUnit - , prsAllocationPercent = mAct >>= activityAllocationPercent - , prsAllocationFormula = mAct >>= activityAllocationFormula - , prsNativeType = mAct >>= activityNativeType - } + [ maybe (unknownActivitySummary db pid) (mkActivitySummary db pid) (findActivityByProcessId db pid) | pid <- processIds ] - Nothing -> [] --- | Get target activity for technosphere navigation +-- | Get target activity for technosphere navigation. getTargetActivity :: Database -> Exchange -> Maybe ActivitySummary getTargetActivity db exchange = do targetId <- exchangeActivityLinkId exchange targetActivity <- findActivityByActivityUUID db targetId processId <- findProcessIdForActivity db targetActivity - let (prodName, prodAmount, prodUnit) = getReferenceProductInfo (dbTechFlows db) (dbUnits db) targetActivity - return $ - ActivitySummary - { prsProcessId = processIdToText db processId - , prsName = activityName targetActivity - , prsLocation = activityLocation targetActivity - , prsProduct = prodName - , prsProductAmount = prodAmount - , prsProductUnit = prodUnit - , prsAllocationPercent = activityAllocationPercent targetActivity - , prsAllocationFormula = activityAllocationFormula targetActivity - , prsNativeType = activityNativeType targetActivity - } + pure (mkActivitySummary db processId targetActivity) {- | Get reference product as FlowDetail (if exists). Reference products are technosphere by definition. @@ -1289,29 +1294,17 @@ getActivityReferenceProductDetail db activity = do let uName = getUnitNameForTechFlow (dbUnits db) flow return $ FlowDetail (ApiTechFlow flow) uName usageCount --- | Get activities that use a specific flow as ActivitySummary list +-- | Get activities that use a specific flow as ActivitySummary list. getActivitiesUsingFlow :: Database -> UUID -> [ActivitySummary] getActivitiesUsingFlow db flowUUID = case M.lookup flowUUID (idxByFlow $ dbIndexes db) of Nothing -> [] Just activityUUIDs -> - let uniqueUUIDs = S.toList $ S.fromList activityUUIDs -- Deduplicate activity UUIDs - in [ let (prodName, prodAmount, prodUnit) = getReferenceProductInfo (dbTechFlows db) (dbUnits db) proc - in ActivitySummary - { prsProcessId = processIdToText db processId - , prsName = activityName proc - , prsLocation = activityLocation proc - , prsProduct = prodName - , prsProductAmount = prodAmount - , prsProductUnit = prodUnit - , prsAllocationPercent = activityAllocationPercent proc - , prsAllocationFormula = activityAllocationFormula proc - , prsNativeType = activityNativeType proc - } - | procUUID <- uniqueUUIDs - , Just proc <- [findActivityByActivityUUID db procUUID] - , Just processId <- [findProcessIdForActivity db proc] - ] + [ mkActivitySummary db processId proc + | procUUID <- S.toList (S.fromList activityUUIDs) + , Just proc <- [findActivityByActivityUUID db procUUID] + , Just processId <- [findProcessIdForActivity db proc] + ] {- | Sentinel returned only when an exchange's unit UUID failed to resolve. The exchange unit-name field already surfaces the same gap via From 7fcd75d0571586be0c030873aa2c8c66fbac6621 Mon Sep 17 00:00:00 2001 From: Christophe Combelles Date: Thu, 28 May 2026 17:50:48 +0200 Subject: [PATCH 31/43] ecospold1: dedup the two SAX folds into shared handlers (#94) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Summary `EcoSpold.Parser1` carried two ~95% identical `X.fold` parsers — `parseWithXeno` (first dataset) and `parseAllWithXeno` (all datasets) — including byte-for-byte copies of `buildExchange` and `buildResult`. This consolidates them and flattens the "triangular" handlers, with no behavior change. - **One shared handler set.** All SAX callbacks plus `buildExchange`/`buildResult` are lifted to top level. Both public functions become thin drivers over a single `foldEcoSpold1`, differing only in how they read the final `ParseState`. - **Flat guarded setters.** The per-field `if isElement … then … else ` attribute updates become guarded single-field setters (`setRefFunctionAttr`, `setExchangeAttr`). - **`buildExchange`** is now three top-level guards (`isWasteFlow` / `isBiosphere` / technosphere) over a shared `where`, instead of a nested-if pyramid. - **Close handlers** gain `popPath`/`popElement` helpers, removing repeated path/text-accumulator boilerplate. - **Exhaustive matches.** Every `ElementContext` and `Exchange` match is now total (no wildcards), so incomplete matches are caught at compile time. Net: 811 → 556 lines (−255), no new dependencies beyond `Data.Bifunctor`. ## Test plan - [x] `cabal build` — clean, no incomplete-pattern warnings - [x] `cabal run lca-tests -- --match "/EcoSpold1/"` — 48/48 pass (error cases, comments, waste-on-inputGroup=5, multi-dataset) - [x] Full suite — 1107 examples, 0 failures (LoaderSpec exercises the EcoSpold1 path) --- src/EcoSpold/Parser1.hs | 891 +++++++++++++++------------------------- 1 file changed, 327 insertions(+), 564 deletions(-) diff --git a/src/EcoSpold/Parser1.hs b/src/EcoSpold/Parser1.hs index 2b7eba0a..0c18de02 100644 --- a/src/EcoSpold/Parser1.hs +++ b/src/EcoSpold/Parser1.hs @@ -25,6 +25,7 @@ module EcoSpold.Parser1 ( ) where import Control.Monad (forM_) +import Data.Bifunctor (first) import qualified Data.ByteString as BS import Data.Either (lefts, rights) import qualified Data.Map as M @@ -147,303 +148,333 @@ initialParseState = , psCompletedActivities = [] } --- | Xeno SAX parser for EcoSpold1 -parseWithXeno :: BS.ByteString -> Either String (Activity, [TechnosphereFlow], [BiosphereFlow], [WasteFlow], [Unit], Int, M.Map UUID Int) -parseWithXeno xmlContent = - case X.fold openTag attribute endOpen text closeTag cdata initialParseState xmlContent of - Left err -> Left (show err) - Right finalState -> - case psCompletedActivities finalState of - (result : _) -> result - [] -> buildResult finalState +-- ---------------------------------------------------------------------------- +-- Shared SAX handlers (used by both parseWithXeno and parseAllWithXeno) +-- ---------------------------------------------------------------------------- + +-- | Drop the current element from the path, keeping accumulated text. +popPath :: ParseState -> ParseState +popPath s = s{psPath = drop 1 (psPath s)} + +-- | Drop the current element from the path and clear accumulated text. +popElement :: ParseState -> ParseState +popElement s = s{psPath = drop 1 (psPath s), psTextAccum = []} + +-- | Open tag: push the element onto the path and switch context on the +-- structural elements we care about. Groups only enter their context from +-- within an exchange; any other current context is preserved. +onOpenTag :: ParseState -> BS.ByteString -> ParseState +onOpenTag state tagName = + state{psPath = tagName : psPath state, psContext = newContext, psTextAccum = []} + where + enterGroup wrap = case psContext state of + InExchange edata -> wrap edata + ctx -> ctx + newContext + | isElement tagName "referenceFunction" = InReferenceFunction + | isElement tagName "geography" = InGeography + | isElement tagName "exchange" = InExchange emptyExchangeData + | isElement tagName "inputGroup" = enterGroup InInputGroup + | isElement tagName "outputGroup" = enterGroup InOutputGroup + | otherwise = psContext state + +-- | Attribute: route by current context to the matching field setter. +onAttribute :: ParseState -> BS.ByteString -> BS.ByteString -> ParseState +onAttribute state name value = case psContext state of + InReferenceFunction -> setRefFunctionAttr name value state + InGeography + | isElement name "location" -> state{psLocation = Just (bsToText value)} + | otherwise -> state + InExchange edata -> state{psContext = InExchange (setExchangeAttr name value edata)} + InInputGroup _ -> datasetNumberAttr + InOutputGroup _ -> datasetNumberAttr + Other -> datasetNumberAttr where - -- Open tag handler - openTag state tagName = - let newPath = tagName : psPath state - newContext - | isElement tagName "referenceFunction" = InReferenceFunction - | isElement tagName "geography" = InGeography - | isElement tagName "exchange" = InExchange emptyExchangeData - | isElement tagName "inputGroup" = - case psContext state of - InExchange edata -> InInputGroup edata -- Preserve exchange data - _ -> psContext state - | isElement tagName "outputGroup" = - case psContext state of - InExchange edata -> InOutputGroup edata -- Preserve exchange data - _ -> psContext state - | otherwise = psContext state - in state{psPath = newPath, psContext = newContext, psTextAccum = []} - - -- Attribute handler - attribute state name value = - case psContext state of - InReferenceFunction -> - let st = - state - { psActivityName = - if isElement name "name" - then Just (bsToText value) - else psActivityName state - , psRefUnit = - if isElement name "unit" - then Just (bsToText value) - else psRefUnit state - , psActivityCategory = - if isElement name "category" - then bsToText value - else psActivityCategory state - , psActivitySubCategory = - if isElement name "subCategory" - then bsToText value - else psActivitySubCategory state - , psDescription = - if isElement name "generalComment" && not (BS.null value) - then bsToText value : psDescription state - else psDescription state - } - in st - InGeography -> - if isElement name "location" - then state{psLocation = Just (bsToText value)} - else state - InExchange edata -> - let updated = - edata - { exNumber = if isElement name "number" then bsToInt value else exNumber edata - , exName = if isElement name "name" then bsToText value else exName edata - , exCategory = if isElement name "category" then bsToText value else exCategory edata - , exSubCategory = if isElement name "subCategory" then bsToText value else exSubCategory edata - , exLocation = if isElement name "location" then bsToText value else exLocation edata - , exUnit = if isElement name "unit" then bsToText value else exUnit edata - , exMeanValue = if isElement name "meanValue" then bsToDouble value else exMeanValue edata - , exCASNumber = if isElement name "CASNumber" then bsToText value else exCASNumber edata - , exFormula = if isElement name "formula" then bsToText value else exFormula edata - , exInfrastructure = - if isElement name "infrastructureProcess" - then bsToText value == "true" - else exInfrastructure edata - , exComment = if isElement name "generalComment" then bsToText value else exComment edata - } - in state{psContext = InExchange updated} - _ -> - -- Handle dataset number at top level - if isElement name "number" && any (isElement "dataset") (psPath state) - then state{psDatasetNumber = bsToInt value} - else state - - -- End of opening tag - endOpen state _tagName = state - - -- Text content handler - text state content = - let trimmed = BS.dropWhile (== 32) $ BS.dropWhileEnd (== 32) content - in if BS.null trimmed - then state - else state{psTextAccum = trimmed : psTextAccum state} - - -- Close tag handler - closeTag state tagName - | isElement tagName "inputGroup" = - let txt = T.strip $ T.concat $ reverse $ map bsToText (psTextAccum state) - in case psContext state of - InInputGroup edata -> - -- Restore parent exchange context with updated inputGroup - state{psContext = InExchange edata{exInputGroup = txt}, psPath = drop 1 (psPath state), psTextAccum = []} - InExchange edata -> - state{psContext = InExchange edata{exInputGroup = txt}, psPath = drop 1 (psPath state), psTextAccum = []} - _ -> state{psPath = drop 1 (psPath state), psTextAccum = []} - | isElement tagName "outputGroup" = - let txt = T.strip $ T.concat $ reverse $ map bsToText (psTextAccum state) - in case psContext state of - InOutputGroup edata -> - -- Restore parent exchange context with updated outputGroup - state{psContext = InExchange edata{exOutputGroup = txt}, psPath = drop 1 (psPath state), psTextAccum = []} - InExchange edata -> - state{psContext = InExchange edata{exOutputGroup = txt}, psPath = drop 1 (psPath state), psTextAccum = []} - _ -> state{psPath = drop 1 (psPath state), psTextAccum = []} - | isElement tagName "exchange" = - case psContext state of - InExchange edata -> - let (exchange, parsedFlow, unit) = buildExchange (psDatasetNumber state) (psLocation state) edata - !supplierLinks = case exchange of - TechnosphereExchange{techRole = Input} - | exNumber edata /= 0 -> - M.insert (exchangeFlowId exchange) (exNumber edata) (psSupplierLinks state) - _ -> psSupplierLinks state - (techs, bios, wastes) = case parsedFlow of - ParsedTech tf -> (tf : psTechFlows state, psBioFlows state, psWasteFlows state) - ParsedBio bf -> (psTechFlows state, bf : psBioFlows state, psWasteFlows state) - ParsedWaste wf -> (psTechFlows state, psBioFlows state, wf : psWasteFlows state) - in state - { psExchanges = exchange : psExchanges state - , psTechFlows = techs - , psBioFlows = bios - , psWasteFlows = wastes - , psUnits = unit : psUnits state - , psSupplierLinks = supplierLinks - , psContext = Other - , psPath = drop 1 (psPath state) - , psTextAccum = [] - } - _ -> state{psPath = drop 1 (psPath state)} - | isElement tagName "referenceFunction" = - state{psContext = Other, psPath = drop 1 (psPath state), psTextAccum = []} - | isElement tagName "geography" = - state{psContext = Other, psPath = drop 1 (psPath state), psTextAccum = []} - -- Handle dataset close tag: accumulate completed activity for multi-dataset files - | isElement tagName "dataset" = - let !result = buildResult state - -- Reset dataset-specific fields for next dataset - -- Preserve: psPath (after popping current element), psCompletedActivities - resetState = - state - { psCompletedActivities = result : psCompletedActivities state - , psDatasetNumber = 0 - , psActivityName = Nothing - , psActivityCategory = "" - , psActivitySubCategory = "" - , psLocation = Nothing - , psRefUnit = Nothing - , psDescription = [] - , psExchanges = [] - , psTechFlows = [] - , psBioFlows = [] - , psWasteFlows = [] - , psUnits = [] - , psContext = Other - , psTextAccum = [] - , psSupplierLinks = M.empty - } - in resetState{psPath = drop 1 (psPath state)} - | otherwise = - state{psPath = drop 1 (psPath state)} - - -- CDATA handler - cdata = text - - -- Build exchange, flow, and unit from exchange data - -- activityLoc is the activity's location for fallback - buildExchange :: Int -> Maybe Text -> ExchangeData -> (Exchange, ParsedFlow, Unit) - buildExchange datasetNum activityLoc edata = - let flowId = generateFlowUUID datasetNum (exNumber edata) (exName edata) (exCategory edata) - unitId = generateUnitUUID (exUnit edata) - - -- Determine flow type from input/output groups - -- EcoSpold1 groups: - -- Input: 1-3 = technosphere, 4 = resource (biosphere) - -- Output: 0 = reference product, 1-3 = byproduct/co-product, 4 = emission (biosphere) - inputGroup = exInputGroup edata - outputGroup = exOutputGroup edata - - isBiosphere = inputGroup == "4" || outputGroup == "4" - isInput = not (T.null inputGroup) - isReferenceProduct = outputGroup == "0" - -- SimaPro's third flow class ('Final waste flows'). EcoSpold1 - -- exports surface them on inputGroup=5 to fit the 4-type - -- input/output model, but the category attribute survives. - -- Route to WasteExchange so they bypass cross-DB technosphere - -- linking (orphan outputs, not demands) and land in the - -- dedicated waste-side surfaces. - isWasteFlow = exCategory edata == "Final waste flows" - - -- Exchange location: use exchange's own location - -- For technosphere: leave empty if not specified, so Loader can use name-only lookup - -- For biosphere: fall back to activity location (biosphere flows don't need supplier linking) - exchangeLocation = - if T.null (exLocation edata) - then - if isBiosphere - then fromMaybe "" activityLoc - else "" -- Technosphere: leave empty for name-only lookup in Loader - else exLocation edata - - cas = if T.null (exCASNumber edata) then Nothing else Just (exCASNumber edata) - unit = Unit unitId (exUnit edata) (exUnit edata) "" - - -- Role: EcoSpold1 never emits ReferenceInput (no waste-treatment encoding here) - techRoleFor - | isReferenceProduct = ReferenceProduct - | isInput = Input - | otherwise = Coproduct - in -- Set activityLinkId to nil - will be resolved later in Loader using - -- (flowName, exchangeLocation) lookup against supplier activities - if isWasteFlow - then - let wf = WasteFlow flowId (exName edata) unitId M.empty cas Nothing - ex = - WasteExchange - { waFlowId = flowId - , waAmount = exMeanValue edata - , waUnitId = unitId - , -- Final waste flows surface on inputGroup=5 - -- (consumer's POV: input from a hypothetical - -- treatment service). Preserve that semantic - -- by mirroring isInput here. - waIsInput = isInput - , waActivityLinkId = UUID.nil - , waProcessLinkId = Nothing - , waLocation = exchangeLocation - , waComment = nonEmptyText (exComment edata) - , waPedigree = Nothing - } - in (ex, ParsedWaste wf, unit) - else - if isBiosphere - then - let subCat = if T.null (exSubCategory edata) then Nothing else Just (exSubCategory edata) - compartment = - if T.null (exCategory edata) && isNothing subCat - then Nothing - else Just (Compartment (exCategory edata) subCat) - bioFlow = BiosphereFlow flowId (exName edata) unitId M.empty cas Nothing compartment - ex = - BiosphereExchange - { bioFlowId = flowId - , bioAmount = exMeanValue edata - , bioUnitId = unitId - , bioDirection = if inputGroup == "4" then Resource else Emission - , bioLocation = exchangeLocation - , bioComment = nonEmptyText (exComment edata) - , bioPedigree = Nothing - } - in (ex, ParsedBio bioFlow, unit) - else - let techFlow = TechnosphereFlow flowId (exName edata) unitId M.empty cas Nothing - ex = - TechnosphereExchange - { techFlowId = flowId - , techAmount = exMeanValue edata - , techUnitId = unitId - , techRole = techRoleFor - , techActivityLinkId = UUID.nil - , techProcessLinkId = Nothing - , techLocation = exchangeLocation - , techComment = nonEmptyText (exComment edata) - , techPedigree = Nothing - } - in (ex, ParsedTech techFlow, unit) - - -- Build final result - buildResult :: ParseState -> Either String (Activity, [TechnosphereFlow], [BiosphereFlow], [WasteFlow], [Unit], Int, M.Map UUID Int) - buildResult st = - let name = fromMaybe "Unknown Activity" (psActivityName st) - location = fromMaybe "GLO" (psLocation st) - refUnit = fromMaybe "UNKNOWN_UNIT" (psRefUnit st) - description = reverse (psDescription st) - classifications = - M.fromList $ - filter - (not . T.null . snd) - [("Category", psActivityCategory st), ("SubCategory", psActivitySubCategory st)] - activity = Activity name description M.empty classifications location refUnit (reverse $ psExchanges st) M.empty M.empty Nothing Nothing Nothing - techs = reverse (psTechFlows st) - bios = reverse (psBioFlows st) - wastes = reverse (psWasteFlows st) - units = reverse (psUnits st) - in case applyCutoffStrategy activity of - Right act -> Right (act, techs, bios, wastes, units, psDatasetNumber st, psSupplierLinks st) - Left err -> Left err + -- The dataset's numeric id lives on the top-level element. + datasetNumberAttr + | isElement name "number" && any (isElement "dataset") (psPath state) = + state{psDatasetNumber = bsToInt value} + | otherwise = state + +-- | Apply a single referenceFunction attribute to the parse state. +setRefFunctionAttr :: BS.ByteString -> BS.ByteString -> ParseState -> ParseState +setRefFunctionAttr name value st + | isElement name "name" = st{psActivityName = Just (bsToText value)} + | isElement name "unit" = st{psRefUnit = Just (bsToText value)} + | isElement name "category" = st{psActivityCategory = bsToText value} + | isElement name "subCategory" = st{psActivitySubCategory = bsToText value} + | isElement name "generalComment", not (BS.null value) = + st{psDescription = bsToText value : psDescription st} + | otherwise = st + +-- | Apply a single exchange attribute to the in-progress exchange. +setExchangeAttr :: BS.ByteString -> BS.ByteString -> ExchangeData -> ExchangeData +setExchangeAttr name value e + | isElement name "number" = e{exNumber = bsToInt value} + | isElement name "name" = e{exName = bsToText value} + | isElement name "category" = e{exCategory = bsToText value} + | isElement name "subCategory" = e{exSubCategory = bsToText value} + | isElement name "location" = e{exLocation = bsToText value} + | isElement name "unit" = e{exUnit = bsToText value} + | isElement name "meanValue" = e{exMeanValue = bsToDouble value} + | isElement name "CASNumber" = e{exCASNumber = bsToText value} + | isElement name "formula" = e{exFormula = bsToText value} + | isElement name "infrastructureProcess" = e{exInfrastructure = bsToText value == "true"} + | isElement name "generalComment" = e{exComment = bsToText value} + | otherwise = e + +-- | End of an opening tag: nothing to do for this format. +onEndOpen :: ParseState -> BS.ByteString -> ParseState +onEndOpen state _tagName = state + +-- | Accumulate non-blank text content (also used for CDATA). +onText :: ParseState -> BS.ByteString -> ParseState +onText state content = + let trimmed = BS.dropWhile (== 32) $ BS.dropWhileEnd (== 32) content + in if BS.null trimmed + then state + else state{psTextAccum = trimmed : psTextAccum state} + +-- | Close tag: finalise the element that is ending. +onCloseTag :: ParseState -> BS.ByteString -> ParseState +onCloseTag state tagName + | isElement tagName "inputGroup" = closeGroup restoreInputGroup (\e t -> e{exInputGroup = t}) state + | isElement tagName "outputGroup" = closeGroup restoreOutputGroup (\e t -> e{exOutputGroup = t}) state + | isElement tagName "exchange" = closeExchange state + | isElement tagName "referenceFunction" = (popElement state){psContext = Other} + | isElement tagName "geography" = (popElement state){psContext = Other} + | isElement tagName "dataset" = closeDataset state + | otherwise = popPath state + +-- | Close an input/output group: fold its accumulated text into the parent +-- exchange's matching group field and return to the exchange context. +-- @ownGroup@ yields the exchange data to restore when the current context is +-- the group being closed (or, defensively, a bare exchange); any other context +-- just pops the element. The opposite group never restores, so a stray +-- inside an (malformed) is ignored, not merged. +closeGroup :: (ElementContext -> Maybe ExchangeData) -> (ExchangeData -> Text -> ExchangeData) -> ParseState -> ParseState +closeGroup ownGroup setField state = + case ownGroup (psContext state) of + Just edata -> (popElement state){psContext = InExchange (setField edata txt)} + Nothing -> popElement state + where + txt = T.strip $ T.concat $ reverse $ map bsToText (psTextAccum state) + +-- | Exchange data to restore when closing an : the group we opened, +-- or (defensively) a bare exchange. The opposite group does not restore. +restoreInputGroup :: ElementContext -> Maybe ExchangeData +restoreInputGroup (InInputGroup edata) = Just edata +restoreInputGroup (InExchange edata) = Just edata +restoreInputGroup InOutputGroup{} = Nothing +restoreInputGroup InReferenceFunction = Nothing +restoreInputGroup InGeography = Nothing +restoreInputGroup Other = Nothing + +-- | Exchange data to restore when closing an : the group we opened, +-- or (defensively) a bare exchange. The opposite group does not restore. +restoreOutputGroup :: ElementContext -> Maybe ExchangeData +restoreOutputGroup (InOutputGroup edata) = Just edata +restoreOutputGroup (InExchange edata) = Just edata +restoreOutputGroup InInputGroup{} = Nothing +restoreOutputGroup InReferenceFunction = Nothing +restoreOutputGroup InGeography = Nothing +restoreOutputGroup Other = Nothing + +-- | Close an exchange: build its exchange/flow/unit, accumulate them, and +-- record the supplier link for technosphere inputs. +closeExchange :: ParseState -> ParseState +closeExchange state = case psContext state of + InExchange edata -> + let (exchange, parsedFlow, unit) = buildExchange (psDatasetNumber state) (psLocation state) edata + !supplierLinks = case exchange of + TechnosphereExchange{techRole = Input} + | exNumber edata /= 0 -> + M.insert (exchangeFlowId exchange) (exNumber edata) (psSupplierLinks state) + TechnosphereExchange{} -> psSupplierLinks state + BiosphereExchange{} -> psSupplierLinks state + WasteExchange{} -> psSupplierLinks state + (techs, bios, wastes) = case parsedFlow of + ParsedTech tf -> (tf : psTechFlows state, psBioFlows state, psWasteFlows state) + ParsedBio bf -> (psTechFlows state, bf : psBioFlows state, psWasteFlows state) + ParsedWaste wf -> (psTechFlows state, psBioFlows state, wf : psWasteFlows state) + in (popElement state) + { psExchanges = exchange : psExchanges state + , psTechFlows = techs + , psBioFlows = bios + , psWasteFlows = wastes + , psUnits = unit : psUnits state + , psSupplierLinks = supplierLinks + , psContext = Other + } + InInputGroup _ -> popPath state + InOutputGroup _ -> popPath state + InReferenceFunction -> popPath state + InGeography -> popPath state + Other -> popPath state + +-- | Close a dataset: snapshot the completed activity and reset per-dataset +-- accumulators for the next one (multi-dataset files). +closeDataset :: ParseState -> ParseState +closeDataset state = + let !result = buildResult state + in popPath ((resetDataset state){psCompletedActivities = result : psCompletedActivities state}) + +-- | Clear per-dataset accumulators, preserving cross-dataset state +-- (psPath and psCompletedActivities). +resetDataset :: ParseState -> ParseState +resetDataset state = + state + { psDatasetNumber = 0 + , psActivityName = Nothing + , psActivityCategory = "" + , psActivitySubCategory = "" + , psLocation = Nothing + , psRefUnit = Nothing + , psDescription = [] + , psExchanges = [] + , psTechFlows = [] + , psBioFlows = [] + , psWasteFlows = [] + , psUnits = [] + , psContext = Other + , psTextAccum = [] + , psSupplierLinks = M.empty + } + +{- | Build exchange, flow, and unit from exchange data. +@activityLoc@ is the activity's location, used as a biosphere fallback. + +EcoSpold1 groups: + Input: 1-3 = technosphere, 4 = resource (biosphere) + Output: 0 = reference product, 1-3 = byproduct/co-product, 4 = emission (biosphere) +-} +buildExchange :: Int -> Maybe Text -> ExchangeData -> (Exchange, ParsedFlow, Unit) +buildExchange datasetNum activityLoc edata + | isWasteFlow = (wasteEx, ParsedWaste wasteFlow, unit) + | isBiosphere = (bioEx, ParsedBio bioFlow, unit) + | otherwise = (techEx, ParsedTech techFlow, unit) + where + flowId = generateFlowUUID datasetNum (exNumber edata) (exName edata) (exCategory edata) + unitId = generateUnitUUID (exUnit edata) + unit = Unit unitId (exUnit edata) (exUnit edata) "" + + inputGroup = exInputGroup edata + outputGroup = exOutputGroup edata + isBiosphere = inputGroup == "4" || outputGroup == "4" + isInput = not (T.null inputGroup) + isReferenceProduct = outputGroup == "0" + -- SimaPro's third flow class ('Final waste flows'). EcoSpold1 exports + -- surface them on inputGroup=5 to fit the 4-type input/output model, but + -- the category attribute survives. Routed to WasteExchange so they bypass + -- cross-DB technosphere linking (orphan outputs, not demands). + isWasteFlow = exCategory edata == "Final waste flows" + + -- Technosphere: leave empty if unspecified so the Loader can do name-only + -- lookup. Biosphere: fall back to the activity location (no supplier link). + exchangeLocation + | not (T.null (exLocation edata)) = exLocation edata + | isBiosphere = fromMaybe "" activityLoc + | otherwise = "" + + cas = if T.null (exCASNumber edata) then Nothing else Just (exCASNumber edata) + + -- EcoSpold1 never emits ReferenceInput (no waste-treatment encoding here). + techRoleFor + | isReferenceProduct = ReferenceProduct + | isInput = Input + | otherwise = Coproduct + + -- activityLinkId stays nil; the Loader resolves it later via + -- (flowName, exchangeLocation) against supplier activities. + wasteFlow = WasteFlow flowId (exName edata) unitId M.empty cas Nothing + wasteEx = + WasteExchange + { waFlowId = flowId + , waAmount = exMeanValue edata + , waUnitId = unitId + , -- Mirror isInput: final waste flows surface on inputGroup=5 + -- (consumer's POV: input from a hypothetical treatment service). + waIsInput = isInput + , waActivityLinkId = UUID.nil + , waProcessLinkId = Nothing + , waLocation = exchangeLocation + , waComment = nonEmptyText (exComment edata) + , waPedigree = Nothing + } + + subCat = if T.null (exSubCategory edata) then Nothing else Just (exSubCategory edata) + compartment = + if T.null (exCategory edata) && isNothing subCat + then Nothing + else Just (Compartment (exCategory edata) subCat) + bioFlow = BiosphereFlow flowId (exName edata) unitId M.empty cas Nothing compartment + bioEx = + BiosphereExchange + { bioFlowId = flowId + , bioAmount = exMeanValue edata + , bioUnitId = unitId + , bioDirection = if inputGroup == "4" then Resource else Emission + , bioLocation = exchangeLocation + , bioComment = nonEmptyText (exComment edata) + , bioPedigree = Nothing + } + + techFlow = TechnosphereFlow flowId (exName edata) unitId M.empty cas Nothing + techEx = + TechnosphereExchange + { techFlowId = flowId + , techAmount = exMeanValue edata + , techUnitId = unitId + , techRole = techRoleFor + , techActivityLinkId = UUID.nil + , techProcessLinkId = Nothing + , techLocation = exchangeLocation + , techComment = nonEmptyText (exComment edata) + , techPedigree = Nothing + } + +-- | Build the final per-dataset result, applying the cut-off strategy. +buildResult :: ParseState -> Either String (Activity, [TechnosphereFlow], [BiosphereFlow], [WasteFlow], [Unit], Int, M.Map UUID Int) +buildResult st = + let name = fromMaybe "Unknown Activity" (psActivityName st) + location = fromMaybe "GLO" (psLocation st) + refUnit = fromMaybe "UNKNOWN_UNIT" (psRefUnit st) + description = reverse (psDescription st) + classifications = + M.fromList $ + filter + (not . T.null . snd) + [("Category", psActivityCategory st), ("SubCategory", psActivitySubCategory st)] + activity = Activity name description M.empty classifications location refUnit (reverse $ psExchanges st) M.empty M.empty Nothing Nothing Nothing + pack act = + ( act + , reverse (psTechFlows st) + , reverse (psBioFlows st) + , reverse (psWasteFlows st) + , reverse (psUnits st) + , psDatasetNumber st + , psSupplierLinks st + ) + in pack <$> applyCutoffStrategy activity + +-- | Run the shared SAX fold, surfacing any Xeno error as a String. +foldEcoSpold1 :: BS.ByteString -> Either String ParseState +foldEcoSpold1 = + first show . X.fold onOpenTag onAttribute onEndOpen onText onCloseTag onText initialParseState + +-- | Xeno SAX parser for EcoSpold1 — first dataset in the file. +parseWithXeno :: BS.ByteString -> Either String (Activity, [TechnosphereFlow], [BiosphereFlow], [WasteFlow], [Unit], Int, M.Map UUID Int) +parseWithXeno xmlContent = do + finalState <- foldEcoSpold1 xmlContent + case psCompletedActivities finalState of + (result : _) -> result + [] -> buildResult finalState + +{- | Parse ALL datasets from an EcoSpold1 file (multi-dataset support). +Outer Either = XML parse failure; inner Either = per-activity failure. +-} +parseAllWithXeno :: BS.ByteString -> Either String [Either String (Activity, [TechnosphereFlow], [BiosphereFlow], [WasteFlow], [Unit], Int, M.Map UUID Int)] +parseAllWithXeno = fmap (reverse . psCompletedActivities) . foldEcoSpold1 -- | Parse EcoSpold1 file using Xeno SAX parser streamParseActivityAndFlowsFromFile1 :: FilePath -> IO (Either String (Activity, [TechnosphereFlow], [BiosphereFlow], [WasteFlow], [Unit], Int, M.Map UUID Int)) @@ -526,274 +557,6 @@ unmarkAsReference ex@WasteExchange{} = ex -- Multi-dataset file support -- ============================================================================ -{- | Parse ALL datasets from an EcoSpold1 file (multi-dataset support) -Returns the accumulated completed activities from psCompletedActivities -Outer Either = XML parse failure; inner Either = per-activity failure --} -parseAllWithXeno :: BS.ByteString -> Either String [Either String (Activity, [TechnosphereFlow], [BiosphereFlow], [WasteFlow], [Unit], Int, M.Map UUID Int)] -parseAllWithXeno xmlContent = - case X.fold openTag attribute endOpen text closeTag cdata initialParseState xmlContent of - Left err -> Left (show err) - Right finalState -> Right (reverse $ psCompletedActivities finalState) - where - -- Open tag handler - openTag state tagName = - let newPath = tagName : psPath state - newContext - | isElement tagName "referenceFunction" = InReferenceFunction - | isElement tagName "geography" = InGeography - | isElement tagName "exchange" = InExchange emptyExchangeData - | isElement tagName "inputGroup" = - case psContext state of - InExchange edata -> InInputGroup edata - _ -> psContext state - | isElement tagName "outputGroup" = - case psContext state of - InExchange edata -> InOutputGroup edata - _ -> psContext state - | otherwise = psContext state - in state{psPath = newPath, psContext = newContext, psTextAccum = []} - - -- Attribute handler - attribute state name value = - case psContext state of - InReferenceFunction -> - let st = - state - { psActivityName = - if isElement name "name" - then Just (bsToText value) - else psActivityName state - , psRefUnit = - if isElement name "unit" - then Just (bsToText value) - else psRefUnit state - , psActivityCategory = - if isElement name "category" - then bsToText value - else psActivityCategory state - , psActivitySubCategory = - if isElement name "subCategory" - then bsToText value - else psActivitySubCategory state - , psDescription = - if isElement name "generalComment" && not (BS.null value) - then bsToText value : psDescription state - else psDescription state - } - in st - InGeography -> - if isElement name "location" - then state{psLocation = Just (bsToText value)} - else state - InExchange edata -> - let updated = - edata - { exNumber = if isElement name "number" then bsToInt value else exNumber edata - , exName = if isElement name "name" then bsToText value else exName edata - , exCategory = if isElement name "category" then bsToText value else exCategory edata - , exSubCategory = if isElement name "subCategory" then bsToText value else exSubCategory edata - , exLocation = if isElement name "location" then bsToText value else exLocation edata - , exUnit = if isElement name "unit" then bsToText value else exUnit edata - , exMeanValue = if isElement name "meanValue" then bsToDouble value else exMeanValue edata - , exCASNumber = if isElement name "CASNumber" then bsToText value else exCASNumber edata - , exFormula = if isElement name "formula" then bsToText value else exFormula edata - , exInfrastructure = - if isElement name "infrastructureProcess" - then bsToText value == "true" - else exInfrastructure edata - , exComment = if isElement name "generalComment" then bsToText value else exComment edata - } - in state{psContext = InExchange updated} - _ -> - -- Handle dataset number at top level - if isElement name "number" && any (isElement "dataset") (psPath state) - then state{psDatasetNumber = bsToInt value} - else state - - -- End of opening tag - endOpen state _tagName = state - - -- Text content handler - text state content = - let trimmed = BS.dropWhile (== 32) $ BS.dropWhileEnd (== 32) content - in if BS.null trimmed - then state - else state{psTextAccum = trimmed : psTextAccum state} - - -- Close tag handler - closeTag state tagName - | isElement tagName "inputGroup" = - let txt = T.strip $ T.concat $ reverse $ map bsToText (psTextAccum state) - in case psContext state of - InInputGroup edata -> - state{psContext = InExchange edata{exInputGroup = txt}, psPath = drop 1 (psPath state), psTextAccum = []} - InExchange edata -> - state{psContext = InExchange edata{exInputGroup = txt}, psPath = drop 1 (psPath state), psTextAccum = []} - _ -> state{psPath = drop 1 (psPath state), psTextAccum = []} - | isElement tagName "outputGroup" = - let txt = T.strip $ T.concat $ reverse $ map bsToText (psTextAccum state) - in case psContext state of - InOutputGroup edata -> - state{psContext = InExchange edata{exOutputGroup = txt}, psPath = drop 1 (psPath state), psTextAccum = []} - InExchange edata -> - state{psContext = InExchange edata{exOutputGroup = txt}, psPath = drop 1 (psPath state), psTextAccum = []} - _ -> state{psPath = drop 1 (psPath state), psTextAccum = []} - | isElement tagName "exchange" = - case psContext state of - InExchange edata -> - let (exchange, parsedFlow, unit) = buildExchangeForAll (psDatasetNumber state) (psLocation state) edata - !supplierLinks = case exchange of - TechnosphereExchange{techRole = Input} - | exNumber edata /= 0 -> - M.insert (exchangeFlowId exchange) (exNumber edata) (psSupplierLinks state) - _ -> psSupplierLinks state - (techs, bios, wastes) = case parsedFlow of - ParsedTech tf -> (tf : psTechFlows state, psBioFlows state, psWasteFlows state) - ParsedBio bf -> (psTechFlows state, bf : psBioFlows state, psWasteFlows state) - ParsedWaste wf -> (psTechFlows state, psBioFlows state, wf : psWasteFlows state) - in state - { psExchanges = exchange : psExchanges state - , psTechFlows = techs - , psBioFlows = bios - , psWasteFlows = wastes - , psUnits = unit : psUnits state - , psSupplierLinks = supplierLinks - , psContext = Other - , psPath = drop 1 (psPath state) - , psTextAccum = [] - } - _ -> state{psPath = drop 1 (psPath state)} - | isElement tagName "referenceFunction" = - state{psContext = Other, psPath = drop 1 (psPath state), psTextAccum = []} - | isElement tagName "geography" = - state{psContext = Other, psPath = drop 1 (psPath state), psTextAccum = []} - -- Handle dataset close tag: accumulate completed activity - | isElement tagName "dataset" = - let !result = buildResultForAll state - resetState = - state - { psCompletedActivities = result : psCompletedActivities state - , psDatasetNumber = 0 - , psActivityName = Nothing - , psActivityCategory = "" - , psActivitySubCategory = "" - , psLocation = Nothing - , psRefUnit = Nothing - , psDescription = [] - , psExchanges = [] - , psTechFlows = [] - , psBioFlows = [] - , psWasteFlows = [] - , psUnits = [] - , psContext = Other - , psTextAccum = [] - , psSupplierLinks = M.empty - } - in resetState{psPath = drop 1 (psPath state)} - | otherwise = - state{psPath = drop 1 (psPath state)} - - -- CDATA handler - cdata = text - - -- Build exchange, flow, and unit from exchange data (same logic as parseWithXeno) - buildExchangeForAll :: Int -> Maybe Text -> ExchangeData -> (Exchange, ParsedFlow, Unit) - buildExchangeForAll datasetNum activityLoc edata = - let flowId = generateFlowUUID datasetNum (exNumber edata) (exName edata) (exCategory edata) - unitId = generateUnitUUID (exUnit edata) - inputGroup = exInputGroup edata - outputGroup = exOutputGroup edata - isBiosphere = inputGroup == "4" || outputGroup == "4" - isInput = not (T.null inputGroup) - isReferenceProduct = outputGroup == "0" - isWasteFlow = exCategory edata == "Final waste flows" - exchangeLocation = - if T.null (exLocation edata) - then - if isBiosphere - then fromMaybe "" activityLoc - else "" - else exLocation edata - cas = if T.null (exCASNumber edata) then Nothing else Just (exCASNumber edata) - unit = Unit unitId (exUnit edata) (exUnit edata) "" - techRoleFor - | isReferenceProduct = ReferenceProduct - | isInput = Input - | otherwise = Coproduct - in if isWasteFlow - then - let wf = WasteFlow flowId (exName edata) unitId M.empty cas Nothing - ex = - WasteExchange - { waFlowId = flowId - , waAmount = exMeanValue edata - , waUnitId = unitId - , waIsInput = isInput - , waActivityLinkId = UUID.nil - , waProcessLinkId = Nothing - , waLocation = exchangeLocation - , waComment = nonEmptyText (exComment edata) - , waPedigree = Nothing - } - in (ex, ParsedWaste wf, unit) - else - if isBiosphere - then - let subCat = if T.null (exSubCategory edata) then Nothing else Just (exSubCategory edata) - compartment = - if T.null (exCategory edata) && isNothing subCat - then Nothing - else Just (Compartment (exCategory edata) subCat) - bioFlow = BiosphereFlow flowId (exName edata) unitId M.empty cas Nothing compartment - ex = - BiosphereExchange - { bioFlowId = flowId - , bioAmount = exMeanValue edata - , bioUnitId = unitId - , bioDirection = if inputGroup == "4" then Resource else Emission - , bioLocation = exchangeLocation - , bioComment = nonEmptyText (exComment edata) - , bioPedigree = Nothing - } - in (ex, ParsedBio bioFlow, unit) - else - let techFlow = TechnosphereFlow flowId (exName edata) unitId M.empty cas Nothing - ex = - TechnosphereExchange - { techFlowId = flowId - , techAmount = exMeanValue edata - , techUnitId = unitId - , techRole = techRoleFor - , techActivityLinkId = UUID.nil - , techProcessLinkId = Nothing - , techLocation = exchangeLocation - , techComment = nonEmptyText (exComment edata) - , techPedigree = Nothing - } - in (ex, ParsedTech techFlow, unit) - - -- Build final result for a single dataset - buildResultForAll :: ParseState -> Either String (Activity, [TechnosphereFlow], [BiosphereFlow], [WasteFlow], [Unit], Int, M.Map UUID Int) - buildResultForAll st = - let name = fromMaybe "Unknown Activity" (psActivityName st) - location = fromMaybe "GLO" (psLocation st) - refUnit = fromMaybe "UNKNOWN_UNIT" (psRefUnit st) - description = reverse (psDescription st) - classifications = - M.fromList $ - filter - (not . T.null . snd) - [("Category", psActivityCategory st), ("SubCategory", psActivitySubCategory st)] - activity = Activity name description M.empty classifications location refUnit (reverse $ psExchanges st) M.empty M.empty Nothing Nothing Nothing - techs = reverse (psTechFlows st) - bios = reverse (psBioFlows st) - wastes = reverse (psWasteFlows st) - units = reverse (psUnits st) - in case applyCutoffStrategy activity of - Right act -> Right (act, techs, bios, wastes, units, psDatasetNumber st, psSupplierLinks st) - Left err -> Left err - {- | Parse ALL datasets from a single EcoSpold1 file Used for multi-dataset files where contains multiple elements Skips activities that fail (e.g. no reference product) and logs warnings From d721d19b958b221d887817b602639bef22ef654a Mon Sep 17 00:00:00 2001 From: Christophe Combelles Date: Thu, 28 May 2026 17:51:07 +0200 Subject: [PATCH 32/43] simapro: refactor processBlockToActivity and its param plumbing (#95) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Summary Refactors `SimaPro.Parser.processBlockToActivity` (the SimaPro `ProcessBlock` → `Activity` conversion) and the parameter plumbing feeding it. Pure readability/structure work — **no change to parsing output**. - **Parameter environment**: the six-step `env0 → env4 → env` staircase becomes a single pure `buildParamEnv` that folds over ordered parameter groups — one pass for input params, fixed-point iteration for calculated params (forward references). It's now a reusable, independently testable function. - **Row decomposition**: the ~40-line block of hand-written triple-projection lambdas (`\(e,_,_) -> e`, …) becomes `unzip3` + `catMaybes`. The artificial `techTriples`/`treatmentTriples` split is gone (they were always concatenated). The per-product body moved into a `where`-clause and the two loop-invariant activity fields (`description`, `nativeType`) are hoisted out of the per-coproduct lambda. - **Param plumbing**: database/project params now travel as a named `GlobalParams` record (with a `Monoid` instance) plus a `WorkerResult` record for `parseWorkerLines`, replacing four identically-typed positional `[(Text,Text)]` tuples. The parallel-worker merge collapses to `foldMap`, and the four same-typed lists can no longer be swapped by mistake. Net effect: `processBlockToActivity` drops from ~130 lines / 4 nesting levels to ~90 lines / mostly 2 levels, and the parameter logic becomes reusable units. ## Test plan - [x] `cabal build` (library + executable) — clean - [x] `cabal test lca-tests --test-options='--match "SimaPro"'` — 111/111 pass - [x] `cabal test lca-tests` — 1107 examples pass (the only failures were `Server`/`Routes` integration tests that spawn the `volca` binary; they pass once the executable is built) - [x] Behaviour-preservation verified against existing coverage: coproducts & shared activityUUID, allocation % + formula, per-coproduct exchange scaling, db/project/process param envs, final-waste routing, all biosphere sections, native process type --- src/SimaPro/Parser.hs | 298 ++++++++++++++++++++++-------------------- 1 file changed, 157 insertions(+), 141 deletions(-) diff --git a/src/SimaPro/Parser.hs b/src/SimaPro/Parser.hs index 2dc42bb4..03a7dbfd 100644 --- a/src/SimaPro/Parser.hs +++ b/src/SimaPro/Parser.hs @@ -34,6 +34,7 @@ module SimaPro.Parser ( import Control.Concurrent.Async (mapConcurrently) import Control.DeepSeq (NFData, force) import Control.Exception (evaluate) +import Control.Monad (mfilter) import qualified Data.ByteString as BS import qualified Data.ByteString.Char8 as BS8 import qualified Data.ByteString.Lazy as BL @@ -41,7 +42,7 @@ import Data.Char (isUpper, toLower) import qualified Data.Csv as Csv import Data.List (dropWhileEnd) import qualified Data.Map.Strict as M -import Data.Maybe (isNothing) +import Data.Maybe (catMaybes, isNothing, maybeToList) import qualified Data.Set as S import Data.Text (Text) import qualified Data.Text as T @@ -241,6 +242,41 @@ data ParseAcc = ParseAcc , paProjCalcParams :: ![(Text, Text)] } +-- ============================================================================ +-- Global parameter bundle +-- ============================================================================ + +{- | Parameters declared outside any single process block — database- and +project-level Input/Calculated params — threaded into every block's evaluation +environment. The 'Monoid' instance merges the params each parallel worker +collected from its own chunk. +-} +data GlobalParams = GlobalParams + { gpDbInput :: ![(Text, Text)] + , gpDbCalc :: ![(Text, Text)] + , gpProjInput :: ![(Text, Text)] + , gpProjCalc :: ![(Text, Text)] + } + deriving (Show, Eq, Generic) + +instance NFData GlobalParams + +instance Semigroup GlobalParams where + GlobalParams a1 b1 c1 d1 <> GlobalParams a2 b2 c2 d2 = + GlobalParams (a1 <> a2) (b1 <> b2) (c1 <> c2) (d1 <> d2) + +instance Monoid GlobalParams where + mempty = GlobalParams [] [] [] [] + +-- | Output of parsing one contiguous chunk of lines. +data WorkerResult = WorkerResult + { wrBlocks :: ![ProcessBlock] + , wrParams :: !GlobalParams + } + deriving (Show, Eq, Generic) + +instance NFData WorkerResult + -- ============================================================================ -- Header Parsing -- ============================================================================ @@ -760,140 +796,121 @@ resolveAmount env raw fallback Right v -> v Left _ -> fallback +{- | Build the resolved parameter environment and the raw-expression map from +ordered parameter groups. Input groups are resolved with a single pass each; +calc groups iterate to a fixed point (to resolve forward references where a +param depends on one defined later in the CSV). Groups are ordered low→high +precedence: database, project, process. +-} +buildParamEnv :: [[(Text, Text)]] -> [[(Text, Text)]] -> (M.Map Text Double, M.Map Text Text) +buildParamEnv inputGroups calcGroups = + ( foldl' evalToFixpoint (foldl' (foldl' evalParam) M.empty inputGroups) calcGroups + , M.fromList (concat (inputGroups ++ calcGroups)) + ) + where + evalParam acc (name, rawVal) = + either (const acc) (\v -> M.insert name v acc) (Expr.evaluate acc rawVal) + evalToFixpoint acc params = + let acc' = foldl' evalParam acc params + in if M.size acc' == M.size acc then acc' else evalToFixpoint acc' params + {- | Convert ProcessBlock to list of Activities (one per product) This matches EcoSpold behavior where multi-product processes create multiple activities Global params (db + project level) are passed in and combined with process-level params. -} processBlockToActivity :: UnitConversion.UnitConfig -> - ([(Text, Text)], [(Text, Text)], [(Text, Text)], [(Text, Text)]) -> + GlobalParams -> ProcessBlock -> [(Activity, [TechnosphereFlow], [BiosphereFlow], [WasteFlow], [Unit])] -processBlockToActivity unitCfg (dbInputPs, dbCalcPs, projInputPs, projCalcPs) ProcessBlock{..} = - let - -- Build parameter environment: input params first, then calculated params - evalParam acc (name, rawVal) = case Expr.evaluate acc rawVal of - Right v -> M.insert name v acc - Left _ -> acc - -- Fixed-point iteration for calculated params: repeat until no new variables resolve - -- (handles forward references where a param depends on one defined later in the CSV) - evalCalcParams acc params = - let acc' = foldl' evalParam acc params - in if M.size acc' == M.size acc then acc' else evalCalcParams acc' params - env0 = foldl' evalParam M.empty (reverse dbInputPs) - env1 = foldl' evalParam env0 (reverse projInputPs) - env2 = foldl' evalParam env1 (reverse pbInputParams) - env3 = evalCalcParams env2 (reverse dbCalcPs) - env4 = evalCalcParams env3 (reverse projCalcPs) - env = evalCalcParams env4 (reverse pbCalcParams) - - -- Raw expression map for re-evaluation - allExprs = - reverse dbInputPs - ++ reverse projInputPs - ++ reverse pbInputParams - ++ reverse dbCalcPs - ++ reverse projCalcPs - ++ reverse pbCalcParams - exprMap = M.fromList allExprs - - -- Extract location from process name if not specified - (cleanProcessNameRaw, locFromName) = extractLocation pbName - location = if T.null pbLocation || T.toLower pbLocation == "unspecified" then locFromName else pbLocation - -- Trimmed Process name (without curly-brace location tag). Empty when - -- the SimaPro "Process name" field is empty (typical for mono-product - -- blocks where only the Product line carries the human-readable name). - processNameTrimmed = T.strip cleanProcessNameRaw - - -- Convert all rows in one pass, collecting exchanges/flows/units together - avoidedTriples = map (productToExchange unitCfg env False) pbAvoidedProducts - techTriples = map (techRowToExchange env) (pbMaterials ++ pbElectricity) - treatmentTriples = map (techRowToExchange env) pbWasteToTreatment - -- SimaPro's 'Final waste flows' section: outputs the activity 'throws - -- away' with no modelled treatment in the dataset. Route to - -- WasteExchange so the cross-DB linker doesn't tally them as missing - -- suppliers (they're end-of-life markers, not demands). - finalWasteTriples = map (wasteRowToExchange env) pbFinalWaste - bioTriples = - map (bioRowToExchange env True "resource") pbResources - ++ map (bioRowToExchange env False "air") pbEmissionsAir - ++ map (bioRowToExchange env False "water") pbEmissionsWater - ++ map (bioRowToExchange env False "soil") pbEmissionsSoil - - -- Tech rows may have zero amounts (Maybe Exchange), others always have exchanges - techRowExchanges = [e | (Just e, _, _) <- techTriples ++ treatmentTriples] - techRowFlows = [f | (_, f, _) <- techTriples ++ treatmentTriples] - techRowUnits = [u | (_, _, u) <- techTriples ++ treatmentTriples] - - sharedExchanges = - map (\(e, _, _) -> e) avoidedTriples - ++ techRowExchanges - ++ map (\(e, _, _) -> e) bioTriples - ++ map (\(e, _, _) -> e) finalWasteTriples - sharedTechFlows = map (\(_, f, _) -> f) avoidedTriples ++ techRowFlows - sharedBioFlows = map (\(_, f, _) -> f) bioTriples - sharedWasteFlows = map (\(_, f, _) -> f) finalWasteTriples - sharedUnits = - S.toList . S.fromList $ - map (\(_, _, u) -> unitName u) avoidedTriples - ++ map unitName techRowUnits - ++ map (\(_, _, u) -> unitName u) bioTriples - ++ map (\(_, _, u) -> unitName u) finalWasteTriples - - -- Create one activity per product - makeActivity :: ProductRow -> (Activity, [TechnosphereFlow], [BiosphereFlow], [WasteFlow], [Unit]) - makeActivity prod = - let (productExchange, productFlow, productUnit) = productToExchange unitCfg env True prod - effUnitName = unitName productUnit - allocPercent = resolveAmount env (prAllocRaw prod) (prAllocation prod) - allocFraction = allocPercent / 100.0 - (cleanProductName, locFromProduct) = extractLocation (prName prod) - effectiveLoc = if T.null location then locFromProduct else location - -- Activity name = Process name when present (so coproducts of - -- the same Process share one activityUUID via generateActivityUUID), - -- otherwise fall back to product name (mono-product blocks - -- with empty "Process name" field). - effectiveActivityName = - if T.null processNameTrimmed then cleanProductName else processNameTrimmed - allocFormulaRaw = T.strip (prAllocRaw prod) - allocFormula = - if T.null allocFormulaRaw || isNumericFormula allocFormulaRaw - then Nothing - else Just allocFormulaRaw - activity = - Activity - { activityName = effectiveActivityName - , activityDescription = if T.null pbComment then [] else [pbComment] - , activitySynonyms = M.empty - , activityClassification = - M.fromList $ - filter - (not . T.null . snd) - [ ("Category type", pbCategoryType) - , ("Category", prCategory prod) - ] - , activityLocation = effectiveLoc - , activityUnit = effUnitName - , exchanges = productExchange : map (scaleExchange allocFraction) sharedExchanges - , activityParams = env - , activityParamExprs = exprMap - , activityAllocationPercent = Just allocPercent - , activityAllocationFormula = allocFormula - , activityNativeType = - if T.null pbType - then Nothing - else Just (SimaProProcessType{sptLabel = pbType}) - } - allTechFlows = productFlow : sharedTechFlows - allBioFlows = sharedBioFlows - allWasteFlows = sharedWasteFlows - allUnits = - map - (\name -> Unit (generateUnitUUID name) name name "") - (S.toList . S.fromList $ effUnitName : sharedUnits) - in (activity, allTechFlows, allBioFlows, allWasteFlows, allUnits) - in - map makeActivity pbProducts +processBlockToActivity unitCfg GlobalParams{..} ProcessBlock{..} = + map makeActivity pbProducts + where + (env, exprMap) = + buildParamEnv + (reverse <$> [gpDbInput, gpProjInput, pbInputParams]) + (reverse <$> [gpDbCalc, gpProjCalc, pbCalcParams]) + + -- Extract location from process name if not specified. + (cleanProcessNameRaw, locFromName) = extractLocation pbName + location = + if T.null pbLocation || T.toLower pbLocation == "unspecified" + then locFromName + else pbLocation + -- Trimmed Process name (without curly-brace location tag). Empty when the + -- SimaPro "Process name" field is empty (typical for mono-product blocks + -- where only the Product line carries the human-readable name). + processNameTrimmed = T.strip cleanProcessNameRaw + + -- Convert each section's rows to (exchange, flow, unit) triples in one pass. + -- 'Final waste flows' route to WasteExchange so the cross-DB linker doesn't + -- tally them as missing suppliers (they're end-of-life markers, not demands). + (avoidedExs, avoidedFlows, avoidedUnits) = + unzip3 (productToExchange unitCfg env False <$> pbAvoidedProducts) + (techMaybeExs, techFlows, techUnits) = + unzip3 (techRowToExchange env <$> (pbMaterials ++ pbElectricity ++ pbWasteToTreatment)) + (bioExs, bioFlows, bioUnits) = + unzip3 $ + (bioRowToExchange env True "resource" <$> pbResources) + ++ (bioRowToExchange env False "air" <$> pbEmissionsAir) + ++ (bioRowToExchange env False "water" <$> pbEmissionsWater) + ++ (bioRowToExchange env False "soil" <$> pbEmissionsSoil) + (wasteExs, wasteFlows, wasteUnits) = + unzip3 (wasteRowToExchange env <$> pbFinalWaste) + + -- Exchanges/flows/units shared by every coproduct (scaled per product below). + -- Tech rows with a zero amount yield no exchange but still contribute a flow. + sharedExchanges = avoidedExs ++ catMaybes techMaybeExs ++ bioExs ++ wasteExs + sharedTechFlows = avoidedFlows ++ techFlows + sharedBioFlows = bioFlows + sharedWasteFlows = wasteFlows + sharedUnitNames = + S.toList . S.fromList $ unitName <$> (avoidedUnits ++ techUnits ++ bioUnits ++ wasteUnits) + + -- Loop-invariant activity fields (independent of the coproduct). + descriptionLines = maybeToList (nonEmptyText pbComment) + nativeType = SimaProProcessType <$> nonEmptyText pbType + + makeActivity :: ProductRow -> (Activity, [TechnosphereFlow], [BiosphereFlow], [WasteFlow], [Unit]) + makeActivity prod = + let (productExchange, productFlow, productUnit) = productToExchange unitCfg env True prod + effUnitName = unitName productUnit + allocPercent = resolveAmount env (prAllocRaw prod) (prAllocation prod) + allocFraction = allocPercent / 100.0 + (cleanProductName, locFromProduct) = extractLocation (prName prod) + effectiveLoc = if T.null location then locFromProduct else location + -- Activity name = Process name when present (so coproducts of the same + -- Process share one activityUUID via generateActivityUUID), otherwise + -- fall back to product name (mono-product blocks with empty field). + effectiveActivityName = + if T.null processNameTrimmed then cleanProductName else processNameTrimmed + allocFormula = mfilter (not . isNumericFormula) (nonEmptyText (prAllocRaw prod)) + activity = + Activity + { activityName = effectiveActivityName + , activityDescription = descriptionLines + , activitySynonyms = M.empty + , activityClassification = + M.fromList $ + filter + (not . T.null . snd) + [ ("Category type", pbCategoryType) + , ("Category", prCategory prod) + ] + , activityLocation = effectiveLoc + , activityUnit = effUnitName + , exchanges = productExchange : map (scaleExchange allocFraction) sharedExchanges + , activityParams = env + , activityParamExprs = exprMap + , activityAllocationPercent = Just allocPercent + , activityAllocationFormula = allocFormula + , activityNativeType = nativeType + } + allUnits = + map + (\name -> Unit (generateUnitUUID name) name name "") + (S.toList . S.fromList $ effUnitName : sharedUnitNames) + in (activity, productFlow : sharedTechFlows, sharedBioFlows, sharedWasteFlows, allUnits) -- | True when the raw allocation cell is a plain decimal literal (no formula). isNumericFormula :: Text -> Bool @@ -1250,7 +1267,7 @@ splitForWorkers numWorkers allLines chopAtEnds target endCount (l : acc) ls -- | Parse a contiguous range of lines into ProcessBlocks + global params. -parseWorkerLines :: SimaProConfig -> [BS.ByteString] -> ([ProcessBlock], [(Text, Text)], [(Text, Text)], [(Text, Text)], [(Text, Text)]) +parseWorkerLines :: SimaProConfig -> [BS.ByteString] -> WorkerResult parseWorkerLines cfg ls = let initAcc = ParseAcc @@ -1265,12 +1282,16 @@ parseWorkerLines cfg ls = , paProjCalcParams = [] } finalAcc = foldl' processLine initAcc ls - in ( reverse (paBlocks finalAcc) - , paDbInputParams finalAcc - , paDbCalcParams finalAcc - , paProjInputParams finalAcc - , paProjCalcParams finalAcc - ) + in WorkerResult + { wrBlocks = reverse (paBlocks finalAcc) + , wrParams = + GlobalParams + { gpDbInput = paDbInputParams finalAcc + , gpDbCalc = paDbCalcParams finalAcc + , gpProjInput = paProjInputParams finalAcc + , gpProjCalc = paProjCalcParams finalAcc + } + } -- ============================================================================ -- Main Entry Point @@ -1304,13 +1325,8 @@ parseSimaProCSV unitCfg path = do -- Parse chunks in parallel — each worker folds its contiguous range results <- mapConcurrently (evaluate . force . parseWorkerLines cfg) workerChunks - let allBlocks = concatMap (\(b, _, _, _, _) -> b) results - globalParams = - ( concatMap (\(_, a, _, _, _) -> a) results - , concatMap (\(_, _, b, _, _) -> b) results - , concatMap (\(_, _, _, c, _) -> c) results - , concatMap (\(_, _, _, _, d) -> d) results - ) + let allBlocks = concatMap wrBlocks results + globalParams = foldMap wrParams results -- Convert all blocks to activities (one activity per product) - PARALLEL converted <- concat <$> mapConcurrently (evaluate . force . processBlockToActivity unitCfg globalParams) allBlocks From 829b74c6acba739e8e7f400e8286b905a260d2b7 Mon Sep 17 00:00:00 2001 From: Christophe Combelles Date: Thu, 28 May 2026 17:51:43 +0200 Subject: [PATCH 33/43] refactor(method): model SimaPro method parser state as one Stage ADT (#96) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Summary Continues the PR #87 line of cleanup, this time on `Method.ParserSimaPro` — the 12-phase fold that parses SimaPro LCIA method CSV exports. The phase lived in a `Phase` enum next to a flat 13-field `ParseState` whose per-section accumulators were always present, so the type permitted a phase to disagree with the data it held. This change folds phase + accumulator into a single `Stage` sum type whose constructors carry exactly what each phase needs, making those impossible states unrepresentable. It also factors out the duplicated leaf logic and removes a partial function. ## What changed - **`Stage` ADT** replaces `Phase` + the flat accumulator fields. `CatAccum` / `DamageAccum` / `NWAccum` hold the in-progress block; completed lists and the constant methodology name move to a slim `ParseState` (so `step` drops its threaded methodology argument). - **Shared helpers** pulled out of the `step` `case`: `parseCFRow` (the inlined ~35-line CF builder), one `parseNameValue` for the three identical `name;value` rows, `parseNameUnit` for both `Name;Unit` headers, `buildMethod`, and `detectMarker`/`stageFor` mirroring the sibling `SimaPro.Parser.detectSection`. - **`finalize` collapsed** from re-implemented builders to a 6-line read-out that reuses the same `finishCat`/`finishDamage`/`finishNW` finishers as `step`, via one `finishCurrent` dispatcher. - **Partial function removed**: `parseNameUnit` is total, deleting `head'` and its `error` call. ## Behaviour Output is unchanged on well-formed SimaPro files (verified by tracing the fixture through every transition). Three harmonisations are unreachable on valid exports: empty blocks are no longer emitted, and a truncated file lacking a terminal `End` now keeps its trailing block and its methodology instead of dropping them. Net: `216 insertions, 233 deletions` in one file. ## Test plan - [x] `cabal build lib:volca` — clean, no incomplete-pattern warnings on the new `Stage`/`step` - [x] `cabal run lca-tests -- --match "/Method"` — 115 examples, 0 failures (14 SimaPro method cases + standalone NW parser among them) --- src/Method/ParserSimaPro.hs | 459 ++++++++++++++++++------------------ 1 file changed, 226 insertions(+), 233 deletions(-) diff --git a/src/Method/ParserSimaPro.hs b/src/Method/ParserSimaPro.hs index a24b45e7..0cae0dc7 100644 --- a/src/Method/ParserSimaPro.hs +++ b/src/Method/ParserSimaPro.hs @@ -50,8 +50,8 @@ parseSimaProMethodCSVBytes raw = let !utf8 = ensureUtf8 raw lns = BS8.lines utf8 cfg = parseConfig lns - methodName' = parseMethodName cfg lns - result = foldl' (step cfg methodName') initState lns + methodologyName = parseMethodName cfg lns + result = foldl' (step cfg) (initState methodologyName) lns in Right (finalize result) {- | Detect whether bytes are a SimaPro method CSV export. @@ -74,263 +74,260 @@ isSimaProMethodCSV bs = -- Parser State -- ============================================================================ +-- | In-progress accumulators. Each carries exactly the data its stage needs, +-- so the parser can never hold (say) lingering CFs while reading an NW set. +data CatAccum = CatAccum !Text !Text ![MethodCF] +data DamageAccum = DamageAccum !Text !Text ![(Text, Double)] +data NWAccum = NWAccum !Text !(M.Map Text Double) !(M.Map Text Double) + +{- | The single source of truth for "where we are and what we're collecting". +Folds the old @Phase@ enum together with the per-section accumulators: an +impossible state (a phase disagreeing with its accumulator) is unrepresentable. +-} +data Stage + = Header -- reading {key: value} header lines + | MethodMeta -- skipping method-level metadata until a section marker + | BetweenSections -- finished a block; expecting the next marker or End + | NeedCatLine -- expecting "Name;Unit" after "Impact category" + | NeedSubstances !Text !Text -- have cat name+unit; expecting "Substances" + | ReadingCFs !CatAccum -- reading substance/CF rows + | NeedDcLine -- expecting "Name;Unit" after "Damage category" + | NeedDcImpacts !Text !Text -- expecting "Impact categories" marker + | ReadingDcImpacts !DamageAccum -- reading impact rows of a damage category + | NeedNWName -- expecting the NW-set name line + | NeedNWSection !NWAccum -- expecting "Normalization"/"Weighting"/next marker + | ReadingNorm !NWAccum -- reading normalization rows + | ReadingWeight !NWAccum -- reading weighting rows + data ParseState = ParseState - { psPhase :: !Phase - , psCatName :: !Text -- current impact category name - , psCatUnit :: !Text -- current impact category unit - , psFactors :: ![MethodCF] -- CFs accumulated (reversed) for current category + { psStage :: !Stage + , psMethodology :: !Text -- constant; the method-level "Name" , psMethods :: ![Method] -- completed methods (reversed) - -- Damage categories , psDamageCats :: ![DamageCategory] -- completed damage categories (reversed) - , psDcName :: !Text -- current damage category name - , psDcUnit :: !Text -- current damage category unit - , psDcImpacts :: ![(Text, Double)] -- current damage category impacts (reversed) - -- Normalization/Weighting , psNWsets :: ![NormWeightSet] -- completed NW sets (reversed) - , psNWname :: !Text -- current NW set name - , psNormMap :: !(M.Map Text Double) -- current normalization factors - , psWeightMap :: !(M.Map Text Double) -- current weighting factors } -data Phase - = PhHeader -- reading {key: value} header lines - | PhMethodMeta -- reading method-level metadata (Name, Version, etc.) - | PhExpectCategory -- expecting "Impact category" or "Damage category" etc. - | PhExpectCatLine -- expecting the "Name;Unit" line after "Impact category" - | PhExpectSubst -- expecting blank or "Substances" marker - | PhReadingCFs -- reading substance/CF rows - -- Damage categories - | PhExpectDcLine -- expecting "Name;Unit" after "Damage category" - | PhExpectDcImpacts -- expecting "Impact categories" marker - | PhReadingDcImpacts -- reading impact category rows in damage category - -- Normalization/Weighting - | PhExpectNWname -- expecting NW set name line - | PhExpectNWsection -- expecting "Normalization" or "Weighting" or next section - | PhReadingNorm -- reading normalization rows - | PhReadingWeight -- reading weighting rows - -initState :: ParseState -initState = ParseState PhHeader "" "" [] [] [] "" "" [] [] "" M.empty M.empty +initState :: Text -> ParseState +initState methodology = ParseState Header methodology [] [] [] -- ============================================================================ -- State Machine -- ============================================================================ -step :: SimaProConfig -> Text -> ParseState -> BS.ByteString -> ParseState -step cfg methodologyName st line = case psPhase st of - PhHeader +step :: SimaProConfig -> ParseState -> BS.ByteString -> ParseState +step cfg st line = case psStage st of + Header | BS8.isPrefixOf "{" line -> st - | otherwise -> st{psPhase = PhMethodMeta} - PhMethodMeta - | stripped == "Impact category" -> st{psPhase = PhExpectCatLine} - | stripped == "Damage category" -> st{psPhase = PhExpectDcLine} - | isNWsetMarker stripped -> st{psPhase = PhExpectNWname} + | otherwise -> st{psStage = MethodMeta} + MethodMeta + | Just m <- detectMarker stripped -> st{psStage = stageFor m} | otherwise -> st - PhExpectCategory - | stripped == "Impact category" -> st{psPhase = PhExpectCatLine} - | stripped == "Damage category" -> st{psPhase = PhExpectDcLine} - | isNWsetMarker stripped -> st{psPhase = PhExpectNWname} - | stripped == "End" -> st + BetweenSections + | Just m <- detectMarker stripped -> st{psStage = stageFor m} | otherwise -> st - PhExpectCatLine + NeedCatLine | isBlank line -> st | otherwise -> - let fields = splitCSV (spDelimiter cfg) line - catName = decodeBS (BS8.strip (head' fields)) - catUnit = - if length fields > 1 - then decodeBS (BS8.strip (fields !! 1)) - else "" - in st - { psPhase = PhExpectSubst - , psCatName = catName - , psCatUnit = catUnit - , psFactors = [] - } - PhExpectSubst + let (name, unit) = parseNameUnit cfg line + in st{psStage = NeedSubstances name unit} + NeedSubstances name unit | isBlank line -> st - | stripped == "Substances" -> st{psPhase = PhReadingCFs} + | stripped == "Substances" -> st{psStage = ReadingCFs (CatAccum name unit [])} | otherwise -> st - PhReadingCFs - | isBlank line -> finishCategory st - | stripped == "Impact category" -> - (finishCategory st){psPhase = PhExpectCatLine} - | stripped == "Damage category" -> - (finishCategory st){psPhase = PhExpectDcLine} - | isNWsetMarker stripped -> - (finishCategory st){psPhase = PhExpectNWname} - | stripped == "End" -> finishCategory st - | otherwise -> - let fields = splitCSV (spDelimiter cfg) line - in case fields of - (comp : sub : name : cas : cfVal : cfUnit : _) -> - let !rawName = decodeBS (BS8.strip name) - -- Keep the full suffixed name so the CF's - -- 'mcfFlowRef' UUID matches the suffixed - -- biosphere flow UUID parsed by - -- 'SimaPro.Parser.bioRowToExchange'. The location - -- is also exposed via 'mcfConsumerLocation' for - -- regional dispatch on engines that key CFs by - -- activity location (openLCA JSON-LD); SimaPro - -- CSV CFs are already region-tagged in the name, - -- so dual storage is correct. - !mLoc = snd (extractLocationSuffix rawName) - !cfUnitT = decodeBS (BS8.strip cfUnit) - -- UUID hashed via the shared 'generateFlowUUID' + - -- 'normalizeSimaProCompartment' so the CF side - -- and 'SimaPro.Parser.bioRowToExchange' produce - -- the same UUID for the same flow. - !flowRef = - generateFlowUUID - rawName - (normalizeSimaProCompartment (decodeBS comp) (decodeBS sub)) - cfUnitT - !cf = - MethodCF - { mcfFlowRef = flowRef - , mcfFlowName = rawName - , mcfDirection = direction comp - , mcfValue = parseAmount (spDecimal cfg) (BS8.strip cfVal) - , mcfCompartment = mkCompartment comp sub - , mcfCAS = normalizeCAS (decodeBS (BS8.strip cas)) - , mcfUnit = cfUnitT - , mcfConsumerLocation = mLoc - } - in st{psFactors = cf : psFactors st} - _ -> st - -- Damage category parsing - PhExpectDcLine + ReadingCFs acc + | isBlank line -> finishCat acc st{psStage = BetweenSections} + | stripped == "End" -> finishCat acc st{psStage = BetweenSections} + | Just m <- detectMarker stripped -> finishCat acc st{psStage = stageFor m} + | Just cf <- parseCFRow cfg line -> st{psStage = ReadingCFs (consCF cf acc)} + | otherwise -> st + NeedDcLine | isBlank line -> st | otherwise -> - let fields = splitCSV (spDelimiter cfg) line - dcn = decodeBS (BS8.strip (head' fields)) - dcu = if length fields > 1 then decodeBS (BS8.strip (fields !! 1)) else "" - in st{psPhase = PhExpectDcImpacts, psDcName = dcn, psDcUnit = dcu, psDcImpacts = []} - PhExpectDcImpacts + let (name, unit) = parseNameUnit cfg line + in st{psStage = NeedDcImpacts name unit} + NeedDcImpacts name unit | isBlank line -> st - | stripped == "Impact categories" -> st{psPhase = PhReadingDcImpacts} + | stripped == "Impact categories" -> st{psStage = ReadingDcImpacts (DamageAccum name unit [])} | otherwise -> st - PhReadingDcImpacts - | isBlank line -> finishDamageCategory st - | stripped == "Damage category" -> - (finishDamageCategory st){psPhase = PhExpectDcLine} - | isNWsetMarker stripped -> - (finishDamageCategory st){psPhase = PhExpectNWname} - | stripped == "End" -> finishDamageCategory st - | otherwise -> - let fields = splitCSV (spDelimiter cfg) line - in case fields of - (name : val : _) -> - let n = decodeBS (BS8.strip name) - v = parseAmount (spDecimal cfg) (BS8.strip val) - in st{psDcImpacts = (n, v) : psDcImpacts st} - _ -> st - -- Normalization/Weighting parsing - PhExpectNWname + ReadingDcImpacts acc + | isBlank line -> finishDamage acc st{psStage = BetweenSections} + | stripped == "End" -> finishDamage acc st{psStage = BetweenSections} + | Just m <- detectMarker stripped -> finishDamage acc st{psStage = stageFor m} + | Just nv <- parseNameValue cfg line -> st{psStage = ReadingDcImpacts (consImpact nv acc)} + | otherwise -> st + NeedNWName | isBlank line -> st - | otherwise -> st{psPhase = PhExpectNWsection, psNWname = decodeBS (BS8.strip line)} - PhExpectNWsection + | otherwise -> st{psStage = NeedNWSection (NWAccum (decodeBS stripped) M.empty M.empty)} + NeedNWSection acc | isBlank line -> st - | stripped == "Normalization" -> st{psPhase = PhReadingNorm} - | stripped == "Weighting" -> st{psPhase = PhReadingWeight} - | stripped == "Damage category" -> - (finishNWset st){psPhase = PhExpectDcLine} - | isNWsetMarker stripped -> - (finishNWset st){psPhase = PhExpectNWname} - | stripped == "End" -> finishNWset st + | stripped == "Normalization" -> st{psStage = ReadingNorm acc} + | stripped == "Weighting" -> st{psStage = ReadingWeight acc} + | stripped == "End" -> finishNW acc st{psStage = BetweenSections} + | Just m <- detectMarker stripped -> finishNW acc st{psStage = stageFor m} + | otherwise -> st + ReadingNorm acc + | isBlank line -> st{psStage = NeedNWSection acc} + | stripped == "Weighting" -> st{psStage = ReadingWeight acc} + | stripped == "End" -> finishNW acc st{psStage = BetweenSections} + | Just (n, v) <- parseNameValue cfg line -> st{psStage = ReadingNorm (insertNorm n v acc)} + | otherwise -> st + ReadingWeight acc + | isBlank line -> st{psStage = NeedNWSection acc} + | stripped == "End" -> finishNW acc st{psStage = BetweenSections} + | Just m <- detectMarker stripped -> finishNW acc st{psStage = stageFor m} + | Just (n, v) <- parseNameValue cfg line -> st{psStage = ReadingWeight (insertWeight n v acc)} | otherwise -> st - PhReadingNorm - | isBlank line -> st{psPhase = PhExpectNWsection} - | stripped == "Weighting" -> st{psPhase = PhReadingWeight} - | stripped == "End" -> finishNWset st - | otherwise -> - let fields = splitCSV (spDelimiter cfg) line - in case fields of - (name : val : _) -> - let n = decodeBS (BS8.strip name) - v = parseAmount (spDecimal cfg) (BS8.strip val) - in st{psNormMap = M.insert n v (psNormMap st)} - _ -> st - PhReadingWeight - | isBlank line -> st{psPhase = PhExpectNWsection} - | isNWsetMarker stripped -> - (finishNWset st){psPhase = PhExpectNWname} - | stripped == "End" -> finishNWset st - | otherwise -> - let fields = splitCSV (spDelimiter cfg) line - in case fields of - (name : val : _) -> - let n = decodeBS (BS8.strip name) - v = parseAmount (spDecimal cfg) (BS8.strip val) - in st{psWeightMap = M.insert n v (psWeightMap st)} - _ -> st where stripped = BS8.strip line - finishCategory s = - let !m = - Method - { methodId = - UUID5.generateNamed - simaproNamespace - (BS.unpack $ TE.encodeUtf8 $ "method:" <> psCatName s) - , methodName = psCatName s - , methodDescription = Nothing - , methodUnit = psCatUnit s - , methodCategory = psCatName s - , methodMethodology = Just methodologyName - , methodFactors = reverse (psFactors s) - } - in s{psPhase = PhExpectCategory, psFactors = [], psMethods = m : psMethods s} - - finishDamageCategory s = - let !dc = DamageCategory (psDcName s) (psDcUnit s) (reverse (psDcImpacts s)) - in s{psPhase = PhExpectCategory, psDcImpacts = [], psDamageCats = dc : psDamageCats s} - - finishNWset s - | M.null (psNormMap s) && M.null (psWeightMap s) = s{psPhase = PhExpectCategory} - | otherwise = - let !nw = NormWeightSet (psNWname s) (psNormMap s) (psWeightMap s) - in s - { psPhase = PhExpectCategory - , psNormMap = M.empty - , psWeightMap = M.empty - , psNWname = "" - , psNWsets = nw : psNWsets s - } - +-- | Append the completed in-progress block (if any) onto the right list. +-- Shared by 'step' (mid-stream, on blanks/markers/End) and 'finalize' (at EOF). +finishCat :: CatAccum -> ParseState -> ParseState +finishCat (CatAccum name unit factors) st + | null factors = st + | otherwise = st{psMethods = buildMethod (psMethodology st) name unit (reverse factors) : psMethods st} + +finishDamage :: DamageAccum -> ParseState -> ParseState +finishDamage (DamageAccum name unit impacts) st + | null impacts = st + | otherwise = st{psDamageCats = DamageCategory name unit (reverse impacts) : psDamageCats st} + +finishNW :: NWAccum -> ParseState -> ParseState +finishNW (NWAccum name norm weight) st + | M.null norm && M.null weight = st + | otherwise = st{psNWsets = NormWeightSet name norm weight : psNWsets st} + +-- | Flush whatever block is in progress at end of input, then read out the +-- accumulated collections in source order. finalize :: ParseState -> MethodCollection finalize st = - let methods = case psPhase st of - PhReadingCFs - | not (null (psFactors st)) -> - let !m = - Method - { methodId = - UUID5.generateNamed - simaproNamespace - (BS.unpack $ TE.encodeUtf8 $ "method:" <> psCatName st) - , methodName = psCatName st - , methodDescription = Nothing - , methodUnit = psCatUnit st - , methodCategory = psCatName st - , methodMethodology = Nothing - , methodFactors = reverse (psFactors st) - } - in reverse (m : psMethods st) - _ -> reverse (psMethods st) - -- Flush any pending NW set - nwSets = case psPhase st of - PhReadingWeight - | not (M.null (psWeightMap st)) -> - let !nw = NormWeightSet (psNWname st) (psNormMap st) (psWeightMap st) - in reverse (nw : psNWsets st) - PhReadingNorm - | not (M.null (psNormMap st)) -> - let !nw = NormWeightSet (psNWname st) (psNormMap st) (psWeightMap st) - in reverse (nw : psNWsets st) - _ -> reverse (psNWsets st) - in MethodCollection methods (reverse (psDamageCats st)) nwSets [] + let s = finishCurrent st + in MethodCollection + (reverse (psMethods s)) + (reverse (psDamageCats s)) + (reverse (psNWsets s)) + [] + +finishCurrent :: ParseState -> ParseState +finishCurrent st = case psStage st of + ReadingCFs acc -> finishCat acc st + ReadingDcImpacts acc -> finishDamage acc st + ReadingNorm acc -> finishNW acc st + ReadingWeight acc -> finishNW acc st + NeedNWSection acc -> finishNW acc st + -- Stages with no in-progress block to flush. Enumerated (not wildcarded) + -- so a future accumulator-carrying stage can't silently skip its EOF flush. + Header -> st + MethodMeta -> st + BetweenSections -> st + NeedCatLine -> st + NeedSubstances{} -> st + NeedDcLine -> st + NeedDcImpacts{} -> st + NeedNWName -> st + +consCF :: MethodCF -> CatAccum -> CatAccum +consCF cf (CatAccum name unit factors) = CatAccum name unit (cf : factors) + +consImpact :: (Text, Double) -> DamageAccum -> DamageAccum +consImpact i (DamageAccum name unit impacts) = DamageAccum name unit (i : impacts) + +insertNorm :: Text -> Double -> NWAccum -> NWAccum +insertNorm n v (NWAccum name norm weight) = NWAccum name (M.insert n v norm) weight + +insertWeight :: Text -> Double -> NWAccum -> NWAccum +insertWeight n v (NWAccum name norm weight) = NWAccum name norm (M.insert n v weight) + +buildMethod :: Text -> Text -> Text -> [MethodCF] -> Method +buildMethod methodology name unit factors = + Method + { methodId = + UUID5.generateNamed + simaproNamespace + (BS.unpack $ TE.encodeUtf8 $ "method:" <> name) + , methodName = name + , methodDescription = Nothing + , methodUnit = unit + , methodCategory = name + , methodMethodology = Just methodology + , methodFactors = factors + } + +-- ============================================================================ +-- Line parsers +-- ============================================================================ + +-- | A line that begins a new section. "End" is deliberately not a marker — it +-- only closes the current block — so the reading stages handle it inline. +data Marker = MImpactCat | MDamageCat | MNWSet + +detectMarker :: BS.ByteString -> Maybe Marker +detectMarker s + | s == "Impact category" = Just MImpactCat + | s == "Damage category" = Just MDamageCat + | isNWsetMarker s = Just MNWSet + | otherwise = Nothing + +stageFor :: Marker -> Stage +stageFor MImpactCat = NeedCatLine +stageFor MDamageCat = NeedDcLine +stageFor MNWSet = NeedNWName + +-- | Parse one substance/CF row into a 'MethodCF', or 'Nothing' if the row is +-- too short to be a factor line. +parseCFRow :: SimaProConfig -> BS.ByteString -> Maybe MethodCF +parseCFRow cfg line = + case splitCSV (spDelimiter cfg) line of + (comp : sub : name : cas : cfVal : cfUnit : _) -> + let !rawName = decodeBS (BS8.strip name) + -- Keep the full suffixed name so the CF's 'mcfFlowRef' UUID + -- matches the suffixed biosphere flow UUID parsed by + -- 'SimaPro.Parser.bioRowToExchange'. The location is also + -- exposed via 'mcfConsumerLocation' for regional dispatch on + -- engines that key CFs by activity location (openLCA JSON-LD); + -- SimaPro CSV CFs are already region-tagged in the name, so + -- dual storage is correct. + !mLoc = snd (extractLocationSuffix rawName) + !cfUnitT = decodeBS (BS8.strip cfUnit) + -- UUID hashed via the shared 'generateFlowUUID' + + -- 'normalizeSimaProCompartment' so the CF side and + -- 'SimaPro.Parser.bioRowToExchange' produce the same UUID for + -- the same flow. + !flowRef = + generateFlowUUID + rawName + (normalizeSimaProCompartment (decodeBS comp) (decodeBS sub)) + cfUnitT + !cf = + MethodCF + { mcfFlowRef = flowRef + , mcfFlowName = rawName + , mcfDirection = direction comp + , mcfValue = parseAmount (spDecimal cfg) (BS8.strip cfVal) + , mcfCompartment = mkCompartment comp sub + , mcfCAS = normalizeCAS (decodeBS (BS8.strip cas)) + , mcfUnit = cfUnitT + , mcfConsumerLocation = mLoc + } + in Just cf + _ -> Nothing + +-- | Parse a two-column @name;value@ row (damage impacts, normalization, +-- weighting), or 'Nothing' if the row lacks both columns. +parseNameValue :: SimaProConfig -> BS.ByteString -> Maybe (Text, Double) +parseNameValue cfg line = + case splitCSV (spDelimiter cfg) line of + (name : val : _) -> + Just (decodeBS (BS8.strip name), parseAmount (spDecimal cfg) (BS8.strip val)) + _ -> Nothing + +-- | Parse a @Name;Unit@ header line. Total: a missing unit yields "". +parseNameUnit :: SimaProConfig -> BS.ByteString -> (Text, Text) +parseNameUnit cfg line = + case splitCSV (spDelimiter cfg) line of + (name : unit : _) -> (decodeBS (BS8.strip name), decodeBS (BS8.strip unit)) + [name] -> (decodeBS (BS8.strip name), "") + [] -> ("", "") -- ============================================================================ -- Helpers @@ -364,10 +361,6 @@ parseMethodName _cfg = go False isBlank :: BS.ByteString -> Bool isBlank = BS.null . BS8.strip -head' :: [a] -> a -head' (x : _) = x -head' [] = error "Method.ParserSimaPro: unexpected empty field list" - direction :: BS.ByteString -> FlowDirection direction comp | lc == "raw" || lc == "resources" || "raw" `BS8.isPrefixOf` lc = Input From 8950615e8315c5692f96b66e58d76e0ec08bb898 Mon Sep 17 00:00:00 2001 From: Christophe Combelles Date: Thu, 28 May 2026 17:52:41 +0200 Subject: [PATCH 34/43] refactor(ecospold2): factor parseWithXeno into SAX combinators (#97) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Summary `parseWithXeno` was a single ~465-line function. Its `closeTag` handler held two near-duplicate ~115-line exchange-finalization blocks plus a dozen field setters, all repeating the same three idioms: text harvest, path pop, and per-kind `InIntermediateExchange`/`InElementaryExchange` dispatch. This extracts small top-level pure combinators so each block now carries only its domain decision logic: - **idioms** — `accumText`, `popPath`, `popText`, `pathAt` - **dispatch** — `onExchange` (over `mapExchange`); the exhaustive `ElementContext` matches live in `currentIntermediate` / `currentElementary` / `inGeneralComment`, so call sites stay wildcard-free - **exchange close** — `mkUnit`, `resolveGroups`, `missingUnitWarning`, `parseUUIDOrNil`, `finishExchange`, and the `add*` state pushes - **plumbing** — result handling becomes an `Either`-monad do-block (`Data.Bifunctor.first`); `buildResult`'s tail becomes an `fmap` Combinators are kept allocation-neutral (no lens on this hot strict fold). Net **−29 lines**, single file. ## Test plan - [x] `cabal build` clean — only the 2 pre-existing MCP warnings, none from this file - [x] `cabal test` — 1107/1107 hspec green, 1 pending (unchanged from baseline) - [x] EcoSpold2 group green: per-exchange comments, property-comment isolation, waste patterns A & B, native activityType, malformed-input `Left` --- src/EcoSpold/Parser2.hs | 497 +++++++++++++++++++--------------------- 1 file changed, 234 insertions(+), 263 deletions(-) diff --git a/src/EcoSpold/Parser2.hs b/src/EcoSpold/Parser2.hs index 904a0131..d82fe7d3 100644 --- a/src/EcoSpold/Parser2.hs +++ b/src/EcoSpold/Parser2.hs @@ -4,6 +4,7 @@ module EcoSpold.Parser2 (streamParseActivityAndFlowsFromFile, normalizeCAS) where +import Data.Bifunctor (first) import qualified Data.ByteString as BS import qualified Data.Map as M import Data.Maybe (catMaybes, fromMaybe) @@ -224,14 +225,146 @@ ecoSpoldSpecialActivityTypeLabel = \case 6 -> "Import activity" n -> "Unknown (code " <> T.pack (show n) <> ")" +-- ============================================================================ +-- SAX state combinators (pure, allocation-neutral — inlined into the fold) +-- ============================================================================ + +-- | Concatenated, entity-decoded text accumulated since the element opened. +accumText :: ParseState -> Text +accumText = T.concat . reverse . map bsToText . psTextAccum + +-- | Pop one element off the path stack. +popPath :: ParseState -> ParseState +popPath st = st{psPath = drop 1 (psPath st)} + +-- | The common close-tag epilogue: pop the path and discard accumulated text. +popText :: ParseState -> ParseState +popText st = (popPath st){psTextAccum = []} + +-- | Is the element at the given depth (0 = currently-open tag) named @name@? +pathAt :: Int -> BS.ByteString -> ParseState -> Bool +pathAt depth name st = case drop depth (psPath st) of + (e : _) -> isElement e name + [] -> False + +-- | Transform the open exchange accumulator, leaving non-exchange contexts +-- untouched. The exhaustive match lives here so the call sites stay wildcard-free. +mapExchange :: + (IntermediateData -> IntermediateData) -> + (ElementaryData -> ElementaryData) -> + ElementContext -> + ElementContext +mapExchange fi fe = \case + InIntermediateExchange d -> InIntermediateExchange (fi d) + InElementaryExchange d -> InElementaryExchange (fe d) + InActivityName -> InActivityName + InGeographyShortname -> InGeographyShortname + InGeneralCommentText i -> InGeneralCommentText i + Other -> Other + +-- | Apply per-kind updates to the open exchange accumulator, then pop path+text. +onExchange :: + (IntermediateData -> IntermediateData) -> + (ElementaryData -> ElementaryData) -> + ParseState -> + ParseState +onExchange fi fe st = popText st{psContext = mapExchange fi fe (psContext st)} + +-- | The currently-open intermediate exchange, if any. +currentIntermediate :: ParseState -> Maybe IntermediateData +currentIntermediate st = case psContext st of + InIntermediateExchange d -> Just d + InElementaryExchange _ -> Nothing + InActivityName -> Nothing + InGeographyShortname -> Nothing + InGeneralCommentText _ -> Nothing + Other -> Nothing + +-- | The currently-open elementary exchange, if any. +currentElementary :: ParseState -> Maybe ElementaryData +currentElementary st = case psContext st of + InElementaryExchange d -> Just d + InIntermediateExchange _ -> Nothing + InActivityName -> Nothing + InGeographyShortname -> Nothing + InGeneralCommentText _ -> Nothing + Other -> Nothing + +-- | Are we inside a @generalComment@ @\@ element? +inGeneralComment :: ParseState -> Bool +inGeneralComment st = case psContext st of + InGeneralCommentText _ -> True + InIntermediateExchange _ -> False + InElementaryExchange _ -> False + InActivityName -> False + InGeographyShortname -> False + Other -> False + +-- | Build a 'Unit', substituting placeholders for a missing unit name. +mkUnit :: UUID -> Text -> Unit +mkUnit uuid name + | T.null name = Unit uuid "UNKNOWN_UNIT" "?" "" + | otherwise = Unit uuid name name "" + +-- | Resolve in/out group: prefer the attribute value, fall back to the pending +-- value captured from the child @\@ / @\@ element. +resolveGroups :: Text -> Text -> ParseState -> (Text, Text) +resolveGroups inG outG st = + ( if T.null inG then psPendingInputGroup st else inG + , if T.null outG then psPendingOutputGroup st else outG + ) + +-- | Warning emitted (as a singleton, else empty) when an exchange has no unit name. +missingUnitWarning :: String -> Text -> Text -> [String] +missingUnitWarning kind flowId unitName = + [ "[WARNING] Missing unit name for " + ++ kind + ++ " exchange with flow ID: " + ++ T.unpack flowId + ++ " - using 'UNKNOWN_UNIT' placeholder" + | T.null unitName + ] + +-- | Parse a UUID, treating the empty string as the nil UUID (no warning). +parseUUIDOrNil :: Text -> (UUID, Maybe String) +parseUUIDOrNil t + | T.null t = (UUID.nil, Nothing) + | otherwise = parseUUID t + +-- | Second argument when non-blank, otherwise the fallback. +nonBlankOr :: Text -> Text -> Text +nonBlankOr fallback t = if T.null t then fallback else t + +addExchange :: Exchange -> ParseState -> ParseState +addExchange ex st = st{psExchanges = ex : psExchanges st} + +addTechFlow :: TechnosphereFlow -> ParseState -> ParseState +addTechFlow f st = st{psTechFlows = f : psTechFlows st} + +addBioFlow :: BiosphereFlow -> ParseState -> ParseState +addBioFlow f st = st{psBioFlows = f : psBioFlows st} + +addWasteFlow :: WasteFlow -> ParseState -> ParseState +addWasteFlow f st = st{psWasteFlows = f : psWasteFlows st} + +-- | Common exchange-close bookkeeping: leave the exchange context, pop the +-- path/text, clear pending groups, record the unit and any warnings. +finishExchange :: Unit -> [String] -> ParseState -> ParseState +finishExchange unit warns st = + (popText st) + { psContext = Other + , psPendingInputGroup = "" + , psPendingOutputGroup = "" + , psUnits = unit : psUnits st + , psWarnings = warns ++ psWarnings st + } + -- | Xeno SAX parser implementation parseWithXeno :: BS.ByteString -> ProcessId -> Either String ((Activity, [TechnosphereFlow], [BiosphereFlow], [WasteFlow], [Unit]), [String]) -parseWithXeno xmlContent processId = - case X.fold openTag attribute endOpen text closeTag cdata initialParseState xmlContent of - Left err -> Left (show err) - Right finalState -> case buildResult finalState processId of - Left err -> Left err - Right result -> Right (result, reverse $ psWarnings finalState) +parseWithXeno xmlContent processId = do + finalState <- first show (X.fold openTag attribute endOpen text closeTag cdata initialParseState xmlContent) + result <- buildResult finalState processId + pure (result, reverse (psWarnings finalState)) where -- Open tag handler - update path and context openTag state tagName = @@ -258,17 +391,12 @@ parseWithXeno xmlContent processId = -- Attribute handler - extract attributes attribute state name value = - let isInsideProperty = case psPath state of - [] -> False - (current : _) -> isElement current "property" - isOnComment = case psPath state of - [] -> False - (current : _) -> isElement current "comment" + let isInsideProperty = pathAt 0 "property" state -- xml:lang on the currently-open ; remembered until closeTag. -- Attribute order is not significant for entity ref selection — we -- only need the lang at close-time. withLang st - | isOnComment && isElement name "xml:lang" = st{psPendingCommentLang = bsToText value} + | pathAt 0 "comment" state && isElement name "xml:lang" = st{psPendingCommentLang = bsToText value} | otherwise = st in case psContext state of InIntermediateExchange idata -> @@ -297,9 +425,7 @@ parseWithXeno xmlContent processId = _ -> -- Attributes on the opening tag carry the -- ecospold2 activityType and specialActivityType enums. - let onActivity = case psPath state of - (current : _) -> isElement current "activity" - [] -> False + let onActivity = pathAt 0 "activity" state captured | onActivity && isElement name "activityType" = state{psActivityType = bsToIntMaybe value} @@ -321,60 +447,48 @@ parseWithXeno xmlContent processId = -- Close tag handler - finalize elements closeTag state tagName | isElement tagName "activityName" = - let txt = T.concat $ reverse $ map bsToText (psTextAccum state) - in state{psActivityName = Just txt, psContext = Other, psPath = drop 1 (psPath state), psTextAccum = []} + (popText state){psActivityName = Just (accumText state), psContext = Other} | isElement tagName "comment" = -- Capture text only when the immediate parent is the -- exchange itself, not a nested . Property comments -- describe the property (e.g. "carbon content"), not the exchange. - let parent = case drop 1 (psPath state) of - (p : _) -> p - [] -> "" - txt = T.concat $ reverse $ map bsToText (psTextAccum state) + let txt = accumText state lang = psPendingCommentLang state - popPath = state{psPath = drop 1 (psPath state), psTextAccum = [], psPendingCommentLang = ""} - in case psContext state of - InIntermediateExchange idata - | isElement parent "intermediateExchange" -> - popPath{psContext = InIntermediateExchange idata{idComment = pickComment (idComment idata) lang txt}} - InElementaryExchange edata - | isElement parent "elementaryExchange" -> - popPath{psContext = InElementaryExchange edata{edComment = pickComment (edComment edata) lang txt}} - _ -> popPath + st' = + onExchange + (\d -> if pathAt 1 "intermediateExchange" state then d{idComment = pickComment (idComment d) lang txt} else d) + (\d -> if pathAt 1 "elementaryExchange" state then d{edComment = pickComment (edComment d) lang txt} else d) + state + in st'{psPendingCommentLang = ""} | isElement tagName "shortname" && psContext state == InGeographyShortname = - let txt = T.concat $ reverse $ map bsToText (psTextAccum state) - in state{psLocation = Just txt, psContext = Other, psPath = drop 1 (psPath state), psTextAccum = []} + (popText state){psLocation = Just (accumText state), psContext = Other} | isElement tagName "intermediateExchange" = - case psContext state of - InIntermediateExchange idata -> - -- Use pending group values if attribute values are empty - let finalInputGroup = if T.null (idInputGroup idata) then psPendingInputGroup state else idInputGroup idata - finalOutputGroup = if T.null (idOutputGroup idata) then psPendingOutputGroup state else idOutputGroup idata - isInput = not $ T.null finalInputGroup + case currentIntermediate state of + Nothing -> popPath state + Just idata -> + let (finalInputGroup, finalOutputGroup) = resolveGroups (idInputGroup idata) (idOutputGroup idata) state + isInput = not (T.null finalInputGroup) isOutput = T.null finalInputGroup - -- Reference flow identification: - -- Reference products are identified ONLY by outputGroup="0" - -- This works for both normal production (positive amount) and waste treatment (negative amount) - -- Negative inputs (like wastewater discharge) should NOT be considered reference products - -- outputGroup valid values: 0=reference product, 1-3=byproducts, 4=allocated byproduct, 5=recyclable + -- Reference products are identified ONLY by outputGroup="0"; this holds for + -- normal production (positive amount) and waste treatment (negative amount). + -- Negative inputs (e.g. wastewater discharge) are never reference products. isReferenceProduct = isOutput && finalOutputGroup == "0" - -- Pattern B: intermediateExchange tagged as Waste via classification - -- (System='By-product classification', Value='Waste'). When set, the flow - -- represents a waste output that consumers treat via a treatment activity. + -- Pattern B: intermediateExchange tagged Waste via classification + -- (System='By-product classification', Value='Waste') — a waste output that + -- consumers treat via a treatment activity. isWasteFlow = M.lookup "By-product classification" (idClassifications idata) == Just "Waste" - -- Parse UUIDs and collect warnings (flowUUID, flowWarn) = parseUUID (idFlowId idata) (unitUUID, unitWarn) = parseUUID (idUnitId idata) - (linkUUID, linkWarn) = - if T.null (idActivityLinkId idata) - then (UUID.nil, Nothing) - else parseUUID (idActivityLinkId idata) - uuidWarnings = catMaybes [flowWarn, unitWarn, linkWarn] + (linkUUID, linkWarn) = parseUUIDOrNil (idActivityLinkId idata) + warns = + catMaybes [flowWarn, unitWarn, linkWarn] + ++ missingUnitWarning "intermediate" (idFlowId idata) (idUnitName idata) techRoleFor | isReferenceProduct = ReferenceProduct | isInput = Input | otherwise = Coproduct - resolvedFlowName = if T.null (idFlowName idata) then idFlowId idata else idFlowName idata + resolvedFlowName = nonBlankOr (idFlowId idata) (idFlowName idata) + unit = mkUnit unitUUID (idUnitName idata) techExchange = TechnosphereExchange { techFlowId = flowUUID @@ -387,14 +501,7 @@ parseWithXeno xmlContent processId = , techComment = snd <$> idComment idata , techPedigree = Nothing } - techFlow = - TechnosphereFlow - flowUUID - resolvedFlowName - unitUUID - (idSynonyms idata) - Nothing -- CAS - Nothing -- substanceId + techFlow = TechnosphereFlow flowUUID resolvedFlowName unitUUID (idSynonyms idata) Nothing Nothing wasteExchange = WasteExchange { waFlowId = flowUUID @@ -407,64 +514,24 @@ parseWithXeno xmlContent processId = , waComment = snd <$> idComment idata , waPedigree = Nothing } - wasteFlow = - WasteFlow - flowUUID - resolvedFlowName - unitUUID - (idSynonyms idata) - Nothing -- CAS - Nothing -- substanceId - unitNameWarning = - [ "[WARNING] Missing unit name for intermediate exchange with flow ID: " - ++ T.unpack (idFlowId idata) - ++ " - using 'UNKNOWN_UNIT' placeholder" - | T.null (idUnitName idata) - ] - unit = - Unit - unitUUID - (if T.null (idUnitName idata) then "UNKNOWN_UNIT" else idUnitName idata) - (if T.null (idUnitName idata) then "?" else idUnitName idata) - "" - -- Reference product unit: only meaningful when the flow stays in the technosphere + wasteFlow = WasteFlow flowUUID resolvedFlowName unitUUID (idSynonyms idata) Nothing Nothing + -- Reference product unit only when the flow stays in the technosphere -- (waste outputs are never the reference product of a producing process). newRefUnit = if isReferenceProduct && not isWasteFlow && not (T.null (idUnitName idata)) then Just (idUnitName idata) else psRefUnit state - baseState = - state - { psContext = Other - , psPath = drop 1 (psPath state) - , psTextAccum = [] - , psPendingInputGroup = "" - , psPendingOutputGroup = "" - , psRefUnit = newRefUnit - , psUnits = unit : psUnits state - , psWarnings = uuidWarnings ++ unitNameWarning ++ psWarnings state - } + base = (finishExchange unit warns state){psRefUnit = newRefUnit} in if isWasteFlow - then - baseState - { psExchanges = wasteExchange : psExchanges state - , psWasteFlows = wasteFlow : psWasteFlows state - } - else - baseState - { psExchanges = techExchange : psExchanges state - , psTechFlows = techFlow : psTechFlows state - } - _ -> state{psPath = drop 1 (psPath state)} + then addExchange wasteExchange (addWasteFlow wasteFlow base) + else addExchange techExchange (addTechFlow techFlow base) | isElement tagName "elementaryExchange" = - case psContext state of - InElementaryExchange edata -> - -- Use pending group values if attribute values are empty - let finalInputGroup = if T.null (edInputGroup edata) then psPendingInputGroup state else edInputGroup edata - finalOutputGroup = if T.null (edOutputGroup edata) then psPendingOutputGroup state else edOutputGroup edata - -- A missing compartment becomes 'Nothing', not an empty - -- 'Compartment ""' sentinel — the latter used to silently - -- collide with method-side empty mediums. + case currentElementary state of + Nothing -> popPath state + Just edata -> + let (finalInputGroup, finalOutputGroup) = resolveGroups (edInputGroup edata) (edOutputGroup edata) state + -- A missing compartment becomes 'Nothing', not an empty 'Compartment ""' + -- sentinel — the latter used to silently collide with method-side empty mediums. mCompName = case edCompartments edata of (c : _) | not (T.null c) -> Just c _ -> Nothing @@ -475,29 +542,29 @@ parseWithXeno xmlContent processId = (Nothing, Nothing) -> Nothing (Just c, sc) -> Just (Compartment c sc) (Nothing, Just _) -> Nothing -- sub without medium is meaningless; drop - -- Determine the biosphere direction. - -- Primary: use inputGroup/outputGroup if present. - -- Fallback: compartment heuristic — natural-resource flows are extractions. + -- Biosphere direction: prefer inputGroup/outputGroup, else fall back to the + -- compartment heuristic (natural-resource flows are extractions). direction | not (T.null finalInputGroup) = Resource | not (T.null finalOutputGroup) = Emission | otherwise = case edCompartments edata of (comp : _) | T.toLower comp == "natural resource" -> Resource _ -> Emission - -- Pattern A: elementaryExchange with compartment="inventory indicator" - -- subcompartment="waste". Waste outputs surfaced through the - -- elementary axis but semantically technosphere waste — route them - -- to WasteExchange instead of BiosphereExchange. + -- Pattern A: compartment="inventory indicator" / subcompartment="waste". + -- Surfaced through the elementary axis but semantically technosphere waste — + -- route to WasteExchange instead of BiosphereExchange. isInventoryIndicatorWaste = case (mCompName, subCompartment) of (Just c, Just s) -> T.toLower (T.strip c) == "inventory indicator" && T.toLower (T.strip s) == "waste" _ -> False - -- Parse UUIDs and collect warnings (flowUUID, flowWarn) = parseUUID (edFlowId edata) (unitUUID, unitWarn) = parseUUID (edUnitId edata) - uuidWarnings = catMaybes [flowWarn, unitWarn] - resolvedFlowName = if T.null (edFlowName edata) then edFlowId edata else edFlowName edata + warns = + catMaybes [flowWarn, unitWarn] + ++ missingUnitWarning "elementary" (edFlowId edata) (edUnitName edata) + resolvedFlowName = nonBlankOr (edFlowId edata) (edFlowName edata) + unit = mkUnit unitUUID (edUnitName edata) bioExchange = BiosphereExchange { bioFlowId = flowUUID @@ -508,15 +575,7 @@ parseWithXeno xmlContent processId = , bioComment = snd <$> edComment edata , bioPedigree = Nothing } - bioFlow = - BiosphereFlow - flowUUID - resolvedFlowName - unitUUID - (edSynonyms edata) - (edCAS edata) - Nothing -- substanceId - to be filled later - compartment + bioFlow = BiosphereFlow flowUUID resolvedFlowName unitUUID (edSynonyms edata) (edCAS edata) Nothing compartment wasteExchange = WasteExchange { waFlowId = flowUUID @@ -529,147 +588,61 @@ parseWithXeno xmlContent processId = , waComment = snd <$> edComment edata , waPedigree = Nothing } - wasteFlow = - WasteFlow - flowUUID - resolvedFlowName - unitUUID - (edSynonyms edata) - (edCAS edata) - Nothing -- substanceId - unitNameWarning = - [ "[WARNING] Missing unit name for elementary exchange with flow ID: " - ++ T.unpack (edFlowId edata) - ++ " - using 'UNKNOWN_UNIT' placeholder" - | T.null (edUnitName edata) - ] - unit = - Unit - unitUUID - (if T.null (edUnitName edata) then "UNKNOWN_UNIT" else edUnitName edata) - (if T.null (edUnitName edata) then "?" else edUnitName edata) - "" - baseState = - state - { psContext = Other - , psPath = drop 1 (psPath state) - , psTextAccum = [] - , psPendingInputGroup = "" - , psPendingOutputGroup = "" - , psUnits = unit : psUnits state - , psWarnings = uuidWarnings ++ unitNameWarning ++ psWarnings state - } + wasteFlow = WasteFlow flowUUID resolvedFlowName unitUUID (edSynonyms edata) (edCAS edata) Nothing + base = finishExchange unit warns state in if isInventoryIndicatorWaste - then - baseState - { psExchanges = wasteExchange : psExchanges state - , psWasteFlows = wasteFlow : psWasteFlows state - } - else - baseState - { psExchanges = bioExchange : psExchanges state - , psBioFlows = bioFlow : psBioFlows state - } - _ -> state{psPath = drop 1 (psPath state)} + then addExchange wasteExchange (addWasteFlow wasteFlow base) + else addExchange bioExchange (addBioFlow bioFlow base) | isElement tagName "text" = - case psContext state of - InGeneralCommentText _idx -> - let txt = T.concat $ reverse $ map bsToText (psTextAccum state) - in -- Store as (index, text) pair for later sorting - if T.null txt - then state{psContext = Other, psTextAccum = []} - else state{psDescription = txt : psDescription state, psContext = Other, psTextAccum = []} - _ -> state{psPath = drop 1 (psPath state), psTextAccum = []} + -- The generalComment branch deliberately does NOT pop the path. + if inGeneralComment state + then + let txt = accumText state + withDesc = if T.null txt then state else state{psDescription = txt : psDescription state} + in withDesc{psContext = Other, psTextAccum = []} + else popText state | isElement tagName "name" = - let txt = T.concat $ reverse $ map bsToText (psTextAccum state) - isInsideProperty = case psPath state of - (_ : parent : _) -> isElement parent "property" - _ -> False - in case psContext state of - InIntermediateExchange idata - | not isInsideProperty -> - state{psContext = InIntermediateExchange idata{idFlowName = txt}, psPath = drop 1 (psPath state), psTextAccum = []} - InElementaryExchange edata - | not isInsideProperty -> - state{psContext = InElementaryExchange edata{edFlowName = txt}, psPath = drop 1 (psPath state), psTextAccum = []} - _ -> state{psPath = drop 1 (psPath state), psTextAccum = []} + let txt = accumText state + in if pathAt 1 "property" state + then popText state + else onExchange (\d -> d{idFlowName = txt}) (\d -> d{edFlowName = txt}) state | isElement tagName "unitName" = - let txt = T.concat $ reverse $ map bsToText (psTextAccum state) - isInsideProperty = case psPath state of - (_ : parent : _) -> isElement parent "property" - _ -> False - in case psContext state of - InIntermediateExchange idata - | not isInsideProperty -> - state{psContext = InIntermediateExchange idata{idUnitName = txt}, psPath = drop 1 (psPath state), psTextAccum = []} - InElementaryExchange edata - | not isInsideProperty -> - state{psContext = InElementaryExchange edata{edUnitName = txt}, psPath = drop 1 (psPath state), psTextAccum = []} - _ -> state{psPath = drop 1 (psPath state), psTextAccum = []} + let txt = accumText state + in if pathAt 1 "property" state + then popText state + else onExchange (\d -> d{idUnitName = txt}) (\d -> d{edUnitName = txt}) state | isElement tagName "synonym" = - let txt = T.strip $ T.concat $ reverse $ map bsToText (psTextAccum state) - in case psContext state of - InIntermediateExchange idata - | not (T.null txt) -> - let syns = M.insertWith S.union "en" (S.singleton txt) (idSynonyms idata) - in state{psContext = InIntermediateExchange idata{idSynonyms = syns}, psPath = drop 1 (psPath state), psTextAccum = []} - InElementaryExchange edata - | not (T.null txt) -> - let syns = M.insertWith S.union "en" (S.singleton txt) (edSynonyms edata) - in state{psContext = InElementaryExchange edata{edSynonyms = syns}, psPath = drop 1 (psPath state), psTextAccum = []} - _ -> state{psPath = drop 1 (psPath state), psTextAccum = []} + let txt = T.strip (accumText state) + ins m = if T.null txt then m else M.insertWith S.union "en" (S.singleton txt) m + in onExchange (\d -> d{idSynonyms = ins (idSynonyms d)}) (\d -> d{edSynonyms = ins (edSynonyms d)}) state + -- inputGroup / outputGroup: stash the pending value, keep the parent exchange context. | isElement tagName "inputGroup" = - let txt = T.strip $ T.concat $ reverse $ map bsToText (psTextAccum state) - in -- DON'T change psContext - preserve the parent exchange context - state{psPendingInputGroup = txt, psPath = drop 1 (psPath state), psTextAccum = []} + (popText state){psPendingInputGroup = T.strip (accumText state)} | isElement tagName "outputGroup" = - let txt = T.strip $ T.concat $ reverse $ map bsToText (psTextAccum state) - in -- DON'T change psContext - preserve the parent exchange context - state{psPendingOutputGroup = txt, psPath = drop 1 (psPath state), psTextAccum = []} + (popText state){psPendingOutputGroup = T.strip (accumText state)} | isElement tagName "compartment" = - let txt = T.strip $ T.concat $ reverse $ map bsToText (psTextAccum state) - in case psContext state of - InElementaryExchange edata - | not (T.null txt) -> - state{psContext = InElementaryExchange edata{edCompartments = txt : edCompartments edata}, psPath = drop 1 (psPath state), psTextAccum = []} - _ -> - state{psPath = drop 1 (psPath state), psTextAccum = []} + let txt = T.strip (accumText state) + add d = if T.null txt then d else d{edCompartments = txt : edCompartments d} + in onExchange id add state | isElement tagName "subcompartment" = - let txt = T.strip $ T.concat $ reverse $ map bsToText (psTextAccum state) - in case psContext state of - InElementaryExchange edata - | not (T.null txt) -> - state{psContext = InElementaryExchange edata{edSubcompartments = txt : edSubcompartments edata}, psPath = drop 1 (psPath state), psTextAccum = []} - _ -> - state{psPath = drop 1 (psPath state), psTextAccum = []} + let txt = T.strip (accumText state) + add d = if T.null txt then d else d{edSubcompartments = txt : edSubcompartments d} + in onExchange id add state | isElement tagName "classificationSystem" = - let txt = T.strip $ T.concat $ reverse $ map bsToText (psTextAccum state) - in state{psPendingClassSystem = txt, psPath = drop 1 (psPath state), psTextAccum = []} + (popText state){psPendingClassSystem = T.strip (accumText state)} | isElement tagName "classificationValue" = - let txt = T.strip $ T.concat $ reverse $ map bsToText (psTextAccum state) + let txt = T.strip (accumText state) sys = psPendingClassSystem state - emptyPair = T.null sys || T.null txt - in case psContext state of - InIntermediateExchange idata - | not emptyPair -> - state - { psContext = - InIntermediateExchange - idata{idClassifications = M.insert sys txt (idClassifications idata)} - , psPath = drop 1 (psPath state) - , psTextAccum = [] - } - _ -> - state - { psClassifications = - if emptyPair - then psClassifications state - else M.insert sys txt (psClassifications state) - , psPath = drop 1 (psPath state) - , psTextAccum = [] - } + in popText $ + if T.null sys || T.null txt + then state + else case currentIntermediate state of + -- Exchange-scoped classification (e.g. By-product → Waste). + Just d -> state{psContext = InIntermediateExchange d{idClassifications = M.insert sys txt (idClassifications d)}} + -- Otherwise an activity-level classification. + Nothing -> state{psClassifications = M.insert sys txt (psClassifications state)} | otherwise = - state{psPath = drop 1 (psPath state)} + popPath state -- CDATA handler - treat as text cdata = text @@ -688,9 +661,7 @@ parseWithXeno xmlContent processId = bios = reverse (psBioFlows st) wastes = reverse (psWasteFlows st) units = reverse (psUnits st) - in case applyCutoffStrategy activity of - Right act -> Right (act, techs, bios, wastes, units) - Left err -> Left err + in (\act -> (act, techs, bios, wastes, units)) <$> applyCutoffStrategy activity -- | Parse EcoSpold file using Xeno SAX parser streamParseActivityAndFlowsFromFile :: FilePath -> IO (Either String (Activity, [TechnosphereFlow], [BiosphereFlow], [WasteFlow], [Unit])) From cad79700719a2a6bd1ed530b3a81668c2b408ab7 Mon Sep 17 00:00:00 2001 From: Christophe Combelles Date: Thu, 28 May 2026 15:46:21 +0200 Subject: [PATCH 35/43] refactor(mcp): extract shared ExceptT plumbing for tool handlers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The tool handlers each re-inlined the same wrapper, DB lookup, and error-lifting boilerplate. Factor it into four small primitives — runTool (the runExceptT/toolError envelope), requireDatabase, liftShow, and transformers' own `except` in place of hand-rolled `ExceptT . pure` — and migrate the already-ExceptT handlers onto them. Behaviour-preserving: same toolError/toolSuccess outputs, same short-circuit order. Removes one do-block nesting level from every converted handler (hence the indentation churn). --- src/API/MCP.hs | 862 ++++++++++++++++++++++++------------------------- 1 file changed, 424 insertions(+), 438 deletions(-) diff --git a/src/API/MCP.hs b/src/API/MCP.hs index 81a15ee5..443c9fd5 100644 --- a/src/API/MCP.hs +++ b/src/API/MCP.hs @@ -28,8 +28,9 @@ import System.Random (randomIO) import API.Resources (Param (..), ParamKind (..), Resource) import qualified API.Resources as R import Config (ClassificationEntry (..), ClassificationPreset (..), DatabaseConfig (..)) +import Control.Applicative ((<|>)) import Control.Monad.IO.Class (liftIO) -import Control.Monad.Trans.Except (ExceptT (..), runExceptT, throwE) +import Control.Monad.Trans.Except (ExceptT (..), except, runExceptT, throwE) import Database.Manager (DatabaseManager (..), LoadedDatabase (..), getDatabase) import qualified Database.Manager as DM @@ -376,6 +377,28 @@ withDb dbManager rid args action = Nothing -> return $ toolError rid ("Database not loaded: " <> dbName) Just ld -> action (ldDatabase ld, ldSharedSolver ld) +-- --------------------------------------------------------------------------- +-- ExceptT plumbing shared by every handler +-- --------------------------------------------------------------------------- + +{- | Run a handler body in the shared 'ExceptT Text IO Value' monad: any +'throwE'/'Left' short-circuits to a 'toolError', a success passes through. +Every tool handler is @runTool rid $ do …@. +-} +runTool :: Value -> ExceptT Text IO Value -> IO Value +runTool rid = fmap (either (toolError rid) id) . runExceptT + +{- | Resolve a loaded database by name, short-circuiting with the standard +"not loaded" message. The 'ExceptT' counterpart to 'withDb'. +-} +requireDatabase :: DatabaseManager -> Text -> ExceptT Text IO LoadedDatabase +requireDatabase dbManager dbName = + ExceptT $ maybe (Left ("Database not loaded: " <> dbName)) Right <$> getDatabase dbManager dbName + +-- | Lift an 'Either' whose error only has a 'Show' instance into the handler monad. +liftShow :: (Show e) => Either e a -> ExceptT Text IO a +liftShow = either (throwE . T.pack . show) pure + textArg :: Text -> KeyMap Value -> Maybe Text textArg key args = case KM.lookup (fromText key) args of Just (String t) -> Just t @@ -824,66 +847,58 @@ so inventories from dep DBs are merged into the returned flows. -} callGetInventory :: DatabaseManager -> Value -> KeyMap Value -> IO Value callGetInventory dbManager rid args = - either (toolError rid) id - <$> runExceptT - ( do - (dbName, pid) <- ExceptT $ pure $ (,) <$> requireText "database" args <*> requireText "process_id" args - mLoaded <- liftIO $ getDatabase dbManager dbName - ld <- case mLoaded of - Nothing -> throwE ("Database not loaded: " <> dbName) - Just x -> pure x - let db = ldDatabase ld - solver = ldSharedSolver ld - limit = fromMaybe 50 (intArg "limit" args) - nameFilter = textArg "flow" args - ExceptT $ pure $ ensureLinked dbName "computing inventory" db - (processId, activity) <- case Service.resolveActivityAndProcessId db pid of - Left err -> throwE (T.pack (show err)) - Right v -> pure v - subs <- ExceptT $ pure (parseArrayArg "substitutions" Nothing args :: Either Text [Substitution]) - unitCfg <- liftIO $ DM.getMergedUnitConfig dbManager - (mFlows, mUnits) <- liftIO $ DM.getMergedFlowMetadata dbManager - -- Empty subs: same as GET path (plain cross-DB inventory). - -- Non-empty subs: route through the substitution-aware pipeline so - -- dep DBs re-solve against the substituted root scaling. - inventory <- - ExceptT $ - if null subs - then fmap (fmap SharedSolver.csInventory) (computeInventoryMatrixWithDepsCached unitCfg (DM.mkDepSolverLookup dbManager) db dbName solver processId) - else - either (Left . T.pack . show) (Right . SharedSolver.csInventory) - <$> Service.inventoryWithSubsAndDeps - unitCfg - (DM.mkDepSolverLookup dbManager) - db - dbName - solver - processId - subs - let inv = Service.convertToInventoryExport db mFlows mUnits processId activity inventory - flows = ieFlows inv - filtered = case nameFilter of - Nothing -> flows - Just q -> filter (T.isInfixOf (T.toLower q) . T.toLower . bfName . ifdFlow) flows - sorted = L.sortBy (\a b -> compare (abs $ ifdQuantity b) (abs $ ifdQuantity a)) filtered - topN = take limit sorted - slim f = - object - [ "flow" .= bfName (ifdFlow f) - , "quantity" .= ifdQuantity f - , "unit" .= ifdUnitName f - , "category" .= ifdCategory f - , "isEmission" .= ifdIsEmission f - ] - pure $ - toolSuccessJson rid $ - object - [ "statistics" .= toJSON (ieStatistics inv) - , "total_flows" .= length flows - , "shown_flows" .= length topN - , "flows" .= map slim topN - ] - ) + runTool rid $ do + (dbName, pid) <- except $ (,) <$> requireText "database" args <*> requireText "process_id" args + ld <- requireDatabase dbManager dbName + let db = ldDatabase ld + solver = ldSharedSolver ld + limit = fromMaybe 50 (intArg "limit" args) + nameFilter = textArg "flow" args + except $ ensureLinked dbName "computing inventory" db + (processId, activity) <- liftShow (Service.resolveActivityAndProcessId db pid) + subs <- except (parseArrayArg "substitutions" Nothing args :: Either Text [Substitution]) + unitCfg <- liftIO $ DM.getMergedUnitConfig dbManager + (mFlows, mUnits) <- liftIO $ DM.getMergedFlowMetadata dbManager + -- Empty subs: same as GET path (plain cross-DB inventory). + -- Non-empty subs: route through the substitution-aware pipeline so + -- dep DBs re-solve against the substituted root scaling. + inventory <- + ExceptT $ + if null subs + then fmap (fmap SharedSolver.csInventory) (computeInventoryMatrixWithDepsCached unitCfg (DM.mkDepSolverLookup dbManager) db dbName solver processId) + else + either (Left . T.pack . show) (Right . SharedSolver.csInventory) + <$> Service.inventoryWithSubsAndDeps + unitCfg + (DM.mkDepSolverLookup dbManager) + db + dbName + solver + processId + subs + let inv = Service.convertToInventoryExport db mFlows mUnits processId activity inventory + flows = ieFlows inv + filtered = case nameFilter of + Nothing -> flows + Just q -> filter (T.isInfixOf (T.toLower q) . T.toLower . bfName . ifdFlow) flows + sorted = L.sortBy (\a b -> compare (abs $ ifdQuantity b) (abs $ ifdQuantity a)) filtered + topN = take limit sorted + slim f = + object + [ "flow" .= bfName (ifdFlow f) + , "quantity" .= ifdQuantity f + , "unit" .= ifdUnitName f + , "category" .= ifdCategory f + , "isEmission" .= ifdIsEmission f + ] + pure $ + toolSuccessJson rid $ + object + [ "statistics" .= toJSON (ieStatistics inv) + , "total_flows" .= length flows + , "shown_flows" .= length topN + , "flows" .= map slim topN + ] -- | JSON shape for one uncharacterized-flow diagnostic entry. encodeUncharacterized :: UncharacterizedFlow -> Value @@ -950,8 +965,8 @@ runImpactsRequest dbManager args req = do method = lrMethod req dbName = lrDbName req ra = lrResolved req - ExceptT $ pure $ ensureLinked dbName "computing impacts" db - subs <- ExceptT $ pure (parseArrayArg "substitutions" Nothing args :: Either Text [Substitution]) + except $ ensureLinked dbName "computing impacts" db + subs <- except (parseArrayArg "substitutions" Nothing args :: Either Text [Substitution]) unitCfg <- liftIO $ DM.getMergedUnitConfig dbManager (mFlows, mUnits) <- liftIO $ DM.getMergedFlowMetadata dbManager inventory <- @@ -1013,75 +1028,72 @@ per the naming audit; internal Haskell types keep the 'LCIA' acronym -} callGetImpacts :: DatabaseManager -> Maybe Text -> Value -> KeyMap Value -> IO Value callGetImpacts dbManager mBaseUrl rid args = - either (toolError rid) id - <$> runExceptT - ( do - req <- loadLcaRequest dbManager args - ir <- runImpactsRequest dbManager args req - (_, mUnits) <- liftIO $ DM.getMergedFlowMetadata dbManager - let topN = fromMaybe 5 (intArg "top_flows" args) - method = lrMethod req - dbName = lrDbName req - ra = lrResolved req - score = loScore (irOutcome ir) - stats = irMappingStats ir - functionalUnit = - T.pack (showFFloat (Just 2) (irRefProductAmount ir) "") - <> " " - <> irRefProductUnit ir - <> " of " - <> irRefProductName ir - contribs = irContribs ir - topFlows = take topN contribs - webUrlPair = webUrlField mBaseUrl ("/db/" <> dbName <> "/activity/" <> raText ra <> "/impacts/" <> encodeSegment (lrCollection req) <> "/" <> lrMethodIdText req) - hasNeg = any (\(_, _, c) -> c < 0) contribs - unknownUuids = irUnknownUuids ir - liftIO $ - unless (null unknownUuids) $ - reportProgress Warning $ - "[MCP get_impacts " - <> T.unpack (methodName method) - <> "] " - <> show (length unknownUuids) - <> " inventory flow UUID(s) absent from merged FlowDB — characterization incomplete. Samples: " - <> show (take 3 unknownUuids) - let outcome = irOutcome ir - diagnosticsFields = - [ "uncharacterized_flows" .= map encodeUncharacterized (loUncharacterized outcome) - , "characterized_share" - .= ( if loInventoryAbsSum outcome > 0 - then loCharacterizedSum outcome / loInventoryAbsSum outcome - else 1 :: Double - ) + runTool rid $ do + req <- loadLcaRequest dbManager args + ir <- runImpactsRequest dbManager args req + (_, mUnits) <- liftIO $ DM.getMergedFlowMetadata dbManager + let topN = fromMaybe 5 (intArg "top_flows" args) + method = lrMethod req + dbName = lrDbName req + ra = lrResolved req + score = loScore (irOutcome ir) + stats = irMappingStats ir + functionalUnit = + T.pack (showFFloat (Just 2) (irRefProductAmount ir) "") + <> " " + <> irRefProductUnit ir + <> " of " + <> irRefProductName ir + contribs = irContribs ir + topFlows = take topN contribs + webUrlPair = webUrlField mBaseUrl ("/db/" <> dbName <> "/activity/" <> raText ra <> "/impacts/" <> encodeSegment (lrCollection req) <> "/" <> lrMethodIdText req) + hasNeg = any (\(_, _, c) -> c < 0) contribs + unknownUuids = irUnknownUuids ir + liftIO $ + unless (null unknownUuids) $ + reportProgress Warning $ + "[MCP get_impacts " + <> T.unpack (methodName method) + <> "] " + <> show (length unknownUuids) + <> " inventory flow UUID(s) absent from merged FlowDB — characterization incomplete. Samples: " + <> show (take 3 unknownUuids) + let outcome = irOutcome ir + diagnosticsFields = + [ "uncharacterized_flows" .= map encodeUncharacterized (loUncharacterized outcome) + , "characterized_share" + .= ( if loInventoryAbsSum outcome > 0 + then loCharacterizedSum outcome / loInventoryAbsSum outcome + else 1 :: Double + ) + ] + pure $ + toolSuccessJson rid $ + attachMarketHintByName (activityName (raActivity ra)) $ + object $ + [ "method" .= methodName method + , "category" .= methodCategory method + , "score" .= score + , "unit" .= methodUnit method + , "functional_unit" .= functionalUnit + , "mapped_flows" .= (msTotal stats - msUnmatched stats) + , "has_negative_contributions" .= hasNeg + , "top_flows" + .= [ object + [ "flow_name" .= bfName f + , "contribution" .= c + , "contribution_percent" .= (if score /= 0 then c / score * 100 else 0 :: Double) + , "flow_id" .= UUID.toText (bfId f) + , "category" .= bfCompartmentName f + , "compartment" .= bfCompartmentSub f + , "cf_value" .= cfVal + , "flow_unit" .= getUnitNameForBioFlow mUnits f + ] + | (f, cfVal, c) <- topFlows + ] ] - pure $ - toolSuccessJson rid $ - attachMarketHintByName (activityName (raActivity ra)) $ - object $ - [ "method" .= methodName method - , "category" .= methodCategory method - , "score" .= score - , "unit" .= methodUnit method - , "functional_unit" .= functionalUnit - , "mapped_flows" .= (msTotal stats - msUnmatched stats) - , "has_negative_contributions" .= hasNeg - , "top_flows" - .= [ object - [ "flow_name" .= bfName f - , "contribution" .= c - , "contribution_percent" .= (if score /= 0 then c / score * 100 else 0 :: Double) - , "flow_id" .= UUID.toText (bfId f) - , "category" .= bfCompartmentName f - , "compartment" .= bfCompartmentSub f - , "cf_value" .= cfVal - , "flow_unit" .= getUnitNameForBioFlow mUnits f - ] - | (f, cfVal, c) <- topFlows - ] - ] - ++ webUrlPair - ++ (if fromMaybe False (boolArg "include_diagnostics" args) then diagnosticsFields else []) - ) + ++ webUrlPair + ++ (if fromMaybe False (boolArg "include_diagnostics" args) then diagnosticsFields else []) {- | Handler for the 'compute_sensitivity' MCP tool. Mirrors the REST @POST /sensitivity/{collection}/{methodId}@ endpoint: runs Service.computeSensitivities @@ -1092,77 +1104,72 @@ location-hierarchy walk; non-regionalized methods stay on the classic -} callComputeSensitivity :: DatabaseManager -> Maybe Text -> Value -> KeyMap Value -> IO Value callComputeSensitivity dbManager mBaseUrl rid args = - either (toolError rid) id - <$> runExceptT - ( do - req <- loadLcaRequest dbManager args - let ld = lrLoaded req - db = ldDatabase ld - method = lrMethod req - dbName = lrDbName req - ra = lrResolved req - ExceptT $ pure $ ensureLinked dbName "computing sensitivity" db - perts <- - ExceptT $ - pure - ( parseArrayArg - "perturbations" - (Just "'perturbations' is required (array of {consumer, supplier, delta, label?})") - args :: - Either Text [Perturbation] - ) - unitCfg <- liftIO $ DM.getMergedUnitConfig dbManager - (mFlows, mUnits) <- liftIO $ DM.getMergedFlowMetadata dbManager - tables <- liftIO $ DM.mapMethodToTablesCached dbManager dbName db method - hier <- liftIO $ DM.getLocationHierarchy dbManager - eRes <- - liftIO $ - Service.computeSensitivities db (ldSharedSolver ld) (raPid ra) perts - (baselineX, perResults) <- case eRes of - Left err -> throwE (T.pack (show err)) - Right v -> pure v - let scoreOf x = - let inv = applyBiosphereMatrix db x - in case computeLCIAScoreAuto unitCfg mUnits mFlows db x inv hier tables of - Right s -> Right s - Left e -> Left e - baselineScore <- case scoreOf baselineX of - Right s -> pure s - Left e -> throwE ("baseline scoring failed: " <> e) - let webUrlPair = webUrlField mBaseUrl ("/db/" <> dbName <> "/activity/" <> raText ra <> "/sensitivity/" <> encodeSegment (lrCollection req) <> "/" <> lrMethodIdText req) - pertEntry (p, eitherX) = - let base = - [ "perturbation" - .= object - [ "consumer" .= perConsumer p - , "supplier" .= perSupplier p - , "delta" .= perDelta p - ] + runTool rid $ do + req <- loadLcaRequest dbManager args + let ld = lrLoaded req + db = ldDatabase ld + method = lrMethod req + dbName = lrDbName req + ra = lrResolved req + except $ ensureLinked dbName "computing sensitivity" db + perts <- + ExceptT $ + pure + ( parseArrayArg + "perturbations" + (Just "'perturbations' is required (array of {consumer, supplier, delta, label?})") + args :: + Either Text [Perturbation] + ) + unitCfg <- liftIO $ DM.getMergedUnitConfig dbManager + (mFlows, mUnits) <- liftIO $ DM.getMergedFlowMetadata dbManager + tables <- liftIO $ DM.mapMethodToTablesCached dbManager dbName db method + hier <- liftIO $ DM.getLocationHierarchy dbManager + eRes <- + liftIO $ + Service.computeSensitivities db (ldSharedSolver ld) (raPid ra) perts + (baselineX, perResults) <- liftShow eRes + let scoreOf x = + let inv = applyBiosphereMatrix db x + in case computeLCIAScoreAuto unitCfg mUnits mFlows db x inv hier tables of + Right s -> Right s + Left e -> Left e + baselineScore <- case scoreOf baselineX of + Right s -> pure s + Left e -> throwE ("baseline scoring failed: " <> e) + let webUrlPair = webUrlField mBaseUrl ("/db/" <> dbName <> "/activity/" <> raText ra <> "/sensitivity/" <> encodeSegment (lrCollection req) <> "/" <> lrMethodIdText req) + pertEntry (p, eitherX) = + let base = + [ "perturbation" + .= object + [ "consumer" .= perConsumer p + , "supplier" .= perSupplier p + , "delta" .= perDelta p ] - withLabel = case perLabel p of - Just l -> ("label" .= l) : base - Nothing -> base - in case eitherX of - Left err -> object (("error" .= err) : withLabel) - Right x' -> case scoreOf x' of - Left err -> object (("error" .= err) : withLabel) - Right s -> - object - ( ("score" .= s) - : ("delta_score" .= (s - baselineScore)) - : withLabel - ) - pure $ - toolSuccessJson rid $ - object $ - [ "method" .= methodName method - , "category" .= methodCategory method - , "unit" .= methodUnit method - , "baseline_score" .= baselineScore - , "perturbed" .= map pertEntry perResults - ] - ++ webUrlPair - ) + ] + withLabel = case perLabel p of + Just l -> ("label" .= l) : base + Nothing -> base + in case eitherX of + Left err -> object (("error" .= err) : withLabel) + Right x' -> case scoreOf x' of + Left err -> object (("error" .= err) : withLabel) + Right s -> + object + ( ("score" .= s) + : ("delta_score" .= (s - baselineScore)) + : withLabel + ) + pure $ + toolSuccessJson rid $ + object $ + [ "method" .= methodName method + , "category" .= methodCategory method + , "unit" .= methodUnit method + , "baseline_score" .= baselineScore + , "perturbed" .= map pertEntry perResults + ] + ++ webUrlPair {- | Cross-database impact comparison for mapping audits. @@ -1180,65 +1187,62 @@ audit is designed to expose. -} callCompareImpacts :: DatabaseManager -> Value -> KeyMap Value -> IO Value callCompareImpacts dbManager rid args = - either (toolError rid) id - <$> runExceptT - ( do - argsA <- ExceptT . pure $ subArgs "_a" args - argsB <- ExceptT . pure $ subArgs "_b" args - reqA <- loadLcaRequest dbManager argsA - reqB <- loadLcaRequest dbManager argsB - irA <- runImpactsRequest dbManager argsA reqA - irB <- runImpactsRequest dbManager argsB reqB - let topN = fromMaybe 10 (intArg "top_flows" args) - scoreA = loScore (irOutcome irA) - scoreB = loScore (irOutcome irB) - delta = scoreA - scoreB - relPct = - if scoreB /= 0 - then abs delta / abs scoreB * 100 - else 0 - aTop = take topN (irContribs irA) - bTop = take topN (irContribs irB) - aMap = M.fromList [(flowKey f, c) | (f, _, c) <- irContribs irA] - bMap = M.fromList [(flowKey f, c) | (f, _, c) <- irContribs irB] - common = - [ object - [ "flow_name" .= bfName f - , "category" .= bfCompartmentName f - , "compartment" .= bfCompartmentSub f - , "a_contrib" .= cA - , "b_contrib" .= cB - , "delta" .= (cA - cB) - ] - | (f, _, cA) <- aTop - , let k = flowKey f - , Just cB <- [M.lookup k bMap] - ] - aOnly = - [ encodeContrib f c - | (f, _, c) <- aTop - , M.notMember (flowKey f) bMap - ] - bOnly = - [ encodeContrib f c - | (f, _, c) <- bTop - , M.notMember (flowKey f) aMap - ] - pure $ - toolSuccessJson rid $ - object - [ "a" .= sideJson reqA irA - , "b" .= sideJson reqB irB - , "delta" - .= object - [ "absolute" .= delta - , "relative_pct" .= relPct - ] - , "common_flows" .= common - , "top_a_only_flows" .= aOnly - , "top_b_only_flows" .= bOnly + runTool rid $ do + argsA <- except $ subArgs "_a" args + argsB <- except $ subArgs "_b" args + reqA <- loadLcaRequest dbManager argsA + reqB <- loadLcaRequest dbManager argsB + irA <- runImpactsRequest dbManager argsA reqA + irB <- runImpactsRequest dbManager argsB reqB + let topN = fromMaybe 10 (intArg "top_flows" args) + scoreA = loScore (irOutcome irA) + scoreB = loScore (irOutcome irB) + delta = scoreA - scoreB + relPct = + if scoreB /= 0 + then abs delta / abs scoreB * 100 + else 0 + aTop = take topN (irContribs irA) + bTop = take topN (irContribs irB) + aMap = M.fromList [(flowKey f, c) | (f, _, c) <- irContribs irA] + bMap = M.fromList [(flowKey f, c) | (f, _, c) <- irContribs irB] + common = + [ object + [ "flow_name" .= bfName f + , "category" .= bfCompartmentName f + , "compartment" .= bfCompartmentSub f + , "a_contrib" .= cA + , "b_contrib" .= cB + , "delta" .= (cA - cB) + ] + | (f, _, cA) <- aTop + , let k = flowKey f + , Just cB <- [M.lookup k bMap] + ] + aOnly = + [ encodeContrib f c + | (f, _, c) <- aTop + , M.notMember (flowKey f) bMap + ] + bOnly = + [ encodeContrib f c + | (f, _, c) <- bTop + , M.notMember (flowKey f) aMap + ] + pure $ + toolSuccessJson rid $ + object + [ "a" .= sideJson reqA irA + , "b" .= sideJson reqB irB + , "delta" + .= object + [ "absolute" .= delta + , "relative_pct" .= relPct ] - ) + , "common_flows" .= common + , "top_a_only_flows" .= aOnly + , "top_b_only_flows" .= bOnly + ] where sideJson req ir = let outcome = irOutcome ir @@ -1580,20 +1584,14 @@ unknown method, unresolvable process id). loadLcaRequest :: DatabaseManager -> KeyMap Value -> ExceptT Text IO LcaRequest loadLcaRequest dbManager args = do (dbName, pidText, methodIdText) <- - ExceptT $ - pure $ - (,,) - <$> requireText "database" args - <*> requireText "process_id" args - <*> requireText "method_id" args - mLoaded <- liftIO $ getDatabase dbManager dbName - ld <- case mLoaded of - Nothing -> throwE ("Database not loaded: " <> dbName) - Just x -> pure x + except $ + (,,) + <$> requireText "database" args + <*> requireText "process_id" args + <*> requireText "method_id" args + ld <- requireDatabase dbManager dbName (col, method) <- ExceptT (resolveMethod dbManager methodIdText) - (pid, act) <- case Service.resolveActivityAndProcessId (ldDatabase ld) pidText of - Left err -> throwE (T.pack (show err)) - Right v -> pure v + (pid, act) <- liftShow (Service.resolveActivityAndProcessId (ldDatabase ld) pidText) pure LcaRequest { lrDbName = dbName @@ -1624,138 +1622,132 @@ ensureLinked dbName op db = callGetContributingFlows :: DatabaseManager -> Maybe Text -> Value -> KeyMap Value -> IO Value callGetContributingFlows dbManager mBaseUrl rid args = - either (toolError rid) id - <$> runExceptT - ( do - req <- loadLcaRequest dbManager args - let ld = lrLoaded req - db = ldDatabase ld - method = lrMethod req - dbName = lrDbName req - ra = lrResolved req - lim = fromMaybe 20 (intArg "limit" args) - webUrlPair = webUrlField mBaseUrl ("/db/" <> dbName <> "/activity/" <> raText ra <> "/contributing-flows/" <> encodeSegment (lrCollection req) <> "/" <> lrMethodIdText req) - ExceptT $ pure $ ensureLinked dbName "computing contributions" db - unitCfg <- liftIO $ DM.getMergedUnitConfig dbManager - (mFlows, mUnits) <- liftIO $ DM.getMergedFlowMetadata dbManager - sol <- - ExceptT $ - computeInventoryMatrixWithDepsCached - unitCfg - (DM.mkDepSolverLookup dbManager) - db - dbName - (ldSharedSolver ld) - (raPid ra) - let inventory = SharedSolver.csInventory sol - tables <- liftIO $ DM.mapMethodToTablesCached dbManager dbName db method - let outcome = computeLCIAScoreFromTables unitCfg mUnits mFlows inventory tables - score = loScore outcome - (rawContribs, unknownUuids) = inventoryContributions unitCfg mUnits mFlows inventory tables - contribs = L.sortOn (\(_, _, c) -> negate (abs c)) rawContribs - top = take lim contribs - hasNeg = any (\(_, _, c) -> c < 0) contribs - diagnosticsFields <- - if fromMaybe False (boolArg "include_diagnostics" args) - then do - idx <- liftIO $ DM.mapMethodToIndexCached dbManager dbName method - let opts = defaultUncharacterizedOpts - uncharacterized = - Mapping.findUncharacterized - unitCfg - mUnits - mFlows - inventory - tables - (DM.dmChemSynonyms dbManager) - idx - opts - pure - [ "uncharacterized_flows" .= map encodeUncharacterized uncharacterized - , "characterized_share" - .= ( if loInventoryAbsSum outcome > 0 - then loCharacterizedSum outcome / loInventoryAbsSum outcome - else 1 :: Double - ) + runTool rid $ do + req <- loadLcaRequest dbManager args + let ld = lrLoaded req + db = ldDatabase ld + method = lrMethod req + dbName = lrDbName req + ra = lrResolved req + lim = fromMaybe 20 (intArg "limit" args) + webUrlPair = webUrlField mBaseUrl ("/db/" <> dbName <> "/activity/" <> raText ra <> "/contributing-flows/" <> encodeSegment (lrCollection req) <> "/" <> lrMethodIdText req) + except $ ensureLinked dbName "computing contributions" db + unitCfg <- liftIO $ DM.getMergedUnitConfig dbManager + (mFlows, mUnits) <- liftIO $ DM.getMergedFlowMetadata dbManager + sol <- + ExceptT $ + computeInventoryMatrixWithDepsCached + unitCfg + (DM.mkDepSolverLookup dbManager) + db + dbName + (ldSharedSolver ld) + (raPid ra) + let inventory = SharedSolver.csInventory sol + tables <- liftIO $ DM.mapMethodToTablesCached dbManager dbName db method + let outcome = computeLCIAScoreFromTables unitCfg mUnits mFlows inventory tables + score = loScore outcome + (rawContribs, unknownUuids) = inventoryContributions unitCfg mUnits mFlows inventory tables + contribs = L.sortOn (\(_, _, c) -> negate (abs c)) rawContribs + top = take lim contribs + hasNeg = any (\(_, _, c) -> c < 0) contribs + diagnosticsFields <- + if fromMaybe False (boolArg "include_diagnostics" args) + then do + idx <- liftIO $ DM.mapMethodToIndexCached dbManager dbName method + let opts = defaultUncharacterizedOpts + uncharacterized = + Mapping.findUncharacterized + unitCfg + mUnits + mFlows + inventory + tables + (DM.dmChemSynonyms dbManager) + idx + opts + pure + [ "uncharacterized_flows" .= map encodeUncharacterized uncharacterized + , "characterized_share" + .= ( if loInventoryAbsSum outcome > 0 + then loCharacterizedSum outcome / loInventoryAbsSum outcome + else 1 :: Double + ) + ] + else pure [] + liftIO $ + unless (null unknownUuids) $ + reportProgress Warning $ + "[MCP get_contributing_flows " + <> T.unpack (methodName method) + <> "] " + <> show (length unknownUuids) + <> " inventory flow UUID(s) absent from merged FlowDB. Samples: " + <> show (take 3 unknownUuids) + pure $ + toolSuccessJson rid $ + object $ + [ "method" .= methodName method + , "unit" .= methodUnit method + , "total_score" .= score + , "has_negative_contributions" .= hasNeg + , "top_flows" + .= [ object + [ "flow_name" .= bfName f + , "contribution" .= c + , "contribution_percent" .= (if score /= 0 then c / score * 100 else 0 :: Double) + , "flow_id" .= UUID.toText (bfId f) + , "category" .= bfCompartmentName f + , "compartment" .= bfCompartmentSub f + , "cf_value" .= cfVal ] - else pure [] - liftIO $ - unless (null unknownUuids) $ - reportProgress Warning $ - "[MCP get_contributing_flows " - <> T.unpack (methodName method) - <> "] " - <> show (length unknownUuids) - <> " inventory flow UUID(s) absent from merged FlowDB. Samples: " - <> show (take 3 unknownUuids) - pure $ - toolSuccessJson rid $ - object $ - [ "method" .= methodName method - , "unit" .= methodUnit method - , "total_score" .= score - , "has_negative_contributions" .= hasNeg - , "top_flows" - .= [ object - [ "flow_name" .= bfName f - , "contribution" .= c - , "contribution_percent" .= (if score /= 0 then c / score * 100 else 0 :: Double) - , "flow_id" .= UUID.toText (bfId f) - , "category" .= bfCompartmentName f - , "compartment" .= bfCompartmentSub f - , "cf_value" .= cfVal - ] - | (f, cfVal, c) <- top - ] - ] - ++ webUrlPair - ++ diagnosticsFields - ) + | (f, cfVal, c) <- top + ] + ] + ++ webUrlPair + ++ diagnosticsFields callGetContributingActivities :: DatabaseManager -> Maybe Text -> Value -> KeyMap Value -> IO Value callGetContributingActivities dbManager mBaseUrl rid args = - either (toolError rid) id - <$> runExceptT - ( do - req <- loadLcaRequest dbManager args - let ld = lrLoaded req - db = ldDatabase ld - method = lrMethod req - dbName = lrDbName req - ra = lrResolved req - lim = fromMaybe 10 (intArg "limit" args) - ExceptT $ pure $ ensureLinked dbName "computing contributions" db - unitCfg <- liftIO $ DM.getMergedUnitConfig dbManager - (mFlows, mUnits) <- liftIO $ DM.getMergedFlowMetadata dbManager - tables <- liftIO $ DM.mapMethodToTablesCached dbManager dbName db method - -- Skip separate inventory compute: contributions sum equals the score. - contributions <- - ExceptT $ - crossDBProcessContributions - unitCfg - mUnits - mFlows - (DM.mkDepSolverLookup dbManager) - db - dbName - (ldSharedSolver ld) - (raPid ra) - tables - let score = sum (M.elems contributions) - sorted = L.sortOn (\(_, c) -> negate (abs c)) (M.toList contributions) - top = take lim sorted - hasNeg = any (\(_, c) -> c < 0) top - rows <- liftIO $ mapM (mkMcpCrossDBEntry dbManager dbName mBaseUrl (lrCollection req) (lrMethodIdText req) mFlows mUnits score) top - pure $ - toolSuccessJson rid $ - object - [ "method" .= methodName method - , "unit" .= methodUnit method - , "total_score" .= score - , "has_negative_contributions" .= hasNeg - , "processes" .= rows - ] - ) + runTool rid $ do + req <- loadLcaRequest dbManager args + let ld = lrLoaded req + db = ldDatabase ld + method = lrMethod req + dbName = lrDbName req + ra = lrResolved req + lim = fromMaybe 10 (intArg "limit" args) + except $ ensureLinked dbName "computing contributions" db + unitCfg <- liftIO $ DM.getMergedUnitConfig dbManager + (mFlows, mUnits) <- liftIO $ DM.getMergedFlowMetadata dbManager + tables <- liftIO $ DM.mapMethodToTablesCached dbManager dbName db method + -- Skip separate inventory compute: contributions sum equals the score. + contributions <- + ExceptT $ + crossDBProcessContributions + unitCfg + mUnits + mFlows + (DM.mkDepSolverLookup dbManager) + db + dbName + (ldSharedSolver ld) + (raPid ra) + tables + let score = sum (M.elems contributions) + sorted = L.sortOn (\(_, c) -> negate (abs c)) (M.toList contributions) + top = take lim sorted + hasNeg = any (\(_, c) -> c < 0) top + rows <- liftIO $ mapM (mkMcpCrossDBEntry dbManager dbName mBaseUrl (lrCollection req) (lrMethodIdText req) mFlows mUnits score) top + pure $ + toolSuccessJson rid $ + object + [ "method" .= methodName method + , "unit" .= methodUnit method + , "total_score" .= score + , "has_negative_contributions" .= hasNeg + , "processes" .= rows + ] callListGeographies :: DatabaseManager -> Value -> KeyMap Value -> IO Value callListGeographies dbManager rid args = @@ -1833,29 +1825,26 @@ ground at a fraction of the bytes. Replaces the @N@ round-trips of -} callScoreActivity :: DatabaseManager -> Maybe Text -> Value -> KeyMap Value -> IO Value callScoreActivity dbManager mBaseUrl rid args = - either (toolError rid) id - <$> runExceptT - ( do - dbName <- ExceptT $ pure (requireText "database" args) - pidText <- ExceptT $ pure (requireText "process_id" args) - coll <- ExceptT $ pure (requireText "collection" args) - subs <- ExceptT $ pure (parseArrayArg "substitutions" Nothing args :: Either Text [Substitution]) - wantedSets <- ExceptT $ pure (parseArrayArg "scoring_sets" Nothing args :: Either Text [Text]) - let mSub = if null subs then Nothing else Just SubstitutionRequest{srSubstitutions = subs} - res <- liftIO $ BI.runActivityLCIABatch dbManager dbName pidText coll mSub - case res of - Left e -> ExceptT $ pure (Left (batchErrorMsg e)) - Right lbr -> do - configured <- liftIO $ configuredScoringSetNames dbManager coll - mActName <- liftIO $ lookupActivityName dbManager dbName pidText - let mTopUrl = scoreActivityWebUrl mBaseUrl dbName pidText coll - enriched = - maybe id attachMarketHintByName mActName $ - addWebUrlMaybe - mTopUrl - (slimLCIAPanel (toJSON lbr)) - ExceptT $ pure (toolSuccessJson rid <$> filterScoringSets configured wantedSets enriched) - ) + runTool rid $ do + dbName <- except (requireText "database" args) + pidText <- except (requireText "process_id" args) + coll <- except (requireText "collection" args) + subs <- except (parseArrayArg "substitutions" Nothing args :: Either Text [Substitution]) + wantedSets <- except (parseArrayArg "scoring_sets" Nothing args :: Either Text [Text]) + let mSub = if null subs then Nothing else Just SubstitutionRequest{srSubstitutions = subs} + res <- liftIO $ BI.runActivityLCIABatch dbManager dbName pidText coll mSub + case res of + Left e -> throwE (batchErrorMsg e) + Right lbr -> do + configured <- liftIO $ configuredScoringSetNames dbManager coll + mActName <- liftIO $ lookupActivityName dbManager dbName pidText + let mTopUrl = scoreActivityWebUrl mBaseUrl dbName pidText coll + enriched = + maybe id attachMarketHintByName mActName $ + addWebUrlMaybe + mTopUrl + (slimLCIAPanel (toJSON lbr)) + except (toolSuccessJson rid <$> filterScoringSets configured wantedSets enriched) {- | Resolve the activity name for a (db, processId) pair. 'Nothing' when the database is not loaded or the PID does not resolve — callers fold @@ -1894,21 +1883,18 @@ unambiguous; see 'resolveSingleScoringSet' for the rules. -} callScoreActivities :: DatabaseManager -> Maybe Text -> Value -> KeyMap Value -> IO Value callScoreActivities dbManager mBaseUrl rid args = - either (toolError rid) id - <$> runExceptT - ( do - dbName <- ExceptT $ pure (requireText "database" args) - coll <- ExceptT $ pure (requireText "collection" args) - pids <- ExceptT $ pure (parseArrayArg "process_ids" (Just "'process_ids' required (array of strings)") args :: Either Text [Text]) - wantedSets <- ExceptT $ pure (parseArrayArg "scoring_sets" Nothing args :: Either Text [Text]) - let summaryOnly = fromMaybe False (boolArg "summary_only" args) - configured <- liftIO $ configuredScoringSets dbManager coll - chosen <- ExceptT $ pure (resolveSingleScoringSet wantedSets configured) - res <- liftIO $ BI.runBatchImpacts dbManager dbName coll Nothing pids - case res of - Left e -> ExceptT $ pure (Left (batchErrorMsg e)) - Right bir -> pure (toolSuccessJson rid (toColumnarBatch summaryOnly mBaseUrl dbName coll chosen bir)) - ) + runTool rid $ do + dbName <- except (requireText "database" args) + coll <- except (requireText "collection" args) + pids <- except (parseArrayArg "process_ids" (Just "'process_ids' required (array of strings)") args :: Either Text [Text]) + wantedSets <- except (parseArrayArg "scoring_sets" Nothing args :: Either Text [Text]) + let summaryOnly = fromMaybe False (boolArg "summary_only" args) + configured <- liftIO $ configuredScoringSets dbManager coll + chosen <- except (resolveSingleScoringSet wantedSets configured) + res <- liftIO $ BI.runBatchImpacts dbManager dbName coll Nothing pids + case res of + Left e -> throwE (batchErrorMsg e) + Right bir -> pure (toolSuccessJson rid (toColumnarBatch summaryOnly mBaseUrl dbName coll chosen bir)) {- | Handler for the 'list_scoring_sets' MCP tool. From 04f98468536d1a29136fe9894afccf078be4a6c6 Mon Sep 17 00:00:00 2001 From: Christophe Combelles Date: Thu, 28 May 2026 15:52:54 +0200 Subject: [PATCH 36/43] refactor(mcp): flatten diagonal tool handlers onto the ExceptT idiom The case-cascade handlers nested up to eight levels deep re-deriving DB lookups, method resolution, and Either unwrapping by hand. Rewrite them as flat `runTool rid $ do` blocks over requireDatabase / except / liftShow: getSupplyChain, getFlowMapping, getCharacterization, getActivity, listGeographies, plus the withDb helper and the getPathTo / getConsumers handlers. getFlowMapping now reuses resolveMethod instead of re-implementing the UUID-parse-and-filter lookup (identical error messages). Behaviour-preserving: same toolError text and the same short-circuit order. buildUnmatchedDbFlows keeps its empty-list fallback (an empty ranking is not an error), so it stays a plain IO helper. --- src/API/MCP.hs | 480 +++++++++++++++++++++---------------------------- 1 file changed, 206 insertions(+), 274 deletions(-) diff --git a/src/API/MCP.hs b/src/API/MCP.hs index 443c9fd5..e1d008dd 100644 --- a/src/API/MCP.hs +++ b/src/API/MCP.hs @@ -368,14 +368,10 @@ withDb :: KeyMap Value -> ((Database, SharedSolver) -> IO Value) -> IO Value -withDb dbManager rid args action = - case textArg "database" args of - Nothing -> return $ toolError rid "Missing required parameter: database" - Just dbName -> do - mLoaded <- getDatabase dbManager dbName - case mLoaded of - Nothing -> return $ toolError rid ("Database not loaded: " <> dbName) - Just ld -> action (ldDatabase ld, ldSharedSolver ld) +withDb dbManager rid args action = runTool rid $ do + dbName <- except (requireText "database" args) + ld <- requireDatabase dbManager dbName + liftIO $ action (ldDatabase ld, ldSharedSolver ld) -- --------------------------------------------------------------------------- -- ExceptT plumbing shared by every handler @@ -595,36 +591,30 @@ callSearchFlows rid args (db, _) = a <|> _ = a callGetActivity :: Value -> KeyMap Value -> (Database, SharedSolver) -> IO Value -callGetActivity rid args (db, _) = - case textArg "process_id" args of - Nothing -> return $ toolError rid "Missing required parameter: process_id" - Just pid -> case validatedExchangeType of - Left err -> return $ toolError rid err - Right _ -> - case Service.getActivityInfo defaultUnitConfig db pid of - Left err -> return $ toolError rid (T.pack $ show err) - Right val -> case fromJSON val of - -- 'val' was built from an 'ActivityInfo' upstream, so a - -- decode failure is genuinely defensive — pass it - -- through unchanged, hint-less. - Error _ -> return $ toolSuccessJson rid val - Success ai -> - -- Single resolve: take the activity name from the - -- 'ActivityInfo' already in hand instead of asking - -- the engine to resolve the PID again. - let attach = attachMarketHintByName (pfaName (piActivity ai)) - payload - | noFilters = val - | otherwise = - toJSON - ai - { piActivity = - (piActivity ai) - { pfaExchanges = - filter matchExchange (pfaExchanges (piActivity ai)) - } - } - in return $ toolSuccessJson rid (attach payload) +callGetActivity rid args (db, _) = runTool rid $ do + pid <- except (requireText "process_id" args) + _ <- except validatedExchangeType + val <- liftShow (Service.getActivityInfo defaultUnitConfig db pid) + pure $ case fromJSON val of + -- 'val' was built from an 'ActivityInfo' upstream, so a decode + -- failure is genuinely defensive — pass it through unchanged, hint-less. + Error _ -> toolSuccessJson rid val + Success ai -> + -- Single resolve: take the activity name from the 'ActivityInfo' + -- already in hand instead of asking the engine to resolve the PID again. + let attach = attachMarketHintByName (pfaName (piActivity ai)) + payload + | noFilters = val + | otherwise = + toJSON + ai + { piActivity = + (piActivity ai) + { pfaExchanges = + filter matchExchange (pfaExchanges (piActivity ai)) + } + } + in toolSuccessJson rid (attach payload) where exchangeType = textArg "exchange_type" args flowFilter = textArg "flow" args @@ -657,73 +647,47 @@ callGetActivity rid args (db, _) = Just want -> exchangeIsInput (ewuExchange ewu) == want callGetSupplyChain :: DatabaseManager -> Value -> KeyMap Value -> IO Value -callGetSupplyChain dbManager rid args = - case (,) <$> requireText "database" args <*> requireText "process_id" args of - Left err -> return $ toolError rid err - Right (dbName, pid) -> do - mLoaded <- getDatabase dbManager dbName - case mLoaded of - Nothing -> return $ toolError rid ("Database not loaded: " <> dbName) - Just ld -> do - let db = ldDatabase ld - solver = ldSharedSolver ld - isExact = textArg "classification_match" args `elem` [Just "equals", Just "exact"] - classFilters = case (textArg "classification" args, textArg "classification_value" args) of - (Just sys, Just val) -> [(sys, val, isExact)] - _ -> [] - scf = - Service.SupplyChainFilter - { Service.scfCore = - Service.ActivityFilterCore - { Service.afcName = textArg "name" args - , Service.afcLocation = textArg "location" args - , Service.afcProduct = Nothing - , Service.afcClassifications = classFilters - , Service.afcLimit = intArg "limit" args - , Service.afcOffset = Nothing - , Service.afcSort = Nothing - , Service.afcOrder = Nothing - } - , Service.scfMaxDepth = intArg "max_depth" args - , Service.scfMinQuantity = doubleArg "min_quantity" args - } - case parseArrayArg "substitutions" Nothing args :: Either Text [Substitution] of - Left err -> return $ toolError rid err - Right [] -> do - unitCfg <- DM.getMergedUnitConfig dbManager - result <- Service.getSupplyChain unitCfg (DM.mkDepSolverLookup dbManager) db dbName solver pid scf False - case result of - Left err -> return $ toolError rid (T.pack $ show err) - Right val -> return $ toolSuccessJson rid (toJSON val) - Right subs -> case Service.resolveActivityAndProcessId db pid of - Left err -> return $ toolError rid (T.pack $ show err) - Right (processId, _) -> do - eScaling <- - Service.computeScalingVectorWithSubstitutionsCrossDB - (DM.mkDepSolverLookup dbManager) - db - dbName - solver - processId - subs - case eScaling of - Left err -> return $ toolError rid (T.pack (show err)) - Right (scalingVec, virtualLinks) -> do - unitCfg <- DM.getMergedUnitConfig dbManager - eResp <- - Service.buildSupplyChainFromScalingVectorCrossDB - unitCfg - (DM.mkDepSolverLookup dbManager) - db - dbName - processId - scalingVec - virtualLinks - scf - False - case eResp of - Left e -> return $ toolError rid (T.pack (show e)) - Right v -> return $ toolSuccessJson rid (toJSON v) +callGetSupplyChain dbManager rid args = runTool rid $ do + (dbName, pid) <- except $ (,) <$> requireText "database" args <*> requireText "process_id" args + ld <- requireDatabase dbManager dbName + let db = ldDatabase ld + solver = ldSharedSolver ld + depLookup = DM.mkDepSolverLookup dbManager + isExact = textArg "classification_match" args `elem` [Just "equals", Just "exact"] + classFilters = case (textArg "classification" args, textArg "classification_value" args) of + (Just sys, Just val) -> [(sys, val, isExact)] + _ -> [] + scf = + Service.SupplyChainFilter + { Service.scfCore = + Service.ActivityFilterCore + { Service.afcName = textArg "name" args + , Service.afcLocation = textArg "location" args + , Service.afcProduct = Nothing + , Service.afcClassifications = classFilters + , Service.afcLimit = intArg "limit" args + , Service.afcOffset = Nothing + , Service.afcSort = Nothing + , Service.afcOrder = Nothing + } + , Service.scfMaxDepth = intArg "max_depth" args + , Service.scfMinQuantity = doubleArg "min_quantity" args + } + subs <- except (parseArrayArg "substitutions" Nothing args :: Either Text [Substitution]) + unitCfg <- liftIO $ DM.getMergedUnitConfig dbManager + payload <- + if null subs + then -- Plain cross-DB supply chain. + toJSON <$> (liftIO (Service.getSupplyChain unitCfg depLookup db dbName solver pid scf False) >>= liftShow) + else do + -- Substitution-aware: re-solve the root scaling, then build from it. + (processId, _) <- liftShow (Service.resolveActivityAndProcessId db pid) + (scalingVec, virtualLinks) <- + liftIO (Service.computeScalingVectorWithSubstitutionsCrossDB depLookup db dbName solver processId subs) >>= liftShow + resp <- + liftIO (Service.buildSupplyChainFromScalingVectorCrossDB unitCfg depLookup db dbName processId scalingVec virtualLinks scf False) >>= liftShow + pure (toJSON resp) + pure $ toolSuccessJson rid payload {- | Generic SQL-group-by aggregation. One small primitive for "how much X is in Y" questions — replaces ad-hoc decomposition tools. @@ -796,51 +760,44 @@ callAggregate dbManager rid args (db, solver) = in Just (T.strip sys, T.strip val, isExact) callGetPathTo :: Value -> KeyMap Value -> (Database, SharedSolver) -> IO Value -callGetPathTo rid args (db, solver) = - case (textArg "process_id" args, textArg "target" args) of - (Nothing, _) -> return $ toolError rid "Missing required parameter: process_id" - (_, Nothing) -> return $ toolError rid "Missing required parameter: target" - (Just pid, Just target) -> do - result <- Service.getPathTo db solver pid target - case result of - Left err -> return $ toolError rid (T.pack $ show err) - Right val -> return $ toolSuccessJson rid val +callGetPathTo rid args (db, solver) = runTool rid $ do + pid <- except (requireText "process_id" args) + target <- except (requireText "target" args) + val <- liftIO (Service.getPathTo db solver pid target) >>= liftShow + pure (toolSuccessJson rid val) callGetConsumers :: [ClassificationPreset] -> Value -> KeyMap Value -> (Database, SharedSolver) -> IO Value -callGetConsumers presets rid args (db, _) = - case textArg "process_id" args of - Nothing -> return $ toolError rid "Missing required parameter: process_id" - Just pid -> - let isExact = textArg "classification_match" args `elem` [Just "equals", Just "exact"] - dbName = fromMaybe "" (textArg "database" args) -- validated by withDb - presetFilters = case textArg "preset" args of - Just pn -> case L.find (\p -> cpName p == pn) presets of - Just p -> [(ceSystem e, ceValue e, ceMode e == "exact") | e <- cpFilters p] - Nothing -> [] - Nothing -> [] - explicitFilters = case (textArg "classification" args, textArg "classification_value" args) of - (Just sys, Just val) -> [(sys, val, isExact)] - _ -> [] - classFilters = presetFilters ++ explicitFilters - cnf = - Service.ConsumerFilter - { Service.cnfCore = - Service.ActivityFilterCore - { Service.afcName = textArg "name" args - , Service.afcLocation = textArg "location" args - , Service.afcProduct = textArg "product" args - , Service.afcClassifications = classFilters - , Service.afcLimit = intArg "limit" args - , Service.afcOffset = Nothing - , Service.afcSort = Nothing - , Service.afcOrder = Nothing - } - , Service.cnfMaxDepth = intArg "max_depth" args - , Service.cnfIncludeEdges = fromMaybe False (boolArg "include_edges" args) +callGetConsumers presets rid args (db, _) = runTool rid $ do + pid <- except (requireText "process_id" args) + let isExact = textArg "classification_match" args `elem` [Just "equals", Just "exact"] + dbName = fromMaybe "" (textArg "database" args) -- validated by withDb + presetFilters = case textArg "preset" args of + Just pn -> case L.find (\p -> cpName p == pn) presets of + Just p -> [(ceSystem e, ceValue e, ceMode e == "exact") | e <- cpFilters p] + Nothing -> [] + Nothing -> [] + explicitFilters = case (textArg "classification" args, textArg "classification_value" args) of + (Just sys, Just val) -> [(sys, val, isExact)] + _ -> [] + classFilters = presetFilters ++ explicitFilters + cnf = + Service.ConsumerFilter + { Service.cnfCore = + Service.ActivityFilterCore + { Service.afcName = textArg "name" args + , Service.afcLocation = textArg "location" args + , Service.afcProduct = textArg "product" args + , Service.afcClassifications = classFilters + , Service.afcLimit = intArg "limit" args + , Service.afcOffset = Nothing + , Service.afcSort = Nothing + , Service.afcOrder = Nothing } - in case Service.getConsumers db dbName pid cnf of - Left err -> return $ toolError rid (T.pack $ show err) - Right results -> return $ toolSuccessJson rid (toJSON results) + , Service.cnfMaxDepth = intArg "max_depth" args + , Service.cnfIncludeEdges = fromMaybe False (boolArg "include_edges" args) + } + results <- liftShow (Service.getConsumers db dbName pid cnf) + pure (toolSuccessJson rid (toJSON results)) {- | MCP get_inventory: route through the cross-DB back-substitution path so inventories from dep DBs are merged into the returned flows. @@ -1313,64 +1270,52 @@ callListMethods dbManager rid = do return $ toolSuccessJson rid $ object ["methods" .= summaries] callGetFlowMapping :: DatabaseManager -> Value -> KeyMap Value -> IO Value -callGetFlowMapping dbManager rid args = - case (,) <$> requireText "database" args <*> requireText "method_id" args of - Left err -> return $ toolError rid err - Right (dbName, methodIdText) -> do - mLoaded <- getDatabase dbManager dbName - case mLoaded of - Nothing -> return $ toolError rid ("Database not loaded: " <> dbName) - Just ld -> do - let db = ldDatabase ld - loadedMethods <- DM.getLoadedMethods dbManager - let allMethods = map snd loadedMethods - case UUID.fromText methodIdText of - Nothing -> return $ toolError rid "Invalid method UUID format" - Just uuid -> - case filter (\m -> methodId m == uuid) allMethods of - [] -> return $ toolError rid "Method not found" - (method : _) -> do - mappings <- DM.mapMethodToFlowsCached dbManager dbName db method - let stats = computeMappingStats mappings - total = msTotal stats - matched = total - msUnmatched stats - coverage = - if total > 0 - then fromIntegral matched / fromIntegral total * 100 :: Double - else 0 - verbose = fromMaybe False (boolArg "verbose" args) - maxUnm = fromMaybe 50 (intArg "max_unmatched" args) - extra <- - if not verbose - then pure [] - else do - let unmatchedCFs = - take - maxUnm - [ object - [ "name" .= mcfFlowName cf - , "cas" .= mcfCAS cf - , "compartment" .= mcfCompartment cf - , "cf_value" .= mcfValue cf - , "cf_unit" .= mcfUnit cf - ] - | (cf, Nothing) <- mappings - ] - unmatchedFlows <- buildUnmatchedDbFlows dbManager dbName db method args maxUnm - pure - [ "unmatched_cfs" .= unmatchedCFs - , "unmatched_db_flows" .= unmatchedFlows - ] - return $ - toolSuccessJson rid $ - object $ - [ "method" .= methodName method - , "total" .= total - , "matched" .= matched - , "unmatched" .= msUnmatched stats - , "coverage" .= coverage - ] - ++ extra +callGetFlowMapping dbManager rid args = runTool rid $ do + (dbName, methodIdText) <- except $ (,) <$> requireText "database" args <*> requireText "method_id" args + ld <- requireDatabase dbManager dbName + (_, method) <- ExceptT (resolveMethod dbManager methodIdText) + let db = ldDatabase ld + mappings <- liftIO $ DM.mapMethodToFlowsCached dbManager dbName db method + let stats = computeMappingStats mappings + total = msTotal stats + matched = total - msUnmatched stats + coverage = + if total > 0 + then fromIntegral matched / fromIntegral total * 100 :: Double + else 0 + verbose = fromMaybe False (boolArg "verbose" args) + maxUnm = fromMaybe 50 (intArg "max_unmatched" args) + extra <- + if not verbose + then pure [] + else do + let unmatchedCFs = + take + maxUnm + [ object + [ "name" .= mcfFlowName cf + , "cas" .= mcfCAS cf + , "compartment" .= mcfCompartment cf + , "cf_value" .= mcfValue cf + , "cf_unit" .= mcfUnit cf + ] + | (cf, Nothing) <- mappings + ] + unmatchedFlows <- liftIO $ buildUnmatchedDbFlows dbManager dbName db method args maxUnm + pure + [ "unmatched_cfs" .= unmatchedCFs + , "unmatched_db_flows" .= unmatchedFlows + ] + pure $ + toolSuccessJson rid $ + object $ + [ "method" .= methodName method + , "total" .= total + , "matched" .= matched + , "unmatched" .= msUnmatched stats + , "coverage" .= coverage + ] + ++ extra {- | Verbose-mode helper: rank unmatched DB flows for a method. @@ -1434,52 +1379,44 @@ buildUnmatchedDbFlows dbManager dbName db method args maxN = pure (map encodeUncharacterized uncharacterized) callGetCharacterization :: DatabaseManager -> Value -> KeyMap Value -> IO Value -callGetCharacterization dbManager rid args = - case (,) <$> requireText "database" args <*> requireText "method_id" args of - Left err -> return $ toolError rid err - Right (dbName, methodIdText) -> do - mLoaded <- getDatabase dbManager dbName - case mLoaded of - Nothing -> return $ toolError rid ("Database not loaded: " <> dbName) - Just ld -> do - eMethod <- resolveMethod dbManager methodIdText - case eMethod of - Left err -> return $ toolError rid err - Right (_, method) -> do - let db = ldDatabase ld - lim = fromMaybe 20 (intArg "limit" args) - flowQ = textArg "flow" args - queryLower = fmap T.toLower flowQ - mappings <- DM.mapMethodToFlowsCached dbManager dbName db method - let matched = - [ (cf, f, strat) - | (cf, Just (f, strat)) <- mappings - , matchQuery queryLower (mcfFlowName cf) (bfName f) - ] - sorted = L.sortOn (\(cf, _, _) -> negate (abs (mcfValue cf))) matched - top = take lim sorted - mkEntry (cf, f, strat) = - object - [ "cf_flow_name" .= mcfFlowName cf - , "cf_value" .= mcfValue cf - , "cf_unit" .= mcfUnit cf - , "direction" .= (case mcfDirection cf of Input -> "Input" :: Text; Output -> "Output") - , "db_flow_name" .= bfName f - , "flow_id" .= UUID.toText (bfId f) - , "flow_unit" .= getUnitNameForBioFlow (dbUnits db) f - , "category" .= bfCompartmentName f - , "compartment" .= bfCompartmentSub f - , "match_strategy" .= show strat - ] - return $ - toolSuccessJson rid $ - object - [ "method" .= methodName method - , "unit" .= methodUnit method - , "matches" .= length matched - , "shown" .= length top - , "factors" .= map mkEntry top - ] +callGetCharacterization dbManager rid args = runTool rid $ do + (dbName, methodIdText) <- except $ (,) <$> requireText "database" args <*> requireText "method_id" args + ld <- requireDatabase dbManager dbName + (_, method) <- ExceptT (resolveMethod dbManager methodIdText) + let db = ldDatabase ld + lim = fromMaybe 20 (intArg "limit" args) + flowQ = textArg "flow" args + queryLower = fmap T.toLower flowQ + mappings <- liftIO $ DM.mapMethodToFlowsCached dbManager dbName db method + let matched = + [ (cf, f, strat) + | (cf, Just (f, strat)) <- mappings + , matchQuery queryLower (mcfFlowName cf) (bfName f) + ] + sorted = L.sortOn (\(cf, _, _) -> negate (abs (mcfValue cf))) matched + top = take lim sorted + mkEntry (cf, f, strat) = + object + [ "cf_flow_name" .= mcfFlowName cf + , "cf_value" .= mcfValue cf + , "cf_unit" .= mcfUnit cf + , "direction" .= (case mcfDirection cf of Input -> "Input" :: Text; Output -> "Output") + , "db_flow_name" .= bfName f + , "flow_id" .= UUID.toText (bfId f) + , "flow_unit" .= getUnitNameForBioFlow (dbUnits db) f + , "category" .= bfCompartmentName f + , "compartment" .= bfCompartmentSub f + , "match_strategy" .= show strat + ] + pure $ + toolSuccessJson rid $ + object + [ "method" .= methodName method + , "unit" .= methodUnit method + , "matches" .= length matched + , "shown" .= length top + , "factors" .= map mkEntry top + ] where matchQuery Nothing _ _ = True matchQuery (Just q) cfName dbFlowName = T.isInfixOf q (T.toLower cfName) || T.isInfixOf q (T.toLower dbFlowName) @@ -1750,29 +1687,24 @@ callGetContributingActivities dbManager mBaseUrl rid args = ] callListGeographies :: DatabaseManager -> Value -> KeyMap Value -> IO Value -callListGeographies dbManager rid args = - case textArg "database" args of - Nothing -> return $ toolError rid "Missing required parameter: database" - Just dbName -> do - mLoaded <- getDatabase dbManager dbName - case mLoaded of - Nothing -> return $ toolError rid ("Database not loaded: " <> dbName) - Just ld -> do - let db = ldDatabase ld - geoMap = dmGeographies dbManager - codes = L.sort $ M.keys (idxByLocation (dbIndexes db)) - mkEntry code = - let (displayName, parents) = M.findWithDefault (code, []) code geoMap - parentStr = T.intercalate "|" parents - in object - [ "geo" .= code - , "display_name" .= displayName - , "parent_regions" .= parentStr - ] - return $ - toolSuccessJson rid $ - object - ["geographies" .= map mkEntry codes] +callListGeographies dbManager rid args = runTool rid $ do + dbName <- except (requireText "database" args) + ld <- requireDatabase dbManager dbName + let db = ldDatabase ld + geoMap = dmGeographies dbManager + codes = L.sort $ M.keys (idxByLocation (dbIndexes db)) + mkEntry code = + let (displayName, parents) = M.findWithDefault (code, []) code geoMap + parentStr = T.intercalate "|" parents + in object + [ "geo" .= code + , "display_name" .= displayName + , "parent_regions" .= parentStr + ] + pure $ + toolSuccessJson rid $ + object + ["geographies" .= map mkEntry codes] -- ============================================================================ -- score_activity / score_activities / list_scoring_sets From 5797e91e31acca2b32be342c5809020267ec9473 Mon Sep 17 00:00:00 2001 From: Christophe Combelles Date: Thu, 28 May 2026 15:56:06 +0200 Subject: [PATCH 37/43] refactor(mcp): remove duplicated and redundant pure helper code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Factor the classification-filter construction (preset lookup + explicit classification/value args) into explicitClassFilter and classificationFilters, shared by the search, consumers, and supply-chain handlers instead of three near-identical inline copies. - Drop the two local `<|>` rebindings in the search handlers; they re-implemented the Alternative Maybe instance now imported directly. - Collapse a `case … of Right s -> Right s; Left e -> Left e` (an open-coded identity on Either) in compute_sensitivity's scoreOf. Behaviour-preserving: supply-chain keeps explicit-only filters (it has no preset arg), and `<|>` on Maybe matches the deleted definitions. --- src/API/MCP.hs | 69 +++++++++++++++++++++----------------------------- 1 file changed, 29 insertions(+), 40 deletions(-) diff --git a/src/API/MCP.hs b/src/API/MCP.hs index e1d008dd..7b07086b 100644 --- a/src/API/MCP.hs +++ b/src/API/MCP.hs @@ -507,6 +507,29 @@ callListPresets presets rid = | p <- presets ] +{- | Explicit classification filter from the @classification@ + +@classification_value@ args (honouring @classification_match@). Shared by the +search, consumers, and supply-chain handlers. +-} +explicitClassFilter :: KeyMap Value -> [(Text, Text, Bool)] +explicitClassFilter args = case (textArg "classification" args, textArg "classification_value" args) of + (Just sys, Just val) -> [(sys, val, isExact)] + _ -> [] + where + isExact = textArg "classification_match" args `elem` [Just "equals", Just "exact"] + +{- | Preset filters (looked up by @preset@ name) followed by the explicit +filter. Shared by the search and consumers handlers. +-} +classificationFilters :: [ClassificationPreset] -> KeyMap Value -> [(Text, Text, Bool)] +classificationFilters presets args = presetFilters ++ explicitClassFilter args + where + presetFilters = case textArg "preset" args of + Just pn -> case L.find (\p -> cpName p == pn) presets of + Just p -> [(ceSystem e, ceValue e, ceMode e == "exact") | e <- cpFilters p] + Nothing -> [] + Nothing -> [] + callSearchActivities :: [ClassificationPreset] -> Value -> KeyMap Value -> (Database, SharedSolver) -> IO Value callSearchActivities presets rid args (db, _) = do let name = textArg "name" args @@ -514,24 +537,14 @@ callSearchActivities presets rid args (db, _) = do product' = textArg "product" args limit = intArg "limit" args exact = fromMaybe False (boolArg "exact" args) - isExact = textArg "classification_match" args `elem` [Just "equals", Just "exact"] - presetFilters = case textArg "preset" args of - Just pn -> case L.find (\p -> cpName p == pn) presets of - Just p -> [(ceSystem e, ceValue e, ceMode e == "exact") | e <- cpFilters p] - Nothing -> [] - Nothing -> [] - explicitFilters = case (textArg "classification" args, textArg "classification_value" args) of - (Just sys, Just val) -> [(sys, val, isExact)] - _ -> [] - classFilters = presetFilters ++ explicitFilters - let sf = + sf = Service.SearchFilter { Service.sfCore = Service.ActivityFilterCore { Service.afcName = name , Service.afcLocation = geo , Service.afcProduct = product' - , Service.afcClassifications = classFilters + , Service.afcClassifications = classificationFilters presets args , Service.afcLimit = limit <|> Just 20 , Service.afcOffset = Nothing , Service.afcSort = Nothing @@ -543,9 +556,6 @@ callSearchActivities presets rid args (db, _) = do case result of Left err -> return $ toolError rid (T.pack $ show err) Right val -> return $ toolSuccessJson rid val - where - Nothing <|> b = b - a <|> _ = a callListClassifications :: Value -> KeyMap Value -> (Database, SharedSolver) -> IO Value callListClassifications rid args (db, _) = @@ -586,9 +596,6 @@ callSearchFlows rid args (db, _) = case result of Left err -> return $ toolError rid (T.pack $ show err) Right val -> return $ toolSuccessJson rid val - where - Nothing <|> b = b - a <|> _ = a callGetActivity :: Value -> KeyMap Value -> (Database, SharedSolver) -> IO Value callGetActivity rid args (db, _) = runTool rid $ do @@ -653,10 +660,6 @@ callGetSupplyChain dbManager rid args = runTool rid $ do let db = ldDatabase ld solver = ldSharedSolver ld depLookup = DM.mkDepSolverLookup dbManager - isExact = textArg "classification_match" args `elem` [Just "equals", Just "exact"] - classFilters = case (textArg "classification" args, textArg "classification_value" args) of - (Just sys, Just val) -> [(sys, val, isExact)] - _ -> [] scf = Service.SupplyChainFilter { Service.scfCore = @@ -664,7 +667,7 @@ callGetSupplyChain dbManager rid args = runTool rid $ do { Service.afcName = textArg "name" args , Service.afcLocation = textArg "location" args , Service.afcProduct = Nothing - , Service.afcClassifications = classFilters + , Service.afcClassifications = explicitClassFilter args , Service.afcLimit = intArg "limit" args , Service.afcOffset = Nothing , Service.afcSort = Nothing @@ -769,17 +772,7 @@ callGetPathTo rid args (db, solver) = runTool rid $ do callGetConsumers :: [ClassificationPreset] -> Value -> KeyMap Value -> (Database, SharedSolver) -> IO Value callGetConsumers presets rid args (db, _) = runTool rid $ do pid <- except (requireText "process_id" args) - let isExact = textArg "classification_match" args `elem` [Just "equals", Just "exact"] - dbName = fromMaybe "" (textArg "database" args) -- validated by withDb - presetFilters = case textArg "preset" args of - Just pn -> case L.find (\p -> cpName p == pn) presets of - Just p -> [(ceSystem e, ceValue e, ceMode e == "exact") | e <- cpFilters p] - Nothing -> [] - Nothing -> [] - explicitFilters = case (textArg "classification" args, textArg "classification_value" args) of - (Just sys, Just val) -> [(sys, val, isExact)] - _ -> [] - classFilters = presetFilters ++ explicitFilters + let dbName = fromMaybe "" (textArg "database" args) -- validated by withDb cnf = Service.ConsumerFilter { Service.cnfCore = @@ -787,7 +780,7 @@ callGetConsumers presets rid args (db, _) = runTool rid $ do { Service.afcName = textArg "name" args , Service.afcLocation = textArg "location" args , Service.afcProduct = textArg "product" args - , Service.afcClassifications = classFilters + , Service.afcClassifications = classificationFilters presets args , Service.afcLimit = intArg "limit" args , Service.afcOffset = Nothing , Service.afcSort = Nothing @@ -1086,11 +1079,7 @@ callComputeSensitivity dbManager mBaseUrl rid args = liftIO $ Service.computeSensitivities db (ldSharedSolver ld) (raPid ra) perts (baselineX, perResults) <- liftShow eRes - let scoreOf x = - let inv = applyBiosphereMatrix db x - in case computeLCIAScoreAuto unitCfg mUnits mFlows db x inv hier tables of - Right s -> Right s - Left e -> Left e + let scoreOf x = computeLCIAScoreAuto unitCfg mUnits mFlows db x (applyBiosphereMatrix db x) hier tables baselineScore <- case scoreOf baselineX of Right s -> pure s Left e -> throwE ("baseline scoring failed: " <> e) From c059d65422cdfbfb776c2e92400878e50544ef5e Mon Sep 17 00:00:00 2001 From: Christophe Combelles Date: Thu, 28 May 2026 19:20:37 +0200 Subject: [PATCH 38/43] fix(method): emit SimaPro categories that have no rows A category header with zero CF rows (or a damage category with zero impact rows) emits its Method/DamageCategory again, as before the Stage-ADT refactor. A category can be declared before its factors are added; dropping it silently would hide that from downstream. --- src/Method/ParserSimaPro.hs | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/src/Method/ParserSimaPro.hs b/src/Method/ParserSimaPro.hs index 0cae0dc7..037c3fdc 100644 --- a/src/Method/ParserSimaPro.hs +++ b/src/Method/ParserSimaPro.hs @@ -180,17 +180,19 @@ step cfg st line = case psStage st of where stripped = BS8.strip line --- | Append the completed in-progress block (if any) onto the right list. +-- | Append the completed category. A header with zero CF rows still emits an +-- empty 'Method': a category can be declared before its factors are added, and +-- silently dropping it would hide that from downstream. -- Shared by 'step' (mid-stream, on blanks/markers/End) and 'finalize' (at EOF). finishCat :: CatAccum -> ParseState -> ParseState -finishCat (CatAccum name unit factors) st - | null factors = st - | otherwise = st{psMethods = buildMethod (psMethodology st) name unit (reverse factors) : psMethods st} +finishCat (CatAccum name unit factors) st = + st{psMethods = buildMethod (psMethodology st) name unit (reverse factors) : psMethods st} +-- | Append the completed damage category, including one with zero impact rows +-- (same rationale as 'finishCat'). finishDamage :: DamageAccum -> ParseState -> ParseState -finishDamage (DamageAccum name unit impacts) st - | null impacts = st - | otherwise = st{psDamageCats = DamageCategory name unit (reverse impacts) : psDamageCats st} +finishDamage (DamageAccum name unit impacts) st = + st{psDamageCats = DamageCategory name unit (reverse impacts) : psDamageCats st} finishNW :: NWAccum -> ParseState -> ParseState finishNW (NWAccum name norm weight) st From 1702b503ad1fdb85eba7ca78c5d3600ada4c283b Mon Sep 17 00:00:00 2001 From: Christophe Combelles Date: Thu, 28 May 2026 19:20:42 +0200 Subject: [PATCH 39/43] fix(service): keep the root graph node at index 0 below cutoff When the root activity falls below the cutoff threshold it is prepended (becoming node 0) rather than left at its natural index, restoring the node ordering and id mapping from before the cutoff refactor. Its value is read from the supply list via lookup, so no partial indexing. --- src/Service.hs | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/src/Service.hs b/src/Service.hs index fa2e8496..af2ac98a 100644 --- a/src/Service.hs +++ b/src/Service.hs @@ -673,19 +673,22 @@ filterTreeExport pat export = meta = (teTree export){tmTotalNodes = M.size filteredNodes} in export{teTree = meta, teNodes = filteredNodes, teEdges = filteredEdges} -{- | Activities whose absolute cumulative value clears the threshold, plus the -root activity (which we always surface, even if it falls below). Out-of-bounds -roots become a zero-valued entry rather than crashing. +{- | Activities whose absolute cumulative value clears the threshold. The root +activity is always surfaced: above threshold it keeps its natural position; +otherwise it is prepended (becoming node 0) with its actual supply value, or +0 when out of bounds. -} selectSignificantActivities :: Double -> ProcessId -> [Double] -> [(ProcessId, Double)] selectSignificantActivities threshold rootPid supplyList = - let kept = + let aboveThreshold = [ (fromIntegral idx :: ProcessId, val) | (idx, val) <- zip [(0 :: Int) ..] supplyList - , abs val > threshold || idx == fromIntegral rootPid + , abs val > threshold ] - rootInBounds = fromIntegral rootPid < length supplyList - in if rootInBounds then kept else (rootPid, 0.0) : kept + rootValue = fromMaybe 0.0 (lookup (fromIntegral rootPid :: Int) (zip [0 ..] supplyList)) + in if any ((== rootPid) . fst) aboveThreshold + then aboveThreshold + else (rootPid, rootValue) : aboveThreshold {- | True iff the exchange is a technosphere @Input@ whose link points at the given target activity. Waste exchanges aren't traversed by the graph builder From c979d4b810515963cb9cb250c58269714c55514e Mon Sep 17 00:00:00 2001 From: Christophe Combelles Date: Thu, 28 May 2026 19:20:47 +0200 Subject: [PATCH 40/43] refactor(app): construct AppEnv with record syntax Drop the mkAppEnv wrapper: five positional arguments with two adjacent Maybes are easy to transpose. Call sites now name every field. --- app/Main.hs | 11 +++++++++-- src/API/BatchImpacts.hs | 20 +++++++++++++++++--- src/App/Env.hs | 10 ---------- 3 files changed, 26 insertions(+), 15 deletions(-) diff --git a/app/Main.hs b/app/Main.hs index be95a1d0..9a9272ae 100644 --- a/app/Main.hs +++ b/app/Main.hs @@ -39,7 +39,7 @@ import Progress import API.Licenses (licensesResponse) import API.MCP (mcpApp, toolDefinitions) import API.Routes (lcaAPI, lcaServer, volcaOpenApi) -import App.Env (mkAppEnv) +import App.Env (AppEnv (..)) import Data.Aeson (encode) import qualified Data.ByteString as BS import qualified Data.ByteString.Char8 as C8 @@ -337,7 +337,14 @@ createServerApp dbManager maxTreeDepth staticDir desktopMode password hostingCon unless (desktopMode || hasFrontend) $ reportProgress Info "Frontend not bundled — MCP responses will omit 'web_url'" mcp <- mcpApp dbManager filterPresets hasFrontend - let env = mkAppEnv dbManager maxTreeDepth password hostingConfig filterPresets + let env = + AppEnv + { aeDbManager = dbManager + , aeMaxTreeDepth = maxTreeDepth + , aePassword = password + , aeHostingConfig = hostingConfig + , aeClassificationPresets = filterPresets + } apiApp = serve lcaAPI (lcaServer env) pure $ \req respond -> do unless desktopMode (logRequest req) diff --git a/src/API/BatchImpacts.hs b/src/API/BatchImpacts.hs index 1624ff49..d3d189cd 100644 --- a/src/API/BatchImpacts.hs +++ b/src/API/BatchImpacts.hs @@ -21,7 +21,7 @@ module API.BatchImpacts ( import API.Routes (activityLCIABatchH, batchImpactsH, collectionNotLoadedPrefix, databaseNotLoadedPrefix) import API.Types (BatchImpactsRequest (..), BatchImpactsResponse, LCIABatchResult, SubstitutionRequest) -import App.Env (mkAppEnv, runApp) +import App.Env (AppEnv (..), runApp) import Control.Concurrent.STM (readTVarIO) import qualified Data.ByteString.Lazy as BSL import qualified Data.Map as M @@ -78,7 +78,14 @@ runActivityLCIABatch :: Maybe SubstitutionRequest -> IO (Either BatchError LCIABatchResult) runActivityLCIABatch dbm dbName pid coll mSub = do - let env = mkAppEnv dbm 0 Nothing Nothing [] + let env = + AppEnv + { aeDbManager = dbm + , aeMaxTreeDepth = 0 + , aePassword = Nothing + , aeHostingConfig = Nothing + , aeClassificationPresets = [] + } res <- Servant.runHandler (runApp env (activityLCIABatchH dbName pid coll mSub)) case res of Right lbr -> pure (Right lbr) @@ -101,7 +108,14 @@ runBatchImpacts :: [Text] -> IO (Either BatchError BatchImpactsResponse) runBatchImpacts dbm dbName coll topFlows pids = do - let env = mkAppEnv dbm 0 Nothing Nothing [] + let env = + AppEnv + { aeDbManager = dbm + , aeMaxTreeDepth = 0 + , aePassword = Nothing + , aeHostingConfig = Nothing + , aeClassificationPresets = [] + } res <- Servant.runHandler $ runApp env $ diff --git a/src/App/Env.hs b/src/App/Env.hs index e02d6ba6..6aeabd79 100644 --- a/src/App/Env.hs +++ b/src/App/Env.hs @@ -5,7 +5,6 @@ -- mapping passed to Servant's 'hoistServer'. module App.Env ( AppEnv (..), - mkAppEnv, AppM (..), runApp, ) where @@ -26,15 +25,6 @@ data AppEnv = AppEnv , aeClassificationPresets :: ![Config.ClassificationPreset] } -mkAppEnv - :: DatabaseManager - -> Int - -> Maybe String - -> Maybe Config.HostingConfig - -> [Config.ClassificationPreset] - -> AppEnv -mkAppEnv = AppEnv - newtype AppM a = AppM {unAppM :: ReaderT AppEnv Handler a} deriving newtype (Functor, Applicative, Monad, MonadIO, MonadReader AppEnv, MonadError ServerError) From 0cec4107f242a42ee48b6fe34ea612a01c379551 Mon Sep 17 00:00:00 2001 From: Christophe Combelles Date: Thu, 28 May 2026 19:20:56 +0200 Subject: [PATCH 41/43] fix(json): keep the field name when the prefix strip empties it An all-lowercase field has no Type-prefix boundary, so dropWhile isLower would produce an empty JSON key. Fall back to the original label. --- src/API/JsonOptions.hs | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/API/JsonOptions.hs b/src/API/JsonOptions.hs index 15caea6e..e55094b5 100644 --- a/src/API/JsonOptions.hs +++ b/src/API/JsonOptions.hs @@ -35,11 +35,14 @@ import GHC.Generics (Generic, Rep) stripLowerPrefix :: Options stripLowerPrefix = defaultOptions - { fieldLabelModifier = lowerFirst . dropWhile isLower + { fieldLabelModifier = stripPrefix } where - lowerFirst "" = "" - lowerFirst (c : cs) = toLower c : cs + -- Drop the lowercase Type-prefix (@fooBar -> bar@). An all-lowercase field + -- has no prefix boundary, so keep it verbatim rather than emit an empty key. + stripPrefix label = case dropWhile isLower label of + "" -> label + (c : cs) -> toLower c : cs strippedToJSON :: (Generic a, GToJSON' Value Zero (Rep a)) => a -> Value strippedToJSON = genericToJSON stripLowerPrefix From 9c5e1a6c240a86f28791eb586622930f69530709 Mon Sep 17 00:00:00 2001 From: Christophe Combelles Date: Thu, 28 May 2026 19:21:24 +0200 Subject: [PATCH 42/43] refactor(solver): dedup factorization caching and cross-DB dep helpers (#98) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Summary A readability pass over `SharedSolver.hs` that removes duplicated logic without changing behaviour. - **Factorization caching deduped.** The lazy MUMPS factorization was computed-and-cached in two near-identical blocks (`solveWithSharedSolver`, `ensureFactorization`). Extracted `computeAndStoreFactorization` so the precompute + store lives once; the dense-solver fallback stays scoped to the first-solve (cache-miss) path exactly as before. - **Shared dep-demand helpers.** The per-root dep-demand → demand-vector conversion was copied byte-for-byte in three resolvers (`resolveDep`, `resolveDepContribs`, and `Service.resolveDepWithSubs`). Extracted `prepareDepDemandVecs`, plus `depDbsOf` for the "DBs referenced at this level" set. - **`Semigroup CrossDBSolution`.** `mergeSolutions` is now `foldl' (<>)` — same summed inventory and base-first / BFS-ordered scalings as the hand-written version. - **Named map types.** Introduced `SupplierDemands` / `DepDemands` aliases in `Matrix` (where the type is produced) and used them across the affected signatures, replacing the verbose nested-`Map` spelling. We deliberately stopped short of unifying the two dependency-graph walks (inventory vs. contributions) behind a generic interpreter: it barely reduces line count, would rely on a K=1 invariant the types don't enforce, and can't absorb the third walk in `Service.hs`. ## Test plan - [x] `./gen-version.sh && cabal build` — clean (only two pre-existing, unrelated `API/MCP.hs` warnings). - [x] `cabal run lca-tests` — 1107 examples, 0 failures, 1 pending. - [x] Rebased onto current `advanced_haskell`; rebuilt and re-ran the full suite green on the rebased tree. --- src/Matrix.hs | 15 ++++- src/Service.hs | 16 +++-- src/SharedSolver.hs | 141 ++++++++++++++++++++++---------------------- 3 files changed, 90 insertions(+), 82 deletions(-) diff --git a/src/Matrix.hs b/src/Matrix.hs index 2588b59e..cee30f0c 100644 --- a/src/Matrix.hs +++ b/src/Matrix.hs @@ -33,6 +33,8 @@ Performance characteristics: module Matrix ( Vector, Inventory, + SupplierDemands, + DepDemands, computeScalingVector, applyBiosphereMatrix, computeInventoryMatrix, @@ -95,6 +97,13 @@ type Vector = U.Vector Double -- | Final inventory vector mapping biosphere flow UUIDs to quantities. type Inventory = M.Map UUID Double +-- | Demand a consumer DB places on one supplier @(activityUUID, productUUID)@: +-- the amount plus the consumer's exchange unit (for cross-DB unit conversion). +type SupplierDemands = M.Map (UUID, UUID) (Double, Text) + +-- | Per-dependency-DB supplier demands, keyed by source database name. +type DepDemands = M.Map Text SupplierDemands + {- | Per-database coalescing solver. A single worker thread owns the MUMPS handle and drains a queue of solve requests, batching whatever has piled up into one 'mumpsSolveMulti' call per round. Concurrent requests on the same @@ -727,7 +736,7 @@ before accumulating. accumulateDepDemands :: Database -> Vector -> - M.Map Text (M.Map (UUID, UUID) (Double, Text)) + DepDemands accumulateDepDemands db = accumulateDepDemandsWith db [] {- | Same as 'accumulateDepDemands' but folds over an additional list of @@ -743,7 +752,7 @@ accumulateDepDemandsWith :: Database -> [CrossDBLink] -> Vector -> - M.Map Text (M.Map (UUID, UUID) (Double, Text)) + DepDemands accumulateDepDemandsWith db extraLinks scalingVec = foldr step M.empty (dbCrossDBLinks db ++ extraLinks) where @@ -818,7 +827,7 @@ depDemandsToVector :: -- | dep DB name, for error messages Text -> Database -> - M.Map (UUID, UUID) (Double, Text) -> + SupplierDemands -> Either Text Vector depDemandsToVector unitConfig depDbName depDb demands = do converted <- traverse convertEntry (M.toList demands) diff --git a/src/Service.hs b/src/Service.hs index af2ac98a..847e422b 100644 --- a/src/Service.hs +++ b/src/Service.hs @@ -30,7 +30,7 @@ import qualified Data.UUID as UUID import qualified Data.Vector as V import qualified Data.Vector.Unboxed as U import Database (applyStructuredFilters, findActivitiesByFields, findFlowsBySynonym) -import Matrix (Inventory, accumulateDepDemandsWith, activityNormalizationFactor, applyBiosphereMatrix, applySparseMatrix, buildDemandVectorFromIndex, computeInventoryMatrix, depDemandsToVector, perturbA, perturbABatch, toList) +import Matrix (DepDemands, Inventory, accumulateDepDemandsWith, activityNormalizationFactor, applyBiosphereMatrix, applySparseMatrix, buildDemandVectorFromIndex, computeInventoryMatrix, depDemandsToVector, perturbA, perturbABatch, toList) import qualified Matrix.Export as MatrixExport import Plugin.Types (Severity (..), ValidateContext (..), ValidateHandle (..), ValidationIssue (..), ValidationPhase (..)) import qualified Progress @@ -2280,7 +2280,7 @@ resolveDepWithSubs :: SharedSolver.DepSolverLookup -> -- | ROOT DB's name (default for bare consumer/from/to refs) RootDb -> - [M.Map Text (M.Map (UUID, UUID) (Double, Text))] -> + [DepDemands] -> [Substitution] -> Int -> Int -> @@ -2294,13 +2294,11 @@ resolveDepWithSubs unitCfg depLookup rootDb perRootDepDemands allSubs depth k de -- contributes 'Nothing' at every root; dropped before merge. pure (Right (replicate k Nothing)) Just (depDb, depSolver) -> - let demandsPerRoot = map (M.findWithDefault M.empty depDbName) perRootDepDemands - depVecsE = traverse (depDemandsToVector unitCfg depDbName depDb) demandsPerRoot - in case depVecsE of - Left err -> pure (Left (MatrixError err)) - Right depDemandVecs -> do - sols <- goWithSubsAndDeps unitCfg depLookup depDb (ThisDb depDbName) rootDb depSolver depDemandVecs allSubs (depth + 1) - pure $ fmap (map Just) sols + case SharedSolver.prepareDepDemandVecs unitCfg depDbName depDb perRootDepDemands of + Left err -> pure (Left (MatrixError err)) + Right depDemandVecs -> do + sols <- goWithSubsAndDeps unitCfg depLookup depDb (ThisDb depDbName) rootDb depSolver depDemandVecs allSubs (depth + 1) + pure $ fmap (map Just) sols {- | Cross-DB substitution resolver (root-only path, used by supply-chain). diff --git a/src/SharedSolver.hs b/src/SharedSolver.hs index 358d2823..b0798eea 100644 --- a/src/SharedSolver.hs +++ b/src/SharedSolver.hs @@ -1,3 +1,4 @@ +{-# LANGUAGE LambdaCase #-} {-# LANGUAGE OverloadedStrings #-} {- | @@ -33,12 +34,13 @@ module SharedSolver ( computeInventoryMatrixBatchWithDepsCached, goWithDepsFromScalings, mergeSolutions, + prepareDepDemandVecs, crossDBProcessContributions, ) where import Control.Concurrent.Async (mapConcurrently) -import Control.Concurrent.MVar (MVar, modifyMVar, modifyMVar_, newMVar, readMVar, withMVar) -import Control.Exception (SomeException, catch) +import Control.Concurrent.MVar (MVar, modifyMVar_, newMVar, readMVar, withMVar) +import Control.Exception (SomeException, try) import Data.List (transpose) import Data.List.NonEmpty (NonEmpty (..)) import qualified Data.List.NonEmpty as NE @@ -47,6 +49,7 @@ import Data.Maybe (catMaybes) import qualified Data.Set as S import Data.Text (Text) import Matrix ( + DepDemands, Inventory, Vector, accumulateDepDemands, @@ -89,40 +92,36 @@ createSharedSolver dbName techTriples activityCount = do factVar <- newMVar Nothing return $ SharedSolver lock factVar techTriples activityCount dbName +{- | Compute the factorization and cache it. Assumes 'solverLock' is held and +the cache is empty (i.e. only called on a miss). Shared by the first-solve +path and 'ensureFactorization'. +-} +computeAndStoreFactorization :: SharedSolver -> IO MatrixFactorization +computeAndStoreFactorization solver = do + reportProgress Info $ "Factorizing '" ++ show (solverDbName solver) ++ "' on first use" + fact <- + precomputeMatrixFactorization + (solverDbName solver) + (solverTechTriples solver) + (solverActivityCount solver) + modifyMVar_ (solverFactorizationVar solver) (const (pure (Just fact))) + pure fact + -- | Solve using shared solver. On first call, triggers lazy factorization. solveWithSharedSolver :: SharedSolver -> Vector -> IO Vector -solveWithSharedSolver solver demandVector = do - withMVar (solverLock solver) $ \_ -> do - maybeFact <- readMVar (solverFactorizationVar solver) - case maybeFact of - Just factorization -> do +solveWithSharedSolver solver demandVector = + withMVar (solverLock solver) $ \_ -> + readMVar (solverFactorizationVar solver) >>= \case + Just fact -> do reportProgress Solver "Using cached factorization for sub-second solve" - solveSparseLinearSystemWithFactorization factorization demandVector - Nothing -> do - -- First solve: factorize, cache, then solve - reportProgress Info $ "First solve for '" ++ show (solverDbName solver) ++ "' — computing factorization..." - factResult <- - catch - ( do - factorization <- - precomputeMatrixFactorization - (solverDbName solver) - (solverTechTriples solver) - (solverActivityCount solver) - -- Store for subsequent solves (replace the MVar contents) - _ <- modifyMVar (solverFactorizationVar solver) $ \_ -> return (Just factorization, ()) - reportProgress Info "Factorization complete — solving" - result <- solveSparseLinearSystemWithFactorization factorization demandVector - return (Just result) - ) - ( \e -> do - reportProgress Solver $ "Factorization failed: " ++ show (e :: SomeException) ++ " — using fallback solver" - return Nothing - ) - case factResult of - Just result -> return result - Nothing -> - solveSparseLinearSystem (solverTechTriples solver) (solverActivityCount solver) demandVector + solveSparseLinearSystemWithFactorization fact demandVector + Nothing -> + try (computeAndStoreFactorization solver >>= flip solveSparseLinearSystemWithFactorization demandVector) + >>= either fallback pure + where + fallback e = do + reportProgress Solver $ "Factorization failed: " ++ show (e :: SomeException) ++ " — using fallback solver" + solveSparseLinearSystem (solverTechTriples solver) (solverActivityCount solver) demandVector -- | Read the cached factorization without solving. Returns Nothing until the first solve. getFactorization :: SharedSolver -> IO (Maybe MatrixFactorization) @@ -132,19 +131,10 @@ getFactorization solver = readMVar (solverFactorizationVar solver) Safe to call from multiple threads: the solverLock serializes first-time factorization. -} ensureFactorization :: SharedSolver -> IO MatrixFactorization -ensureFactorization solver = withMVar (solverLock solver) $ \_ -> do - maybeFact <- readMVar (solverFactorizationVar solver) - case maybeFact of +ensureFactorization solver = withMVar (solverLock solver) $ \_ -> + readMVar (solverFactorizationVar solver) >>= \case Just fact -> pure fact - Nothing -> do - reportProgress Info $ "Factorizing '" ++ show (solverDbName solver) ++ "' on first use" - fact <- - precomputeMatrixFactorization - (solverDbName solver) - (solverTechTriples solver) - (solverActivityCount solver) - modifyMVar_ (solverFactorizationVar solver) $ \_ -> pure (Just fact) - pure fact + Nothing -> computeAndStoreFactorization solver {- | Solve with multiple RHS vectors in one MUMPS call, using the cached factorization. Forces factorization on first call. Subsequent calls reuse the cached LU. @@ -204,6 +194,16 @@ data CrossDBSolution = CrossDBSolution , csScalings :: !(NonEmpty (Text, Database, Vector)) } +{- | Combine two cross-DB solutions: sum their inventories and concatenate +their visited-DB scalings. Associative; folding from a base preserves the +base-first, deps-in-order BFS layout 'csScalings' documents. +-} +instance Semigroup CrossDBSolution where + a <> b = + CrossDBSolution + (M.unionWith (+) (csInventory a) (csInventory b)) + (csScalings a <> csScalings b) + {- | Batch inventory with cross-DB back-substitution. Multi-RHS is preserved at every level of the dependency DAG: @@ -320,7 +320,7 @@ goWithDepsFromScalings unitConfig depLookup db dbName extraLinks scalings depth then pure (Right baseSolutions) else do let perRootDepDemands = map (accumulateDepDemandsWith db extraLinks) scalings - allDepDbs = S.toList $ S.unions $ map M.keysSet perRootDepDemands + allDepDbs = depDbsOf perRootDepDemands if null allDepDbs then pure (Right baseSolutions) else do @@ -352,20 +352,25 @@ Exported so the substitution-aware solver ('Service.goWithSubsAndDeps') reuses the same merge shape as the plain cross-DB solver. -} mergeSolutions :: CrossDBSolution -> [CrossDBSolution] -> CrossDBSolution -mergeSolutions base depSols = - CrossDBSolution - { csInventory = - foldr (M.unionWith (+)) (csInventory base) (map csInventory depSols) - , csScalings = - NE.appendList - (csScalings base) - (concatMap (NE.toList . csScalings) depSols) - } +mergeSolutions = foldl' (<>) + +-- | Every dependency DB referenced across a level's per-root demand maps. +depDbsOf :: [DepDemands] -> [Text] +depDbsOf = S.toList . S.unions . map M.keysSet + +{- | Turn a level's per-root demand maps into the dep DB's per-root demand +vectors, performing unit conversion. Picks out @depDbName@'s share of each +root's demands. Shared by every dep resolver (inventory, contributions, and +the substitution-aware path in 'Service'). +-} +prepareDepDemandVecs :: UnitConfig -> Text -> Database -> [DepDemands] -> Either Text [Vector] +prepareDepDemandVecs unitConfig depDbName depDb = + traverse (depDemandsToVector unitConfig depDbName depDb . M.findWithDefault M.empty depDbName) resolveDep :: UnitConfig -> DepSolverLookup -> - [M.Map Text (M.Map (UUID, UUID) (Double, Text))] -> + [DepDemands] -> -- | current depth (for recursion) Int -> -- | K (so absent-dep returns the right number of 'Nothing' padding) @@ -383,13 +388,11 @@ resolveDep unitConfig depLookup perRootDepDemands depth k depDbName = do -- materialising one (which the NonEmpty type forbids). pure (Right (replicate k Nothing)) Just (depDb, depSolver) -> - let demandsPerRoot = map (M.findWithDefault M.empty depDbName) perRootDepDemands - depVecsE = traverse (depDemandsToVector unitConfig depDbName depDb) demandsPerRoot - in case depVecsE of - Left err -> pure (Left err) - Right depDemandVecs -> do - sols <- goWithDeps unitConfig depLookup depDb depDbName depSolver depDemandVecs (depth + 1) - pure $ fmap (map Just) sols + case prepareDepDemandVecs unitConfig depDbName depDb perRootDepDemands of + Left err -> pure (Left err) + Right depDemandVecs -> do + sols <- goWithDeps unitConfig depLookup depDb depDbName depSolver depDemandVecs (depth + 1) + pure $ fmap (map Just) sols {- | Cross-DB per-activity LCIA contributions. Walks the same dep graph as 'goWithDeps' but attributes contributions per @(dbName, localPid)@ instead @@ -442,7 +445,7 @@ crossDBProcessContributions unitConfig unitDB flowDB depLookup rootDb rootName r then pure (Right localTagged) else do let perRootDepDemands = map (accumulateDepDemands db) scalings - allDepDbs = S.toList $ S.unions $ map M.keysSet perRootDepDemands + allDepDbs = depDbsOf perRootDepDemands if null allDepDbs then pure (Right localTagged) else do @@ -453,7 +456,7 @@ crossDBProcessContributions unitConfig unitDB flowDB depLookup rootDb rootName r Right $ foldr (M.unionWith (+)) localTagged depMaps resolveDepContribs :: - [M.Map Text (M.Map (UUID, UUID) (Double, Text))] -> + [DepDemands] -> Int -> Text -> IO (Either Text (M.Map (Text, ProcessId) Double)) @@ -462,8 +465,6 @@ crossDBProcessContributions unitConfig unitDB flowDB depLookup rootDb rootName r case depM of Nothing -> pure (Right M.empty) -- dep DB not loaded; root-level gate should have caught this Just (depDb, depSolver) -> - let demandsPerRoot = map (M.findWithDefault M.empty depDbName) perRootDepDemands - depVecsE = traverse (depDemandsToVector unitConfig depDbName depDb) demandsPerRoot - in case depVecsE of - Left err -> pure (Left err) - Right depDemandVecs -> go depDb depDbName depSolver depDemandVecs (depth + 1) + case prepareDepDemandVecs unitConfig depDbName depDb perRootDepDemands of + Left err -> pure (Left err) + Right depDemandVecs -> go depDb depDbName depSolver depDemandVecs (depth + 1) From b76fbacbb526a7bf2e67871b70fb4158e9ddd659 Mon Sep 17 00:00:00 2001 From: Christophe Combelles Date: Thu, 28 May 2026 21:11:36 +0200 Subject: [PATCH 43/43] refactor(ecospold): extract shared cut-off strategy into EcoSpold.Cutoff (#99) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## What The cut-off post-processing — reducing a multi-output dataset to a single reference product so the engine sees a single-output process — was duplicated **verbatim** between the EcoSpold1 and EcoSpold2 parsers (~70 lines, the same eight-function chain in each). This moves that block into one shared `EcoSpold.Cutoff` module that both parsers import. This is the last remaining piece of the EcoSpold refactor series (after the Parser2 split #97 and the Parser1 fold dedup #94), which left the cut-off block still copy-pasted in both files. ## Changes - **New `src/EcoSpold/Cutoff.hs`** — holds `applyCutoffStrategy`, `hasReferenceProduct`, `removeZeroAmountCoproducts`, `assignSingleProductAsReference`, `isProductionExchange` (exported) plus the internal `updateReferenceProduct` / `markAsReference` / `unmarkAsReference`. - **`Parser1.hs` / `Parser2.hs`** — delete the duplicated block, `import EcoSpold.Cutoff (applyCutoffStrategy)`. Parser1 drops its "exported for testing" cut-off helpers. - **`EcoSpold1Spec.hs`** — imports the cut-off helpers from `EcoSpold.Cutoff` instead of the old `EcoSpold.Parser1` re-export. - **`volca.cabal`** — registers the new library module. Pure post-fold logic, runs once per activity (not in the SAX loop), so no behaviour or throughput change. Net: ~154 duplicated lines removed across the two parsers, replaced by one ~95-line module. ## Verification - `cabal build lib:volca` + `exe:volca`: clean (only pre-existing unrelated MCP warnings). - `cabal test lca-tests`: **1107 examples, 0 failures, 1 pending** — including the EcoSpold1 cut-off-helper tests now resolving against the new module. --- src/EcoSpold/Cutoff.hs | 94 +++++++++++++++++++++++++++++++++++++++++ src/EcoSpold/Parser1.hs | 77 +-------------------------------- src/EcoSpold/Parser2.hs | 79 +--------------------------------- test/EcoSpold1Spec.hs | 1 + volca.cabal | 1 + 5 files changed, 98 insertions(+), 154 deletions(-) create mode 100644 src/EcoSpold/Cutoff.hs diff --git a/src/EcoSpold/Cutoff.hs b/src/EcoSpold/Cutoff.hs new file mode 100644 index 00000000..04d204e8 --- /dev/null +++ b/src/EcoSpold/Cutoff.hs @@ -0,0 +1,94 @@ +{- | Cut-off allocation strategy shared by the EcoSpold1 and EcoSpold2 parsers. + +Post-processing applied once per parsed activity (outside the SAX fold): +reduce a multi-output dataset to a single reference product so the downstream +engine sees a single-output process. +-} +module EcoSpold.Cutoff ( + applyCutoffStrategy, + hasReferenceProduct, + removeZeroAmountCoproducts, + assignSingleProductAsReference, + isProductionExchange, +) where + +import qualified Data.Text as T +import Types + +{- | Apply cut-off strategy +1. Remove zero-amount production exchanges (co-products) +2. Assign single non-zero product as reference product +3. Ensure single-output process structure +4. VALIDATION: Fail if no reference product can be established +-} +applyCutoffStrategy :: Activity -> Either String Activity +applyCutoffStrategy activity = + let filteredExchanges = removeZeroAmountCoproducts (exchanges activity) + updatedActivity = activity{exchanges = filteredExchanges} + finalActivity = + if hasReferenceProduct updatedActivity + then updatedActivity + else assignSingleProductAsReference updatedActivity + in if hasReferenceProduct finalActivity + then Right finalActivity + else Left $ "Activity has no reference product: " ++ T.unpack (activityName activity) + +-- | Check if activity has any reference product +hasReferenceProduct :: Activity -> Bool +hasReferenceProduct activity = any exchangeIsReference (exchanges activity) + +-- | Remove production exchanges with zero amounts +removeZeroAmountCoproducts :: [Exchange] -> [Exchange] +removeZeroAmountCoproducts = filter keepExchange + where + keepExchange TechnosphereExchange{techRole = ReferenceProduct} = True + keepExchange TechnosphereExchange{techRole = ReferenceInput} = True + keepExchange TechnosphereExchange{techRole = Input} = True + keepExchange TechnosphereExchange{techRole = Coproduct, techAmount = amount} = amount /= 0.0 + keepExchange BiosphereExchange{} = True + keepExchange WasteExchange{} = True + +-- | Assign single product as reference product +assignSingleProductAsReference :: Activity -> Activity +assignSingleProductAsReference activity = + let productionExchanges = [ex | ex <- exchanges activity, isProductionExchange ex] + nonZeroProduction = [ex | ex <- productionExchanges, exchangeAmount ex /= 0.0] + in case nonZeroProduction of + [singleProduct] -> + -- Update the single product to be reference product + let updatedExchanges = map (updateReferenceProduct singleProduct) (exchanges activity) + in activity{exchanges = updatedExchanges} + [] -> activity -- No production exchanges, leave as-is + _ -> activity -- Multiple production exchanges, leave as-is (shouldn't happen after cutoff) + +-- | Check if exchange is production exchange (technosphere output) +isProductionExchange :: Exchange -> Bool +isProductionExchange TechnosphereExchange{techRole = ReferenceProduct} = True +isProductionExchange TechnosphereExchange{techRole = Coproduct} = True +isProductionExchange TechnosphereExchange{techRole = Input} = False +isProductionExchange TechnosphereExchange{techRole = ReferenceInput} = False +isProductionExchange BiosphereExchange{} = False +isProductionExchange WasteExchange{} = False -- waste outputs aren't "production" in the SimaPro sense + +-- | Update reference product flag for the specified exchange +updateReferenceProduct :: Exchange -> Exchange -> Exchange +updateReferenceProduct target current + | exchangeFlowId target == exchangeFlowId current = markAsReference current + | otherwise = unmarkAsReference current + +-- | Promote a production exchange to reference product +markAsReference :: Exchange -> Exchange +markAsReference ex@TechnosphereExchange{} = ex{techRole = ReferenceProduct} +markAsReference ex@BiosphereExchange{} = ex +markAsReference ex@WasteExchange{} = ex -- waste flows can't be promoted to reference product + +-- | Demote a reference role back to non-reference (preserving input/output direction) +unmarkAsReference :: Exchange -> Exchange +unmarkAsReference ex@TechnosphereExchange{techRole = role} = ex{techRole = demote role} + where + demote ReferenceProduct = Coproduct + demote ReferenceInput = Input + demote Coproduct = Coproduct + demote Input = Input +unmarkAsReference ex@BiosphereExchange{} = ex +unmarkAsReference ex@WasteExchange{} = ex diff --git a/src/EcoSpold/Parser1.hs b/src/EcoSpold/Parser1.hs index 0c18de02..73c4f0ab 100644 --- a/src/EcoSpold/Parser1.hs +++ b/src/EcoSpold/Parser1.hs @@ -17,11 +17,6 @@ module EcoSpold.Parser1 ( -- * Pure helpers (exported for testing) generateFlowUUID, generateUnitUUID, - applyCutoffStrategy, - hasReferenceProduct, - removeZeroAmountCoproducts, - assignSingleProductAsReference, - isProductionExchange, ) where import Control.Monad (forM_) @@ -36,6 +31,7 @@ import qualified Data.Text.Encoding as TE import qualified Data.UUID as UUID import qualified Data.UUID.V5 as UUID5 import EcoSpold.Common (bsToDouble, bsToInt, bsToText, isElement, nonEmptyText) +import EcoSpold.Cutoff (applyCutoffStrategy) import Progress (ProgressLevel (..), reportProgress) import Types import qualified Xeno.SAX as X @@ -482,77 +478,6 @@ streamParseActivityAndFlowsFromFile1 path = do !xmlContent <- BS.readFile path return (parseWithXeno xmlContent) --- | Apply cut-off strategy (same logic as EcoSpold2) -applyCutoffStrategy :: Activity -> Either String Activity -applyCutoffStrategy activity = - let filteredExchanges = removeZeroAmountCoproducts (exchanges activity) - updatedActivity = activity{exchanges = filteredExchanges} - finalActivity = - if hasReferenceProduct updatedActivity - then updatedActivity - else assignSingleProductAsReference updatedActivity - in if hasReferenceProduct finalActivity - then Right finalActivity - else Left $ "Activity has no reference product: " ++ T.unpack (activityName activity) - --- | Check if activity has any reference product -hasReferenceProduct :: Activity -> Bool -hasReferenceProduct act = any exchangeIsReference (exchanges act) - --- | Remove production exchanges with zero amounts -removeZeroAmountCoproducts :: [Exchange] -> [Exchange] -removeZeroAmountCoproducts = filter keepExchange - where - keepExchange TechnosphereExchange{techRole = ReferenceProduct} = True - keepExchange TechnosphereExchange{techRole = ReferenceInput} = True - keepExchange TechnosphereExchange{techRole = Input} = True - keepExchange TechnosphereExchange{techRole = Coproduct, techAmount = amount} = amount /= 0.0 - keepExchange BiosphereExchange{} = True - keepExchange WasteExchange{} = True - --- | Assign single product as reference product -assignSingleProductAsReference :: Activity -> Activity -assignSingleProductAsReference act = - let productionExchanges = [ex | ex <- exchanges act, isProductionExchange ex] - nonZeroProduction = [ex | ex <- productionExchanges, exchangeAmount ex /= 0.0] - in case nonZeroProduction of - [singleProduct] -> - let updatedExchanges = map (updateReferenceProduct singleProduct) (exchanges act) - in act{exchanges = updatedExchanges} - _ -> act - --- | Check if exchange is production exchange (technosphere output) -isProductionExchange :: Exchange -> Bool -isProductionExchange TechnosphereExchange{techRole = ReferenceProduct} = True -isProductionExchange TechnosphereExchange{techRole = Coproduct} = True -isProductionExchange TechnosphereExchange{techRole = Input} = False -isProductionExchange TechnosphereExchange{techRole = ReferenceInput} = False -isProductionExchange BiosphereExchange{} = False -isProductionExchange WasteExchange{} = False -- waste outputs aren't "production" in the SimaPro sense - --- | Update reference product flag -updateReferenceProduct :: Exchange -> Exchange -> Exchange -updateReferenceProduct target current - | exchangeFlowId target == exchangeFlowId current = markAsReference current - | otherwise = unmarkAsReference current - --- | Promote a production exchange to reference product -markAsReference :: Exchange -> Exchange -markAsReference ex@TechnosphereExchange{} = ex{techRole = ReferenceProduct} -markAsReference ex@BiosphereExchange{} = ex -markAsReference ex@WasteExchange{} = ex -- waste flows can't be promoted to reference product - --- | Demote a reference role back to non-reference (preserving input/output direction) -unmarkAsReference :: Exchange -> Exchange -unmarkAsReference ex@TechnosphereExchange{techRole = role} = ex{techRole = demote role} - where - demote ReferenceProduct = Coproduct - demote ReferenceInput = Input - demote Coproduct = Coproduct - demote Input = Input -unmarkAsReference ex@BiosphereExchange{} = ex -unmarkAsReference ex@WasteExchange{} = ex - -- ============================================================================ -- Multi-dataset file support -- ============================================================================ diff --git a/src/EcoSpold/Parser2.hs b/src/EcoSpold/Parser2.hs index d82fe7d3..0cd910f2 100644 --- a/src/EcoSpold/Parser2.hs +++ b/src/EcoSpold/Parser2.hs @@ -15,6 +15,7 @@ import qualified Data.Text.Encoding as TE import qualified Data.UUID as UUID import qualified Data.UUID.V5 as UUID5 import EcoSpold.Common (bsToDouble, bsToInt, bsToIntMaybe, bsToText, isElement) +import EcoSpold.Cutoff (applyCutoffStrategy) import Progress (ProgressLevel (..), reportProgress) import System.FilePath (takeBaseName) import Types @@ -675,81 +676,3 @@ streamParseActivityAndFlowsFromFile path = do Right (result, warnings) -> do mapM_ (reportProgress Warning) warnings return $ Right result - -{- | Apply cut-off strategy -1. Remove zero-amount production exchanges (co-products) -2. Assign single non-zero product as reference product -3. Ensure single-output process structure -4. VALIDATION: Fail if no reference product can be established --} -applyCutoffStrategy :: Activity -> Either String Activity -applyCutoffStrategy activity = - let filteredExchanges = removeZeroAmountCoproducts (exchanges activity) - updatedActivity = activity{exchanges = filteredExchanges} - finalActivity = - if hasReferenceProduct updatedActivity - then updatedActivity - else assignSingleProductAsReference updatedActivity - in if hasReferenceProduct finalActivity - then Right finalActivity - else Left $ "Activity has no reference product: " ++ T.unpack (activityName activity) - --- | Check if activity has any reference product -hasReferenceProduct :: Activity -> Bool -hasReferenceProduct activity = any exchangeIsReference (exchanges activity) - --- | Remove production exchanges with zero amounts -removeZeroAmountCoproducts :: [Exchange] -> [Exchange] -removeZeroAmountCoproducts = filter keepExchange - where - keepExchange TechnosphereExchange{techRole = ReferenceProduct} = True - keepExchange TechnosphereExchange{techRole = ReferenceInput} = True - keepExchange TechnosphereExchange{techRole = Input} = True - keepExchange TechnosphereExchange{techRole = Coproduct, techAmount = amount} = amount /= 0.0 - keepExchange BiosphereExchange{} = True - keepExchange WasteExchange{} = True - --- | Assign single product as reference product -assignSingleProductAsReference :: Activity -> Activity -assignSingleProductAsReference activity = - let productionExchanges = [ex | ex <- exchanges activity, isProductionExchange ex] - nonZeroProduction = [ex | ex <- productionExchanges, exchangeAmount ex /= 0.0] - in case nonZeroProduction of - [singleProduct] -> - -- Update the single product to be reference product - let updatedExchanges = map (updateReferenceProduct singleProduct) (exchanges activity) - in activity{exchanges = updatedExchanges} - [] -> activity -- No production exchanges, leave as-is - _ -> activity -- Multiple production exchanges, leave as-is (shouldn't happen after cutoff) - --- | Check if exchange is production exchange (technosphere output) -isProductionExchange :: Exchange -> Bool -isProductionExchange TechnosphereExchange{techRole = ReferenceProduct} = True -isProductionExchange TechnosphereExchange{techRole = Coproduct} = True -isProductionExchange TechnosphereExchange{techRole = Input} = False -isProductionExchange TechnosphereExchange{techRole = ReferenceInput} = False -isProductionExchange BiosphereExchange{} = False -isProductionExchange WasteExchange{} = False - --- | Update reference product flag for the specified exchange -updateReferenceProduct :: Exchange -> Exchange -> Exchange -updateReferenceProduct target current - | exchangeFlowId target == exchangeFlowId current = markAsReference current - | otherwise = unmarkAsReference current - --- | Promote a production exchange to reference product -markAsReference :: Exchange -> Exchange -markAsReference ex@TechnosphereExchange{} = ex{techRole = ReferenceProduct} -markAsReference ex@BiosphereExchange{} = ex -markAsReference ex@WasteExchange{} = ex - --- | Demote a reference role back to non-reference (preserving input/output direction) -unmarkAsReference :: Exchange -> Exchange -unmarkAsReference ex@TechnosphereExchange{techRole = role} = ex{techRole = demote role} - where - demote ReferenceProduct = Coproduct - demote ReferenceInput = Input - demote Coproduct = Coproduct - demote Input = Input -unmarkAsReference ex@BiosphereExchange{} = ex -unmarkAsReference ex@WasteExchange{} = ex diff --git a/test/EcoSpold1Spec.hs b/test/EcoSpold1Spec.hs index a1e9c46e..db693f2e 100644 --- a/test/EcoSpold1Spec.hs +++ b/test/EcoSpold1Spec.hs @@ -7,6 +7,7 @@ import qualified Data.Map.Strict as M import Data.UUID (nil) import Test.Hspec +import EcoSpold.Cutoff import EcoSpold.Parser1 import Types diff --git a/volca.cabal b/volca.cabal index cf215f66..509127ba 100644 --- a/volca.cabal +++ b/volca.cabal @@ -73,6 +73,7 @@ library , CLI.Client , CLI.Repl , EcoSpold.Common + , EcoSpold.Cutoff , EcoSpold.Parser2 , EcoSpold.Parser1 , SimaPro.Parser