1212import numpy as np
1313
1414from message_ix_models .tools .exo_data import BaseOptions , ExoDataSource , register_source
15- from message_ix_models .util import cached , path_fallback , silence_log
15+ from message_ix_models .util import MappingAdapter , cached , path_fallback , silence_log
1616from message_ix_models .util .pooch import SOURCE , fetch
1717
1818if TYPE_CHECKING :
1919 from pathlib import Path
2020 from re import Pattern
2121
22- from genno .types import AnyQuantity
22+ from genno .types import AnyQuantity , Key
2323 from pandas import DataFrame
2424
2525log = logging .getLogger (__name__ )
2626
#: Labels found on the :math:`(i, j)` dimensions of the :class:`BACI` data that are not
#: current ISO 3166-1 numeric codes, each paired with an alpha-3 label. The labels are
#: generally of 3 kinds:
#:
#: - Numeric codes listed in ISO 3166-3 (“Code for formerly used names of countries”)
#:   rather than in ISO 3166-1.
#: - Numeric codes for countries that exist in ISO 3166-1, but under a different
#:   number. For example, ISO has 250 for “France”, but BACI uses 251.
#: - Numeric codes for countries or country groups that do not appear in ISO 3166.
#:
#: These are a subset of the labels in the ``country_code`` column of the file
#: :file:`country_codes_V202501.csv` in the archive :file:`BACI_HS92_V202501.zip`; only
#: labels that appear in the data files are included.
COUNTRY_CODES = [
    # "Belgium-Luxembourg (...1998)"; ISO 3166-1 numeric is 56
    (58, "BEL"),
    # ISO 3166-1 numeric is 250
    (251, "FRA"),
    # "Other Asia, nes"; not in ISO 3166-1
    (490, "S19"),
    # Part of ISO 3166-3, not ISO 3166-1
    (530, "ANT"),
    # ISO 3166-1 numeric is 578
    (579, "NOR"),
    # ISO 3166-1 numeric is 356
    (699, "IND"),
    # "Southern African Customs Union (...1999)"; not in ISO 3166-1
    (711, "ZA1"),
    # "Sudan (...2011)"; ISO 3166-1 numeric is 729
    (736, "SDN"),
    # ISO 3166-1 numeric is 756
    (757, "CHE"),
    # ISO 3166-1 numeric is 840
    (842, "USA"),
    # "US Misc. Pacific Isds"; not in ISO 3166-1
    (849, "PUS"),
    # Part of ISO 3166-3, not ISO 3166-1
    (891, "SCG"),
]
53+
2754#: Dimensions and data types for input data. In order to reduce memory and disk usage:
2855#:
2956#: - :py:`np.uint16` (0 to 65_535) is used for t (year), i (exporter), and j (importer)
@@ -47,10 +74,7 @@ class BACI(ExoDataSource):
4774 - The 202501 release only.
4875 - The 1992 Harmonized System (HS92) only.
4976
50- .. todo::
51- - Transform ISO 3166-1 numeric codes for the :math:`i, j` dimensions to
52- alpha-3 codes.
53- - Aggregate to MESSAGE regions.
77+ .. todo:: Aggregate to MESSAGE regions.
5478 """
5579
5680 @dataclass
@@ -136,6 +160,15 @@ def get(self) -> "AnyQuantity":
136160 .iloc [:, 0 ]
137161 )
138162
def transform(self, c: "genno.Computer", base_key: "Key") -> "Key":
    """Add tasks to `c` that transform the raw data at `base_key`.

    A single step is added:

    1. Map BACI codes for the :math:`(i, j)` dimensions from numeric (mainly ISO
       3166-1 numeric) to ISO 3166-1 alpha-3, using the adapter returned by
       :func:`get_mapping`.

    Returns the key under which the step is added: `base_key` indexed with ``0``.
    """
    # Derive the output key once; it is both the task's key and the return value
    result_key = base_key[0]
    c.add(result_key, get_mapping(), base_key)
    return result_key
171+
139172
140173@cached
141174def baci_data_from_files (
@@ -168,6 +201,27 @@ def baci_data_from_files(
168201 return result
169202
170203
def get_mapping() -> MappingAdapter:
    """Return an adapter from the codes appearing in BACI data to alpha-3 codes.

    On dimensions :math:`i` (exporter) and :math:`j` (importer), the BACI data carry
    ISO 3166-1 numeric codes plus some idiosyncratic codes listed in
    :data:`COUNTRY_CODES`. The returned adapter maps each of these to the
    corresponding alpha-3 code, and is constructed with ``on_missing="raise"``.

    Data passed through the adapter are suitable for aggregation using the
    :mod:`message_ix_models` ``node`` code lists, which include those alpha-3 codes as
    children of each region code.
    """
    from pycountry import countries

    # Every ISO 3166-1 entry, as (numeric code as int, alpha-3 code)
    iso_pairs = [(int(country.numeric), country.alpha_3) for country in countries]

    # Idiosyncratic BACI labels first, followed by the ISO 3166-1 entries
    all_pairs = COUNTRY_CODES + iso_pairs

    # The same (numeric, alpha-3) pairs apply to both the i and j dimensions
    return MappingAdapter({"i": all_pairs, "j": all_pairs}, on_missing="raise")
223+
224+
171225if __name__ == "__main__" : # pragma: no cover
172226 from tqdm import tqdm
173227
0 commit comments