Skip to content

Commit a795a40

Browse files
committed
Transform BACI numeric to ISO 3166-1 alpha-3 codes
1 parent 07b23fe commit a795a40

File tree

1 file changed

+60
-6
lines changed

1 file changed

+60
-6
lines changed

message_ix_models/tools/cepii.py

Lines changed: 60 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -12,18 +12,45 @@
1212
import numpy as np
1313

1414
from message_ix_models.tools.exo_data import BaseOptions, ExoDataSource, register_source
15-
from message_ix_models.util import cached, path_fallback, silence_log
15+
from message_ix_models.util import MappingAdapter, cached, path_fallback, silence_log
1616
from message_ix_models.util.pooch import SOURCE, fetch
1717

1818
if TYPE_CHECKING:
1919
from pathlib import Path
2020
from re import Pattern
2121

22-
from genno.types import AnyQuantity
22+
from genno.types import AnyQuantity, Key
2323
from pandas import DataFrame
2424

2525
log = logging.getLogger(__name__)
2626

27+
#: Labels appearing in the :math:`(i, j)` dimensions of the :class:`BACI` data that are
28+
#: not current ISO 3166-1 numeric codes. These are generally of 3 kinds:
29+
#:
30+
#: - Numeric codes that are in ISO 3166-3 (“Code for formerly used names of countries”),
31+
#: not ISO 3166-1.
32+
#: - Numeric codes for countries that exist in ISO 3166-1, but simply differ. For
33+
#: example, ISO has 250 for “France”, but BACI uses 251.
34+
#: - Numeric codes for countries or country groups that do not appear in ISO 3166.
35+
#:
36+
#: This is a subset of the labels appearing in the ``country_code`` column of the file
37+
#: :file:`country_codes_V202501.csv` in the archive :file:`BACI_HS92_V202501.zip`. Only
38+
#: the labels appearing in the data files are included.
39+
COUNTRY_CODES = [
    # Each entry is (label used in BACI data, alpha-3 code it is mapped to).
    (58, "BEL"),  # "Belgium-Luxembourg (...1998)"; ISO 3166-1 numeric is 56
    (251, "FRA"),  # ISO 3166-1 numeric is 250
    (490, "S19"),  # "Other Asia, nes"; not in ISO 3166-1
    (530, "ANT"),  # Part of ISO 3166-3, not ISO 3166-1
    (579, "NOR"),  # ISO 3166-1 numeric is 578
    (699, "IND"),  # ISO 3166-1 numeric is 356
    (711, "ZA1"),  # "Southern African Customs Union (...1999)"; not in ISO 3166-1
    (736, "SDN"),  # "Sudan (...2011)"; ISO 3166-1 numeric is 729
    (757, "CHE"),  # ISO 3166-1 numeric is 756
    (842, "USA"),  # ISO 3166-1 numeric is 840
    (849, "PUS"),  # "US Misc. Pacific Isds"; not in ISO 3166-1
    (891, "SCG"),  # Part of ISO 3166-3, not ISO 3166-1
]
53+
2754
#: Dimensions and data types for input data. In order to reduce memory and disk usage:
2855
#:
2956
#: - :py:`np.uint16` (0 to 65_535) is used for t (year), i (exporter), and j (importer)
@@ -47,10 +74,7 @@ class BACI(ExoDataSource):
4774
- The 202501 release only.
4875
- The 1992 Harmonized System (HS92) only.
4976
50-
.. todo::
51-
- Transform ISO 3166-1 numeric codes for the :math:`i, j` dimensions to
52-
alpha-3 codes.
53-
- Aggregate to MESSAGE regions.
77+
.. todo:: Aggregate to MESSAGE regions.
5478
"""
5579

5680
@dataclass
@@ -136,6 +160,15 @@ def get(self) -> "AnyQuantity":
136160
.iloc[:, 0]
137161
)
138162

163+
def transform(self, c: "genno.Computer", base_key: "Key") -> "Key":
    """Prepare `c` to transform raw data from `base_key`.

    A single step is added to `c`:

    1. Relabel the :math:`(i, j)` dimensions, converting BACI numeric codes (mainly
       ISO 3166-1 numeric) to ISO 3166-1 alpha-3 codes. The adapter that does this
       is returned by :func:`get_mapping`.

    The returned key is :py:`base_key[0]`, the key under which the step is added.
    """
    # Key for the output of the adapter applied to the data at `base_key`
    k_mapped = base_key[0]
    c.add(k_mapped, get_mapping(), base_key)
    return k_mapped
171+
139172

140173
@cached
141174
def baci_data_from_files(
@@ -168,6 +201,27 @@ def baci_data_from_files(
168201
return result
169202

170203

204+
def get_mapping() -> MappingAdapter:
    """Build the adapter that relabels BACI country codes as alpha-3 codes.

    In the BACI data, the dimensions :math:`i` (exporter) and :math:`j` (importer) are
    labelled with ISO 3166-1 numeric codes, together with the idiosyncratic codes
    listed in :data:`COUNTRY_CODES`. The adapter returned here maps every such label
    to the corresponding alpha-3 code, and is constructed with ``on_missing="raise"``.

    Data passed through the adapter can then be aggregated using the
    :mod:`message_ix_models` ``node`` code lists, in which those alpha-3 codes appear
    as children of each region code.
    """
    from pycountry import countries

    # Idiosyncratic BACI labels first, followed by every entry of ISO 3166-1
    pairs = list(COUNTRY_CODES)
    pairs.extend((int(country.numeric), country.alpha_3) for country in countries)

    # Dimensions i and j share one and the same list of (numeric, alpha-3) pairs
    return MappingAdapter(dict(i=pairs, j=pairs), on_missing="raise")
223+
224+
171225
if __name__ == "__main__": # pragma: no cover
172226
from tqdm import tqdm
173227

0 commit comments

Comments
 (0)