Skip to content

Commit b6e43fc

Browse files
authored
add support for CWL import (#281)
* add support for CWL reading
1 parent 837fd2e commit b6e43fc

File tree

3 files changed

+217
-5
lines changed

3 files changed

+217
-5
lines changed

pygeometa/schemas/__init__.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
# those files. Users are asked to read the 3rd Party Licenses
1919
# referenced with those assets.
2020
#
21-
# Copyright (c) 2022 Tom Kralidis
21+
# Copyright (c) 2025 Tom Kralidis
2222
#
2323
# Permission is hereby granted, free of charge, to any person
2424
# obtaining a copy of this software and associated documentation
@@ -61,7 +61,8 @@
6161
'dcat': 'pygeometa.schemas.dcat.DCATOutputSchema',
6262
'wmo-cmp': 'pygeometa.schemas.wmo_cmp.WMOCMPOutputSchema',
6363
'wmo-wcmp2': 'pygeometa.schemas.wmo_wcmp2.WMOWCMP2OutputSchema',
64-
'wmo-wigos': 'pygeometa.schemas.wmo_wigos.WMOWIGOSOutputSchema'
64+
'wmo-wigos': 'pygeometa.schemas.wmo_wigos.WMOWIGOSOutputSchema',
65+
'cwl': 'pygeometa.schemas.cwl.CWLOutputSchema'
6566
}
6667

6768

pygeometa/schemas/cwl/__init__.py

Lines changed: 211 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,211 @@
1+
# =================================================================
2+
#
3+
# Terms and Conditions of Use
4+
#
5+
# Unless otherwise noted, computer program source code of this
6+
# distribution is covered under Crown Copyright, Government of
7+
# Canada, and is distributed under the MIT License.
8+
#
9+
# The Canada wordmark and related graphics associated with this
10+
# distribution are protected under trademark law and copyright law.
11+
# No permission is granted to use them outside the parameters of
12+
# the Government of Canada's corporate identity program. For
13+
# more information, see
14+
# http://www.tbs-sct.gc.ca/fip-pcim/index-eng.asp
15+
#
16+
# Copyright title to all 3rd party software distributed with this
17+
# software is held by the respective copyright holders as noted in
18+
# those files. Users are asked to read the 3rd Party Licenses
19+
# referenced with those assets.
20+
#
21+
# Copyright (c) 2025 Tom Kralidis
22+
#
23+
# Permission is hereby granted, free of charge, to any person
24+
# obtaining a copy of this software and associated documentation
25+
# files (the "Software"), to deal in the Software without
26+
# restriction, including without limitation the rights to use,
27+
# copy, modify, merge, publish, distribute, sublicense, and/or sell
28+
# copies of the Software, and to permit persons to whom the
29+
# Software is furnished to do so, subject to the following
30+
# conditions:
31+
#
32+
# The above copyright notice and this permission notice shall be
33+
# included in all copies or substantial portions of the Software.
34+
#
35+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
36+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
37+
# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
38+
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
39+
# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
40+
# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
41+
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
42+
# OTHER DEALINGS IN THE SOFTWARE.
43+
#
44+
# =================================================================
45+
46+
import datetime
47+
import logging
48+
import os
49+
from typing import Union
50+
51+
import yaml
52+
53+
from pygeometa.schemas.base import BaseOutputSchema
54+
55+
THISDIR = os.path.dirname(os.path.realpath(__file__))
56+
57+
LOGGER = logging.getLogger(__name__)
58+
59+
60+
class CWLOutputSchema(BaseOutputSchema):
    """Common Workflow Language v1.2 schema"""

    # (name, link relation, description) of each optional URL-valued
    # annotation that becomes an MCF distribution link.  The CWL key is
    # 's:<name>' and the MCF distribution key is '<name>'.  Tuple order
    # is the order in which entries are added to the MCF.
    _DISTRIBUTION_LINKS = (
        ('releaseNotes', 'related', 'release notes'),
        ('citation', 'cite-as', 'citation'),
        ('codeRepository', 'working-copy-of', 'code repository'),
        ('license', 'license', 'license'),
        ('logo', 'icon', 'logo'),
    )

    # (MCF contact key, CWL party key) pairs
    _CONTACT_FIELDS = (
        ('individualname', 's:name'),
        ('organization', 's:affiliation'),
        ('email', 's:email'),
    )

    def __init__(self):
        """
        Initialize object

        :returns: pygeometa.schemas.cwl.CWLOutputSchema
        """

        description = 'Common Workflow Language v1.2'

        super().__init__('cwl', description, 'yaml', THISDIR)

    @classmethod
    def _first_party_to_contact(cls, parties) -> dict:
        """
        Build an MCF contact from the first entry of a CWL party list

        :param parties: value of `s:author` / `s:contributor` (a list of
                        dicts, or a single dict)

        :returns: `dict` of MCF contact; only fields present in the
                  party are emitted, empty `dict` if there is no party
        """

        if isinstance(parties, dict):
            parties = [parties]

        if not parties:
            return {}

        party = parties[0]

        return {
            mcf_key: party[cwl_key]
            for mcf_key, cwl_key in cls._CONTACT_FIELDS
            if cwl_key in party
        }

    def import_(self, metadata: str) -> dict:
        """
        Import CWL metadata into MCF

        :param metadata: string of CWL document (YAML)

        :returns: `dict` of MCF content

        :raises KeyError: if the document has no `$graph`, or the
                          Workflow lacks `id`, `label` or `doc`
        :raises IndexError: if `$graph` contains no `Workflow` entry
        """

        cwl = yaml.safe_load(metadata)

        mcf = {
            'mcf': {
                'version': '1.0'
            },
            'metadata': {
                'language': 'eng',
                'charset': 'utf8'
            },
            'spatial': {
                'datatype': 'grid',
                'geomtype': 'solid'
            },
            'identification': {
                'charset': 'utf8',
                'language': 'missing',
                'keywords': {},
                'dates': {},
                'status': 'onGoing',
                'maintenancefrequency': 'continual'
            },
            'contact': {
                'pointOfContact': {},
                'distributor': {},
                'author': {}
            },
            'distribution': {},
            'dataquality': {
                'lineage': {}
            }
        }

        # datetime.timezone.utc rather than the datetime.UTC alias, which
        # only exists on Python 3.11+
        now = datetime.datetime.now(datetime.timezone.utc)

        # the first Workflow entry of the graph supplies id/title/abstract;
        # entries without a 'class' key are skipped rather than fatal
        wf = next(
            (x for x in cwl['$graph'] if x.get('class') == 'Workflow'),
            None
        )
        if wf is None:
            raise IndexError('No Workflow found in CWL $graph')

        mcf['metadata']['identifier'] = wf['id']
        mcf['metadata']['hierarchylevel'] = 'application'
        mcf['metadata']['datestamp'] = now
        mcf['identification']['title'] = wf['label']
        mcf['identification']['abstract'] = wf['doc']

        keywords = ['application', 'CWL']
        if 's:softwareVersion' in cwl:
            # optional, like every other s: annotation handled below
            keywords.insert(
                0, f'softwareVersion:{cwl["s:softwareVersion"]}')

        # s:keywords may be a comma-separated string or a YAML list;
        # surrounding whitespace and empty items are dropped
        extra = cwl.get('s:keywords') or []
        if isinstance(extra, str):
            extra = extra.split(',')
        stripped = (str(keyword).strip() for keyword in extra)
        keywords.extend(keyword for keyword in stripped if keyword)

        mcf['identification']['keywords']['default'] = {
            'keywords': keywords,
            'keywords_type': 'theme'
        }

        mcf['dataquality']['scope'] = {'level': 'application'}

        if 's:releaseNotes' in cwl:
            mcf['dataquality']['lineage']['statement'] = cwl['s:releaseNotes']  # noqa

        if 's:version' in cwl:
            mcf['identification']['edition'] = cwl['s:version']

        if 's:author' in cwl:
            mcf['contact']['author'] = self._first_party_to_contact(
                cwl['s:author'])

        if 's:contributor' in cwl:
            mcf['contact']['pointOfContact'] = self._first_party_to_contact(
                cwl['s:contributor'])

        if 's:dateCreated' in cwl:
            mcf['identification']['dates'] = {
                'creation': cwl['s:dateCreated']
            }

        for name, rel, link_description in self._DISTRIBUTION_LINKS:
            cwl_key = f's:{name}'
            if cwl_key not in cwl:
                continue

            mcf['distribution'][name] = {
                'rel': rel,
                'url': cwl[cwl_key],
                'type': 'text/html',
                'name': name,
                'description': link_description
            }

        # no spatial information is read from the CWL document; a global
        # extent is always emitted
        mcf['identification']['extents'] = {
            'spatial': [{
                'bbox': [-180, -90, 180, 90],
                'crs': 4326
            }]
        }

        LOGGER.info('MCF: %s', mcf)

        return mcf

    def write(self, mcf: dict, stringify: bool = True) -> Union[dict, str]:
        """
        Write MCF as CWL (not supported; this schema is import-only)

        :param mcf: `dict` of MCF content
        :param stringify: unused; no output is produced

        :raises NotImplementedError: always
        """

        raise NotImplementedError()

tests/run_tests.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -226,17 +226,17 @@ def test_get_supported_schemas(self):
226226

227227
schemas = sorted(get_supported_schemas())
228228
self.assertIsInstance(schemas, list, 'Expected list')
229-
self.assertEqual(len(schemas), 9,
229+
self.assertEqual(len(schemas), 10,
230230
'Expected specific number of supported schemas')
231231
self.assertEqual(sorted(schemas),
232-
sorted(['dcat', 'iso19139', 'iso19139-2',
232+
sorted(['cwl', 'dcat', 'iso19139', 'iso19139-2',
233233
'iso19139-hnap', 'oarec-record',
234234
'stac-item', 'wmo-cmp', 'wmo-wcmp2',
235235
'wmo-wigos']),
236236
'Expected exact list of supported schemas')
237237

238238
schemas = get_supported_schemas(include_autodetect=True)
239-
self.assertEqual(len(schemas), 10,
239+
self.assertEqual(len(schemas), 11,
240240
'Expected specific number of supported schemas')
241241
self.assertIn('autodetect', schemas, 'Expected autodetect in list')
242242

0 commit comments

Comments
 (0)