Skip to content

Commit 9f0ad3e

Browse files
authored
Merge pull request #27 from emiliom/ndbc
Upgrades to address failures with NDBC SOS handling
2 parents 3b91854 + 4dc15be commit 9f0ad3e

File tree

1 file changed

+73
-49
lines changed

1 file changed

+73
-49
lines changed

sensorml2iso/sensorml2iso.py

Lines changed: 73 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,6 @@
1818

1919
# import numpy as np
2020
import pandas as pd
21-
# from shapely.geometry import Point
22-
# import geopandas as gpd
2321

2422
from owslib.sos import SensorObservationService
2523
from owslib.swe.sensor.sml import SensorML, Contact, Documentation
@@ -77,7 +75,8 @@ class Sensorml2Iso:
7775
'application/ioos+xml;version=0.6.1': 'XML (IOOS DIF SOS v0.6.1)'
7876
}
7977

80-
def __init__(self, service=None, active_station_days=None, stations=None, getobs_req_hours=None, response_formats=None, sos_type=None, output_dir=None, verbose=False):
78+
def __init__(self, service=None, active_station_days=None, stations=None, getobs_req_hours=None,
79+
response_formats=None, sos_type=None, output_dir=None, verbose=False):
8180
"""
8281
"""
8382

@@ -134,19 +133,6 @@ def run(self):
134133
self.log.write(u"\nNo valid SensorML documents obtained from SOS serivce. Verify service is compliant with the SOS profile [URL: {url}]".format(url=self.service))
135134
sys.exit("No valed SensorML documents obtained from SOS serivce. Verify service is compliant with the SOS profile [URL: {url}]".format(url=self.service))
136135

137-
# Assign EPSG:4326 CRS, retrieved from epsg.io
138-
# The OGC WKT crs string is available directly at http://epsg.io/4326.wkt
139-
# or http://spatialreference.org/ref/epsg/4326/ogcwkt/
140-
# crs = '''GEOGCS["WGS 84",
141-
# DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],
142-
# AUTHORITY["EPSG","6326"]],
143-
# PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],
144-
# UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],
145-
# AUTHORITY["EPSG","4326"]]'
146-
# '''
147-
# geometry = [Point(xy) for xy in zip(stations_df.lon, stations_df.lat)]
148-
# self.stations_gdf = gpd.GeoDataFrame(stations_df, geometry=geometry, crs=crs)
149-
150136
# determine active/inactive stations (--active_station_days parameter if provided) and filter stations_df accordingly:
151137
if self.active_station_days is not None:
152138
station_active_date = datetime.now() - timedelta(days=self.active_station_days)
@@ -188,7 +174,8 @@ def nsp(self, path):
188174
def get_stations_df(self, sos_url, station_urns_sel=None):
189175
""" Returns a Pandas Dataframe
190176
"""
191-
# oFrmts: IOOS SOS OutputFormat strings (first is compliant to the IOOS SOS spec, second is to accommodate NDBC). More info here:
177+
# oFrmts: IOOS SOS OutputFormat strings (first is compliant to the IOOS SOS spec,
178+
# second is to accommodate NDBC). More info here:
192179
# http://ioos.github.io/sos-guidelines/doc/wsdd/sos_wsdd_github_notoc/#describesensor-request:638e0b263020c13a76a55332bd966dbe
193180
oFrmts = ['text/xml; subtype="sensorML/1.0.1/profiles/ioos_sos/1.0"', 'text/xml;subtype="sensorML/1.0.1"']
194181
params = {'service': 'SOS', 'request': 'GetCapabilities', 'acceptVersions': '1.0.0'}
@@ -208,7 +195,8 @@ def get_stations_df(self, sos_url, station_urns_sel=None):
208195
sml_errors = {}
209196
describe_sensor_url = {}
210197

211-
# leverage Pyoos Collector to query for all available stations and obtain SensorML (if station subset not passed in --stations param)
198+
# leverage Pyoos Collector to query for all available stations and obtain SensorML
199+
# (if station subset not passed in --stations param)
212200
if station_urns_sel is not None:
213201
station_urns = station_urns_sel
214202
else:
@@ -279,6 +267,12 @@ def get_stations_df(self, sos_url, station_urns_sel=None):
279267
failures.append(station_urn)
280268
continue
281269

270+
if self.sos_type.lower() == 'ndbc':
271+
# later: add an error check
272+
sosgc_station_offering = sosgc.contents['station-' + station_urn.split(':')[-1]]
273+
else:
274+
sosgc_station_offering = None
275+
282276
try:
283277
ds = IoosDescribeSensor(sml._root)
284278
except AttributeError:
@@ -294,7 +288,9 @@ def get_stations_df(self, sos_url, station_urns_sel=None):
294288

295289
# assign 'pos' to GML point location (accommodate 'gml:coordinates' as used by NDBC if gml:Point not found):
296290
try:
297-
pos = testXMLValue(ds.system.location.find(self.nsp('gml:Point/gml:pos'))) if testXMLValue(ds.system.location.find(self.nsp('gml:Point/gml:pos'))) is not None else testXMLValue(ds.system.location.find(self.nsp('gml:Point/gml:coordinates')))
291+
pos = testXMLValue(ds.system.location.find(self.nsp('gml:Point/gml:pos'))) \
292+
if testXMLValue(ds.system.location.find(self.nsp('gml:Point/gml:pos'))) is not None \
293+
else testXMLValue(ds.system.location.find(self.nsp('gml:Point/gml:coordinates')))
298294
station['lon'] = float(pos.split()[1])
299295
station['lat'] = float(pos.split()[0])
300296
except AttributeError as e:
@@ -315,7 +311,9 @@ def get_stations_df(self, sos_url, station_urns_sel=None):
315311
documents_dct[name] = document
316312

317313
# obtain list of contacts (accommodate 'sml:contact' element repetition used by NDBC insead of ContactList):
318-
contacts = system_el.findall(self.nsp('sml:contact/sml:ContactList/sml:member')) if system_el.findall(self.nsp('sml:contact/sml:ContactList/sml:member')) else system_el.findall(self.nsp('sml:contact'))
314+
contacts = system_el.findall(self.nsp('sml:contact/sml:ContactList/sml:member')) \
315+
if system_el.findall(self.nsp('sml:contact/sml:ContactList/sml:member')) \
316+
else system_el.findall(self.nsp('sml:contact'))
319317
contacts_dct = {}
320318
for c in contacts:
321319
contact = Contact(c)
@@ -333,8 +331,8 @@ def get_stations_df(self, sos_url, station_urns_sel=None):
333331
quant_lst = [sweQuant.attrib['definition'] for sweQuant in sweQuants]
334332
parameter_lst = [sweQuant.split('/')[-1] for sweQuant in quant_lst]
335333

336-
# attempt to read beginPosition, if available, otherwise use current date bc ISO requires date value in output location
337-
# in template:
334+
# attempt to read beginPosition, if available, otherwise use current date
335+
# bc ISO requires date value in output location in template:
338336
beginPosition = testXMLValue(system_el.find(self.nsp('sml:validTime/gml:TimePeriod/gml:beginPosition')))
339337
try:
340338
begin_service_date = parser.parse(beginPosition)
@@ -347,7 +345,10 @@ def get_stations_df(self, sos_url, station_urns_sel=None):
347345

348346
station['shortName'] = ds.shortName
349347
station['longName'] = ds.longName
350-
station['wmoID'] = ds.get_ioos_def('wmoID', 'identifier', ont)
348+
if self.sos_type.lower() == 'ndbc':
349+
station['wmoID'] = station_urn.split(':')[-1]
350+
else:
351+
station['wmoID'] = ds.get_ioos_def('wmoID', 'identifier', ont)
351352
station['serverName'] = self.server_name
352353

353354
# Some capabilities-level metadata:
@@ -362,21 +363,30 @@ def get_stations_df(self, sos_url, station_urns_sel=None):
362363
station['parentNetwork'] = ds.get_ioos_def('parentNetwork', 'classifier', ont)
363364
station['sponsor'] = ds.get_ioos_def('sponsor', 'classifier', ont)
364365

365-
# store some nested dictionaries in 'station' for appopriate SensorML sources:
366+
# store some nested dictionaries in 'station' for appropriate SensorML sources:
366367
station['contacts_dct'] = contacts_dct
367368
station['documents_dct'] = documents_dct
368369

369-
station['starting'] = ds.starting
370-
station['ending'] = ds.ending
371-
# station['starting_isostr'] = datetime.isoformat(ds.starting)
372-
# station['ending_isostr'] = datetime.isoformat(ds.ending)
373-
374-
station['parameter_uris'] = ','.join(quant_lst)
375-
station['parameters'] = ','.join(parameter_lst)
376-
station['variables'] = [var.split('/')[-1] for var in ds.variables]
370+
if self.sos_type.lower() == 'ndbc' and sosgc_station_offering is not None:
371+
station['starting'] = sosgc_station_offering.begin_position
372+
station['ending'] = sosgc_station_offering.end_position
373+
else:
374+
station['starting'] = ds.starting
375+
station['ending'] = ds.ending
376+
377+
if self.sos_type.lower() == 'ndbc' and sosgc_station_offering is not None:
378+
station['variable_uris'] = sosgc_station_offering.observed_properties
379+
station['variables'] = [var.split('/')[-1] for var in sosgc_station_offering.observed_properties]
380+
station['parameter_uris'] = ','.join(station['variable_uris'])
381+
station['parameters'] = ','.join(station['variables'])
382+
else:
383+
station['variable_uris'] = ds.variables
384+
station['variables'] = [var.split('/')[-1] for var in ds.variables]
385+
station['parameter_uris'] = ','.join(quant_lst)
386+
station['parameters'] = ','.join(parameter_lst)
377387

378388
if self.verbose:
379-
for var in ds.variables:
389+
for var in station['variable_uris']:
380390
self.log.write(u"\nvariable: {var}".format(var=var))
381391
print("variable: {var}".format(var=var))
382392

@@ -390,12 +400,14 @@ def get_stations_df(self, sos_url, station_urns_sel=None):
390400
for id, sosgc.content in sosgc.contents.items():
391401
if sosgc.content.name == station_urn:
392402
response_formats = sosgc.content.response_formats
393-
# response_formats = [ sosgc.content.response_formats for id, sosgc.content in sosgc.contents.items() if sosgc.content.name == station_urn ]
403+
# response_formats = [ sosgc.content.response_formats for id, sosgc.content in sosgc.contents.items()
404+
# if sosgc.content.name == station_urn ]
394405

395406
# match responseFormats from SensorML (response_formats) against those passed in --response_formats parameter to
396407
# populate 'download_formats' list, that is then used to generate GetObservation requests for the template:
397408
# (default --response_formats values are: 'application/json,application/zip; subtype=x-netcdf' )
398-
download_formats = [response_format for response_format in response_formats if response_format in self.response_formats]
409+
download_formats = [response_format for response_format in response_formats
410+
if response_format in self.response_formats]
399411
station['response_formats'] = response_formats
400412
station['download_formats'] = download_formats
401413

@@ -408,18 +420,23 @@ def get_stations_df(self, sos_url, station_urns_sel=None):
408420
print("downloadFormats: {format}".format(format=format))
409421

410422
# calculate event_time using self.getobs_req_hours:
411-
if ds.starting is not None and ds.ending is not None:
412-
event_time = "{begin:%Y-%m-%dT%H:%M:%S}/{end:%Y-%m-%dT%H:%M:%S}".format(begin=ds.ending - timedelta(hours=self.getobs_req_hours), end=ds.ending)
423+
event_time_formatstr = "{begin:%Y-%m-%dT%H:%M:%S}{utc_code}/{end:%Y-%m-%dT%H:%M:%S}{utc_code}"
424+
utc_code = 'Z' if self.sos_type.lower() == 'ndbc' else None
425+
if station['starting'] is not None and station['ending'] is not None:
426+
event_time = event_time_formatstr.format(
427+
begin=station['ending'] - timedelta(hours=self.getobs_req_hours), end=station['ending'],
428+
utc_code=utc_code)
413429
if self.verbose:
414430
self.log.write(u"\nUsing starting/ending times from SensorML for eventTime")
415431
print("Using starting/ending times from SensorML for eventTime")
416-
self.log.write(u"\nobservationTimeRange: starting: {start}, ending: {end}".format(start=ds.starting, end=ds.ending))
417-
print("observationTimeRange: starting: {start}, ending: {end}".format(start=ds.starting, end=ds.ending))
418-
432+
self.log.write(u"\nobservationTimeRange: starting: {start}, ending: {end}".format(
433+
start=station['starting'], end=station['ending']))
434+
print("observationTimeRange: starting: {start}, ending: {end}".format(
435+
start=station['starting'], end=station['ending']))
419436
else:
420437
now = datetime.now(pytz.utc)
421438
then = now - timedelta(hours=self.getobs_req_hours)
422-
event_time = "{begin:%Y-%m-%dT%H:%M:%S}/{end:%Y-%m-%dT%H:%M:%S}".format(begin=then, end=now)
439+
event_time = event_time_formatstr.format(begin=then, end=now, utc_code=utc_code)
423440
if self.verbose:
424441
self.log.write(u"\nNo 'observationTimeRange' present in SensorML. Using present time for eventTime: then: {then:%Y-%m-%dT%H:%M:%S%z}, now: {now:%Y-%m-%dT%H:%M:%S%z}".format(then=then, now=now))
425442
print("No 'observationTimeRange' present in SensorML. Using present time for eventTime: then: {then:%Y-%m-%dT%H:%M:%S%z}, now: {now:%Y-%m-%dT%H:%M:%S%z}".format(then=then, now=now))
@@ -430,9 +447,11 @@ def get_stations_df(self, sos_url, station_urns_sel=None):
430447

431448
# create a dict to store parameters for valid example GetObservation requests for station:
432449
getobs_req_dct = {}
433-
# populate a parameters dictionary for download links for each 'observedProperty' type and secondly for each 'responseFormat' per observedProperty:
434-
getobs_params_base = {'service': 'SOS', 'request': 'GetObservation', 'version': '1.0.0', 'offering': station_urn, 'eventTime': event_time}
435-
for variable in ds.variables:
450+
# populate a parameters dictionary for download links for each 'observedProperty' type
451+
# and secondly for each 'responseFormat' per observedProperty:
452+
getobs_params_base = {'service': 'SOS', 'request': 'GetObservation', 'version': '1.0.0',
453+
'offering': station_urn, 'eventTime': event_time}
454+
for variable in station['variable_uris']:
436455
getobs_params = getobs_params_base.copy()
437456
getobs_params['observedProperty'] = variable
438457
variable = variable.split('/')[-1]
@@ -460,8 +479,10 @@ def get_stations_df(self, sos_url, station_urns_sel=None):
460479
self.log.write(u"\n\n\nSOS DescribeSensor request errors recap. Failed requests:")
461480
print("SOS DescribeSensor request errors recap. Failed requests:")
462481
for station_fail, msg in iteritems(sml_errors):
463-
self.log.write(u"\n{station} - {msg}. DescribeSensor URL: {ds}".format(station=station_fail, msg=msg, ds=describe_sensor_url[station_fail].replace("&amp;", "&")))
464-
print("{station} - {msg}. DescribeSensor URL: {ds}".format(station=station_fail, msg=msg, ds=describe_sensor_url[station_fail].replace("&amp;", "&")))
482+
self.log.write(u"\n{station} - {msg}. DescribeSensor URL: {ds}".format(
483+
station=station_fail, msg=msg, ds=describe_sensor_url[station_fail].replace("&amp;", "&")))
484+
print("{station} - {msg}. DescribeSensor URL: {ds}".format(
485+
station=station_fail, msg=msg, ds=describe_sensor_url[station_fail].replace("&amp;", "&")))
465486
if failures:
466487
self.log.write(u"\nStations in 'failures' list (should match DescribeSensor errors):")
467488
print("Stations in 'failures' list (should match DescribeSensor errors):")
@@ -527,7 +548,8 @@ def generate_iso(self, df):
527548
ctx['download_formats'] = station.download_formats
528549
ctx['getobs_req_dct'] = station.getobs_req_dct
529550

530-
output_filename = os.path.join(self.output_directory, "{serverName}-{station}.xml".format(serverName=self.server_name, station=station.station_urn.replace(":", "_")))
551+
output_filename = os.path.join(self.output_directory, "{serverName}-{station}.xml".format(
552+
serverName=self.server_name, station=station.station_urn.replace(":", "_")))
531553
try:
532554
iso_xml = template.render(ctx)
533555
output_file = io.open(output_filename, mode='wt', encoding='utf8')
@@ -551,13 +573,15 @@ def generate_describe_sensor_url(self, sos, procedure=None, oFrmt=None):
551573
"""
552574
# generate a DescribeSensor request to include in the ISO output (lifted from OWSlib):
553575
try:
554-
base_url = next((m.get('url') for m in sos.getOperationByName('DescribeSensor').methods if m.get('type').lower() == "get"))
576+
base_url = next((m.get('url') for m in sos.getOperationByName('DescribeSensor').methods
577+
if m.get('type').lower() == "get"))
555578
except StopIteration:
556579
base_url = sos.url
557580

558581
if not base_url.endswith("?"):
559582
base_url = base_url + "?"
560-
params = {'service': 'SOS', 'version': sos.version, 'request': 'DescribeSensor', 'procedure': procedure, 'outputFormat': oFrmt}
583+
params = {'service': 'SOS', 'version': sos.version, 'request': 'DescribeSensor',
584+
'procedure': procedure, 'outputFormat': oFrmt}
561585
return base_url + unquote_plus(urlencode(params))
562586

563587
def create_output_dir(self):

0 commit comments

Comments
 (0)