Commit f1bf097

refactor: GFZ ISDC ftp server is being retired (#168)

* Update pixi.lock

1 parent 9aa323e

9 files changed: +296 -147 lines changed
Lines changed: 122 additions & 37 deletions
@@ -1,13 +1,14 @@
 #!/usr/bin/env python
 u"""
-gfz_isdc_dealiasing_ftp.py
-Written by Tyler Sutterley (05/2023)
+gfz_isdc_dealiasing_sync.py
+Written by Tyler Sutterley (10/2025)
 Syncs GRACE Level-1b dealiasing products from the GFZ Information
 System and Data Center (ISDC)
+
 Optionally outputs as monthly tar files
 
 CALLING SEQUENCE:
-    python gfz_isdc_dealiasing_ftp.py --year=2015 --release=RL06 --tar
+    python gfz_isdc_dealiasing_sync.py --year=2015 --release=RL06 --tar
 
 COMMAND LINE OPTIONS:
     -D X, --directory X: working data directory
@@ -30,6 +31,7 @@
     utilities.py: download and management utilities for syncing files
 
 UPDATE HISTORY:
+    Updated 10/2025: switch to https as ftp server is being retired
     Updated 05/2023: use pathlib to define and operate on paths
     Updated 03/2023: increase default year range to sync
     Updated 12/2022: single implicit import of gravity toolkit
@@ -51,8 +53,9 @@
 import sys
 import os
 import re
+import ssl
 import time
-import ftplib
+import shutil
 import logging
 import pathlib
 import tarfile
@@ -62,7 +65,7 @@
 
 # PURPOSE: syncs GRACE Level-1b dealiasing products from the GFZ data server
 # and optionally outputs as monthly tar files
-def gfz_isdc_dealiasing_ftp(base_dir, DREL, YEAR=None, MONTHS=None, TAR=False,
+def gfz_isdc_dealiasing_sync(base_dir, DREL, YEAR=None, MONTHS=None, TAR=False,
     TIMEOUT=None, LOG=False, CLOBBER=False, MODE=None):
     # check if directory exists and recursively create if not
     base_dir = pathlib.Path(base_dir).expanduser().absolute()
@@ -81,10 +84,8 @@ def gfz_isdc_dealiasing_ftp(base_dir, DREL, YEAR=None, MONTHS=None, TAR=False,
     # standard output (terminal output)
     logging.basicConfig(level=logging.INFO)
 
-    # remote HOST for DREL on GFZ data server
-    # connect and login to GFZ ftp server
-    ftp = ftplib.FTP('isdcftp.gfz-potsdam.de', timeout=TIMEOUT)
-    ftp.login()
+    # GFZ ISDC https host
+    HOST = 'https://isdc-data.gfz.de/'
 
     # compile regular expression operator for years to sync
     if YEAR is None:
@@ -97,9 +98,8 @@ def gfz_isdc_dealiasing_ftp(base_dir, DREL, YEAR=None, MONTHS=None, TAR=False,
     SUFFIX = dict(RL04='tar.gz', RL05='tar.gz', RL06='tgz')
 
     # find remote yearly directories for DREL
-    YRS,_ = gravtk.utilities.ftp_list([ftp.host,'grace',
-        'Level-1B', 'GFZ','AOD',DREL], timeout=TIMEOUT, basename=True,
-        pattern=R1, sort=True)
+    YRS,_ = http_list([HOST,'grace','Level-1B', 'GFZ','AOD',DREL],
+        timeout=TIMEOUT, basename=True, pattern=R1, sort=True)
     # for each year
     for Y in YRS:
         # for each month of interest
@@ -114,8 +114,8 @@ def gfz_isdc_dealiasing_ftp(base_dir, DREL, YEAR=None, MONTHS=None, TAR=False,
             # will extract year and month and calendar day from the ascii file
             regex_pattern = r'AOD1B_({0})-({1:02d})-(\d+)_X_\d+.asc.gz$'
             R2 = re.compile(regex_pattern.format(Y,M), re.VERBOSE)
-            remote_files,remote_mtimes = gravtk.utilities.ftp_list(
-                [ftp.host,'grace','Level-1B','GFZ','AOD',DREL,Y],
+            remote_files,remote_mtimes = http_list(
+                [HOST,'grace','Level-1B','GFZ','AOD',DREL,Y],
                 timeout=TIMEOUT, basename=True, pattern=R2, sort=True)
             file_count = len(remote_files)
             # if compressing into monthly tar files
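
Before the listing calls run, it helps to see exactly what the compiled `R2` pattern captures. A quick check against a sample daily product name in the GFZ convention (the filename here is illustrative, not fetched from the server):

import re

# same pattern as in the script, formatted for January 2015
regex_pattern = r'AOD1B_({0})-({1:02d})-(\d+)_X_\d+.asc.gz$'
R2 = re.compile(regex_pattern.format('2015', 1), re.VERBOSE)
# year, month and calendar day are recovered as capture groups
match = R2.search('AOD1B_2015-01-31_X_06.asc.gz')
print(match.groups())  # ('2015', '01', '31')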
@@ -124,10 +124,10 @@ def gfz_isdc_dealiasing_ftp(base_dir, DREL, YEAR=None, MONTHS=None, TAR=False,
                 tar = tarfile.open(name=local_tar_file, mode='w:gz')
                 for fi,remote_mtime in zip(remote_files,remote_mtimes):
                     # remote version of each input file
-                    remote = [ftp.host,'grace','Level-1B','GFZ','AOD',DREL,Y,fi]
-                    logging.info(posixpath.join('ftp://',*remote))
+                    remote = [HOST,'grace','Level-1B','GFZ','AOD',DREL,Y,fi]
+                    logging.info(posixpath.join(*remote))
                     # retrieve bytes from remote file
-                    remote_buffer = gravtk.utilities.from_ftp(remote,
+                    remote_buffer = gravtk.utilities.from_sync(remote,
                         timeout=TIMEOUT)
                     # add file to tar
                     tar_info = tarfile.TarInfo(name=fi)
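
The hunk cuts off just before the downloaded buffer is written into the archive. A minimal sketch of the `tarfile` pattern in play, with an in-memory stand-in for the remote buffer (member name, size, and modification time are illustrative):

import io
import tarfile

# stand-in for the bytes returned from the remote fetch
remote_buffer = io.BytesIO(b'example AOD1B contents')
with tarfile.open(name='AOD1B_2015-01.tgz', mode='w:gz') as tar:
    # describe the member: name, size and remote modification time
    tar_info = tarfile.TarInfo(name='AOD1B_2015-01-31_X_06.asc.gz')
    tar_info.size = remote_buffer.getbuffer().nbytes
    tar_info.mtime = 1422662400  # hypothetical Unix time
    # rewind the buffer and add it as a member of the archive
    remote_buffer.seek(0)
    tar.addfile(tar_info, fileobj=remote_buffer)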
@@ -142,23 +142,102 @@ def gfz_isdc_dealiasing_ftp(base_dir, DREL, YEAR=None, MONTHS=None, TAR=False,
                 # copy each gzip file and keep as individual daily files
                 for fi,remote_mtime in zip(remote_files,remote_mtimes):
                     # remote and local version of each input file
-                    remote = [ftp.host,'grace','Level-1B','GFZ','AOD',DREL,Y,fi]
+                    remote = [HOST,'grace','Level-1B','GFZ','AOD',DREL,Y,fi]
                     local_file = grace_dir.joinpath(fi)
-                    ftp_mirror_file(ftp,remote,remote_mtime,local_file,
+                    http_pull_file(remote,remote_mtime,local_file,TIMEOUT=TIMEOUT,
                         CLOBBER=CLOBBER, MODE=MODE)
 
-    # close the ftp connection
-    ftp.quit()
     # close log file and set permissions level to MODE
     if LOG:
         LOGFILE.chmod(mode=MODE)
 
+# PURPOSE: list a directory on the GFZ https server
+def http_list(
+        HOST: str | list,
+        timeout: int | None = None,
+        context: ssl.SSLContext = gravtk.utilities._default_ssl_context,
+        basename: bool = False,
+        pattern: str | re.Pattern = '',
+        sort: bool = False
+    ):
+    """
+    List a directory on the GFZ https server
+
+    Parameters
+    ----------
+    HOST: str or list
+        remote http host path
+    timeout: int or NoneType, default None
+        timeout in seconds for blocking operations
+    context: obj, default gravity_toolkit.utilities._default_ssl_context
+        SSL context for ``urllib`` opener object
+    basename: bool, default False
+        return the basename of each item in the directory
+    pattern: str, default ''
+        regular expression pattern for reducing list
+    sort: bool, default False
+        sort output list
+
+    Returns
+    -------
+    colnames: list
+        column names in a directory
+    collastmod: list
+        last modification times for items in the directory
+    """
+    # verify inputs for remote http host
+    if isinstance(HOST, str):
+        HOST = gravtk.utilities.url_split(HOST)
+    # regular expression pattern for finding files and modification times
+    parser = r'\<a\shref=.*?\>(.*?)\<\/a\>\s+(\d{4}-\d{2}-\d{2}\s+\d{2}\:\d{2})'
+    rx = re.compile(parser, re.VERBOSE)
+    # try listing from http
+    try:
+        # Create and submit request.
+        request = gravtk.utilities.urllib2.Request(posixpath.join(*HOST))
+        response = gravtk.utilities.urllib2.urlopen(request,
+            timeout=timeout, context=context)
+    except Exception as exc:
+        raise Exception(f'List error from {posixpath.join(*HOST)}') from exc
+    # read the directory listing
+    contents = response.readlines()
+    # read and parse request for files (column names and modified times)
+    lines = [l for l in contents if rx.search(l.decode('utf-8'))]
+    # column names and last modified times
+    colnames = [None]*len(lines)
+    collastmod = [None]*len(lines)
+    for i, l in enumerate(lines):
+        colnames[i], lastmod = rx.findall(l.decode('utf-8')).pop()
+        # get the Unix timestamp value for a modification time
+        collastmod[i] = gravtk.utilities.get_unix_time(lastmod,
+            format='%Y-%m-%d %H:%M')
+    # reduce to the basename of each item
+    if basename:
+        colnames = [posixpath.basename(f) for f in colnames]
+    # reduce using regular expression pattern
+    if pattern:
+        i = [i for i,f in enumerate(colnames) if re.search(pattern, f)]
+        # reduce list of column names and last modified times
+        colnames = [colnames[indice] for indice in i]
+        collastmod = [collastmod[indice] for indice in i]
+    # sort the list
+    if sort:
+        i = [i for i,j in sorted(enumerate(colnames), key=lambda i: i[1])]
+        # sort list of column names and last modified times
+        colnames = [colnames[indice] for indice in i]
+        collastmod = [collastmod[indice] for indice in i]
+    # return the list of column names and last modified times
+    return (colnames, collastmod)
+
 
 # PURPOSE: pull file from a remote host checking if file exists locally
 # and if the remote file is newer than the local file
-def ftp_mirror_file(ftp,remote_path,remote_mtime,local_file,
-    CLOBBER=False,MODE=0o775):
-    # path to remote file
-    remote_file = posixpath.join(*remote_path[1:])
+def http_pull_file(remote_path, remote_mtime, local_file,
+    TIMEOUT=None, LIST=False, CLOBBER=False, MODE=0o775):
+    # verify inputs for remote http host
+    if isinstance(remote_path, str):
+        remote_path = gravtk.utilities.url_split(remote_path)
+    # construct remote file path
+    remote_file = posixpath.join(*remote_path)
     # if file exists in file system: check if remote file is newer
     TEST = False
     OVERWRITE = ' (clobber)'
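
The heart of the new `http_list` is the regular expression that scrapes file names and modification times out of the server's HTML index. A self-contained sketch of that parse, run against a hypothetical Apache-style listing line (the live page layout may differ):

import re

parser = r'\<a\shref=.*?\>(.*?)\<\/a\>\s+(\d{4}-\d{2}-\d{2}\s+\d{2}\:\d{2})'
rx = re.compile(parser, re.VERBOSE)
# hypothetical line from a directory index page
line = ('<a href="AOD1B_2015-01-31_X_06.asc.gz">AOD1B_2015-01-31_X_06.asc.gz</a>'
        '   2025-10-01 12:00  1.2M')
name, lastmod = rx.findall(line).pop()
print(name)     # AOD1B_2015-01-31_X_06.asc.gz
print(lastmod)  # 2025-10-01 12:00

If the index page ever changes its timestamp layout, this pattern and the `format='%Y-%m-%d %H:%M'` string passed to `get_unix_time` would need to move in step.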
@@ -178,15 +257,24 @@ def ftp_mirror_file(ftp,remote_path,remote_mtime,local_file,
     # if file does not exist locally, is to be overwritten, or CLOBBER is set
     if TEST or CLOBBER:
         # Printing files transferred
-        remote_ftp_url = posixpath.join('ftp://',*remote_path)
-        logging.info(f'{remote_ftp_url} -->')
-        logging.info(f'\t{local_file}{OVERWRITE}\n')
-        # copy remote file contents to local file
-        with local_file.open(mode='wb') as f:
-            ftp.retrbinary(f'RETR {remote_file}', f.write)
-        # keep remote modification time of file and local access time
-        os.utime(local_file, (local_file.stat().st_atime, remote_mtime))
-        local_file.chmod(mode=MODE)
+        logging.info(f'{remote_file} --> ')
+        logging.info(f'\t{str(local_file)}{OVERWRITE}\n')
+        # if executing copy command (not only printing the files)
+        if not LIST:
+            # Create and submit request. There are a wide range of exceptions
+            # that can be thrown here, including HTTPError and URLError.
+            request = gravtk.utilities.urllib2.Request(remote_file)
+            response = gravtk.utilities.urllib2.urlopen(request,
+                timeout=TIMEOUT)
+            # chunked transfer encoding size
+            CHUNK = 16 * 1024
+            # copy contents to local file using chunked transfer encoding
+            # transfer should work properly with ascii and binary data formats
+            with local_file.open(mode='wb') as f:
+                shutil.copyfileobj(response, f, CHUNK)
+            # keep remote modification time of file and local access time
+            os.utime(local_file, (local_file.stat().st_atime, remote_mtime))
+            local_file.chmod(mode=MODE)
 
 # PURPOSE: create argument parser
 def arguments():
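
`http_pull_file` streams the response body in 16 KiB chunks rather than reading it whole, which keeps memory flat for the larger AOD1B products. The same transfer can be reproduced with only the standard library; a minimal sketch, assuming a plausible file path under the new host (the exact file is illustrative):

import shutil
import urllib.request

# hypothetical daily product under the new HTTPS host
url = ('https://isdc-data.gfz.de/grace/Level-1B/GFZ/AOD/'
       'RL06/2015/AOD1B_2015-01-31_X_06.asc.gz')
response = urllib.request.urlopen(url, timeout=120)
# copy the response to disk in 16 kilobyte chunks
CHUNK = 16 * 1024
with open('AOD1B_2015-01-31_X_06.asc.gz', 'wb') as f:
    shutil.copyfileobj(response, f, CHUNK)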
@@ -243,14 +331,17 @@ def main():
     parser = arguments()
     args,_ = parser.parse_known_args()
 
+    # GFZ ISDC https host
+    HOST = 'https://isdc-data.gfz.de/'
     # check internet connection before attempting to run program
-    HOST = 'isdcftp.gfz-potsdam.de'
-    if gravtk.utilities.check_ftp_connection(HOST):
+    if gravtk.utilities.check_connection(HOST):
         for DREL in args.release:
-            gfz_isdc_dealiasing_ftp(args.directory, DREL=DREL,
+            gfz_isdc_dealiasing_sync(args.directory, DREL=DREL,
                 YEAR=args.year, MONTHS=args.month, TAR=args.tar,
                 TIMEOUT=args.timeout, LOG=args.log,
                 CLOBBER=args.clobber, MODE=args.mode)
+    else:
+        raise RuntimeError('Check internet connection')
 
 # run main program
 if __name__ == '__main__':
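
The `TEST` and `OVERWRITE` flags set up in `http_pull_file` feed a local-versus-remote modification-time comparison that falls outside the hunks shown above. A sketch of the usual mirror test, assuming a `pathlib.Path` local file and a Unix `remote_mtime` from the directory listing:

import pathlib

local_file = pathlib.Path('AOD1B_2015-01-31_X_06.asc.gz')
remote_mtime = 1759320000.0  # hypothetical Unix time from http_list
TEST = False
OVERWRITE = ' (clobber)'
if local_file.exists():
    # transfer only if the remote copy is newer than the local copy
    if remote_mtime > local_file.stat().st_mtime:
        TEST = True
        OVERWRITE = ' (overwrite)'
else:
    # transfer any file that does not yet exist locally
    TEST = True
    OVERWRITE = ' (new)'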
