11#!/usr/bin/env python
22u"""
3- gfz_isdc_dealiasing_ftp .py
4- Written by Tyler Sutterley (05/2023 )
3+ gfz_isdc_dealiasing_sync .py
4+ Written by Tyler Sutterley (10/2025 )
55Syncs GRACE Level-1b dealiasing products from the GFZ Information
66 System and Data Center (ISDC)
7+
78Optionally outputs as monthly tar files
89
910CALLING SEQUENCE:
10- python gfz_isdc_dealiasing_ftp .py --year=2015 --release=RL06 --tar
11+ python gfz_isdc_dealiasing_sync .py --year=2015 --release=RL06 --tar
1112
1213COMMAND LINE OPTIONS:
1314 -D X, --directory X: working data directory
3031 utilities.py: download and management utilities for syncing files
3132
3233UPDATE HISTORY:
34+ Updated 10/2025: switch to https as ftp server is being retired
3335 Updated 05/2023: use pathlib to define and operate on paths
3436 Updated 03/2023: increase default year range to sync
3537 Updated 12/2022: single implicit import of gravity toolkit
5153import sys
5254import os
5355import re
56+ import ssl
5457import time
55- import ftplib
58+ import shutil
5659import logging
5760import pathlib
5861import tarfile
6265
6366# PURPOSE: syncs GRACE Level-1b dealiasing products from the GFZ data server
6467# and optionally outputs as monthly tar files
65- def gfz_isdc_dealiasing_ftp (base_dir , DREL , YEAR = None , MONTHS = None , TAR = False ,
68+ def gfz_isdc_dealiasing_sync (base_dir , DREL , YEAR = None , MONTHS = None , TAR = False ,
6669 TIMEOUT = None , LOG = False , CLOBBER = False , MODE = None ):
6770 # check if directory exists and recursively create if not
6871 base_dir = pathlib .Path (base_dir ).expanduser ().absolute ()
@@ -81,10 +84,8 @@ def gfz_isdc_dealiasing_ftp(base_dir, DREL, YEAR=None, MONTHS=None, TAR=False,
8184 # standard output (terminal output)
8285 logging .basicConfig (level = logging .INFO )
8386
84- # remote HOST for DREL on GFZ data server
85- # connect and login to GFZ ftp server
86- ftp = ftplib .FTP ('isdcftp.gfz-potsdam.de' , timeout = TIMEOUT )
87- ftp .login ()
87+ # GFZ ISDC https host
88+ HOST = 'https://isdc-data.gfz.de/'
8889
8990 # compile regular expression operator for years to sync
9091 if YEAR is None :
@@ -97,9 +98,8 @@ def gfz_isdc_dealiasing_ftp(base_dir, DREL, YEAR=None, MONTHS=None, TAR=False,
9798 SUFFIX = dict (RL04 = 'tar.gz' , RL05 = 'tar.gz' , RL06 = 'tgz' )
9899
99100 # find remote yearly directories for DREL
100- YRS ,_ = gravtk .utilities .ftp_list ([ftp .host ,'grace' ,
101- 'Level-1B' , 'GFZ' ,'AOD' ,DREL ], timeout = TIMEOUT , basename = True ,
102- pattern = R1 , sort = True )
101+ YRS ,_ = http_list ([HOST ,'grace' ,'Level-1B' , 'GFZ' ,'AOD' ,DREL ],
102+ timeout = TIMEOUT , basename = True , pattern = R1 , sort = True )
103103 # for each year
104104 for Y in YRS :
105105 # for each month of interest
@@ -114,8 +114,8 @@ def gfz_isdc_dealiasing_ftp(base_dir, DREL, YEAR=None, MONTHS=None, TAR=False,
114114 # will extract year and month and calendar day from the ascii file
115115 regex_pattern = r'AOD1B_({0})-({1:02d})-(\d+)_X_\d+.asc.gz$'
116116 R2 = re .compile (regex_pattern .format (Y ,M ), re .VERBOSE )
117- remote_files ,remote_mtimes = gravtk . utilities . ftp_list (
118- [ftp . host ,'grace' ,'Level-1B' ,'GFZ' ,'AOD' ,DREL ,Y ],
117+ remote_files ,remote_mtimes = http_list (
118+ [HOST ,'grace' ,'Level-1B' ,'GFZ' ,'AOD' ,DREL ,Y ],
119119 timeout = TIMEOUT , basename = True , pattern = R2 , sort = True )
120120 file_count = len (remote_files )
121121 # if compressing into monthly tar files
@@ -124,10 +124,10 @@ def gfz_isdc_dealiasing_ftp(base_dir, DREL, YEAR=None, MONTHS=None, TAR=False,
124124 tar = tarfile .open (name = local_tar_file , mode = 'w:gz' )
125125 for fi ,remote_mtime in zip (remote_files ,remote_mtimes ):
126126 # remote version of each input file
127- remote = [ftp . host ,'grace' ,'Level-1B' ,'GFZ' ,'AOD' ,DREL ,Y ,fi ]
128- logging .info (posixpath .join ('ftp://' , * remote ))
127+ remote = [HOST ,'grace' ,'Level-1B' ,'GFZ' ,'AOD' ,DREL ,Y ,fi ]
128+ logging .info (posixpath .join (* remote ))
129129 # retrieve bytes from remote file
130- remote_buffer = gravtk .utilities .from_ftp (remote ,
130+ remote_buffer = gravtk .utilities .from_sync (remote ,
131131 timeout = TIMEOUT )
132132 # add file to tar
133133 tar_info = tarfile .TarInfo (name = fi )
@@ -142,23 +142,96 @@ def gfz_isdc_dealiasing_ftp(base_dir, DREL, YEAR=None, MONTHS=None, TAR=False,
142142 # copy each gzip file and keep as individual daily files
143143 for fi ,remote_mtime in zip (remote_files ,remote_mtimes ):
144144 # remote and local version of each input file
145- remote = [ftp . host ,'grace' ,'Level-1B' ,'GFZ' ,'AOD' ,DREL ,Y ,fi ]
145+ remote = [HOST ,'grace' ,'Level-1B' ,'GFZ' ,'AOD' ,DREL ,Y ,fi ]
146146 local_file = grace_dir .joinpath (fi )
147- ftp_mirror_file ( ftp , remote ,remote_mtime ,local_file ,
147+ http_pull_file ( remote ,remote_mtime ,local_file ,
148148 CLOBBER = CLOBBER , MODE = MODE )
149149
150- # close the ftp connection
151- ftp .quit ()
152150 # close log file and set permissions level to MODE
153151 if LOG :
154152 LOGFILE .chmod (mode = MODE )
155153
# PURPOSE: list a directory on the GFZ https server
def http_list(
        HOST: str | list,
        timeout: int | None = None,
        context: ssl.SSLContext = gravtk.utilities._default_ssl_context,
        pattern: str | re.Pattern = '',
        sort: bool = False
    ):
    """
    List a directory on the GFZ https Server

    Parameters
    ----------
    HOST: str or list
        remote http host path
    timeout: int or NoneType, default None
        timeout in seconds for blocking operations
    context: obj, default gravity_toolkit.utilities._default_ssl_context
        SSL context for ``urllib`` opener object
    pattern: str, default ''
        regular expression pattern for reducing list
    sort: bool, default False
        sort output list

    Returns
    -------
    colnames: list
        column names in a directory
    collastmod: list
        last modification times for items in the directory
    """
    # verify inputs for remote http host
    if isinstance(HOST, str):
        HOST = gravtk.utilities.url_split(HOST)
    # regular expression pattern for finding files and modification times
    # in the server's HTML directory listing
    parser = r'\<a\shref=.*?\>(.*?)\<\/a\>\s+(\d{4}-\d{2}-\d{2}\s+\d{2}\:\d{2})'
    rx = re.compile(parser, re.VERBOSE)
    # try listing from http
    try:
        # Create and submit request.
        request = gravtk.utilities.urllib2.Request(posixpath.join(*HOST))
        response = gravtk.utilities.urllib2.urlopen(request,
            timeout=timeout, context=context)
    except Exception as exc:
        # chain the underlying urllib error for easier debugging
        raise Exception(f'List error from {posixpath.join(*HOST)}') from exc
    # read the directory listing
    contents = response.readlines()
    # read and parse request for files (column names and modified times)
    lines = [l for l in contents if rx.search(l.decode('utf-8'))]
    # column names and last modified times
    colnames = [None]*len(lines)
    collastmod = [None]*len(lines)
    for i, l in enumerate(lines):
        colnames[i], lastmod = rx.findall(l.decode('utf-8')).pop()
        # get the Unix timestamp value for a modification time
        collastmod[i] = gravtk.utilities.get_unix_time(lastmod,
            format='%Y-%m-%d %H:%M')
    # reduce using regular expression pattern
    if pattern:
        indices = [i for i, f in enumerate(colnames) if re.search(pattern, f)]
        # reduce list of column names and last modified times
        colnames = [colnames[indice] for indice in indices]
        collastmod = [collastmod[indice] for indice in indices]
    # sort the lists in parallel by column name
    if sort:
        indices = [i for i, j in sorted(enumerate(colnames), key=lambda i: i[1])]
        # sort list of column names and last modified times
        colnames = [colnames[indice] for indice in indices]
        collastmod = [collastmod[indice] for indice in indices]
    # return the list of column names and last modified times
    return (colnames, collastmod)
225+
# PURPOSE: pull file from a remote host checking if file exists locally
# and if the remote file is newer than the local file
def http_pull_file(remote_path, remote_mtime, local_file,
    TIMEOUT=None, LIST=False, CLOBBER=False, MODE=0o775):
    """
    Pull a file from the GFZ https server, transferring it only when the
    local copy is missing, older than the remote file, or CLOBBER is set

    Parameters
    ----------
    remote_path: str or list
        remote https file path (or its already-split components)
    remote_mtime: float
        remote file modification time (Unix timestamp)
    local_file: obj
        ``pathlib.Path`` of the local output file
    TIMEOUT: int or NoneType, default None
        timeout in seconds for blocking operations
        (``None`` blocks indefinitely; a value of 0 would put the
        socket in non-blocking mode and fail immediately)
    LIST: bool, default False
        only print the files to be transferred
    CLOBBER: bool, default False
        overwrite the local file even if it appears up to date
    MODE: oct, default 0o775
        permissions mode of the downloaded file
    """
    # verify inputs for remote http host
    if isinstance(remote_path, str):
        remote_path = gravtk.utilities.url_split(remote_path)
    # construct remote file path
    remote_file = posixpath.join(*remote_path)
    # if file exists in file system: check if remote file is newer
    TEST = False
    OVERWRITE = ' (clobber)'
    # NOTE(review): modification-time comparison reconstructed from the
    # surrounding logic -- confirm against the package's other sync scripts
    if local_file.exists():
        # if remote file is newer: overwrite the local file
        if (remote_mtime > local_file.stat().st_mtime):
            TEST = True
            OVERWRITE = ' (overwritten)'
    else:
        TEST = True
        OVERWRITE = ' (new)'
    # if file does not exist locally, is to be overwritten, or CLOBBER is set
    if TEST or CLOBBER:
        # Printing files transferred
        logging.info(f'{remote_file} --> ')
        logging.info(f'\t{str(local_file)}{OVERWRITE}\n')
        # if executing copy command (not only printing the files)
        if not LIST:
            # Create and submit request. There are a wide range of exceptions
            # that can be thrown here, including HTTPError and URLError.
            request = gravtk.utilities.urllib2.Request(remote_file)
            # close the response when the transfer completes
            with gravtk.utilities.urllib2.urlopen(request,
                timeout=TIMEOUT) as response:
                # chunked transfer encoding size
                CHUNK = 16 * 1024
                # copy contents to local file using chunked transfer encoding
                # transfer should work properly with ascii and binary formats
                with local_file.open(mode='wb') as f:
                    shutil.copyfileobj(response, f, CHUNK)
            # keep remote modification time of file and local access time
            os.utime(local_file, (local_file.stat().st_atime, remote_mtime))
            local_file.chmod(mode=MODE)
190272
191273# PURPOSE: create argument parser
192274def arguments ():
@@ -243,14 +325,17 @@ def main():
243325 parser = arguments ()
244326 args ,_ = parser .parse_known_args ()
245327
328+ # GFZ ISDC https host
329+ HOST = 'https://isdc-data.gfz.de/'
246330 # check internet connection before attempting to run program
247- HOST = 'isdcftp.gfz-potsdam.de'
248- if gravtk .utilities .check_ftp_connection (HOST ):
331+ if gravtk .utilities .check_connection (HOST ):
249332 for DREL in args .release :
250- gfz_isdc_dealiasing_ftp (args .directory , DREL = DREL ,
333+ gfz_isdc_dealiasing_sync (args .directory , DREL = DREL ,
251334 YEAR = args .year , MONTHS = args .month , TAR = args .tar ,
252335 TIMEOUT = args .timeout , LOG = args .log ,
253336 CLOBBER = args .clobber , MODE = args .mode )
337+ else :
338+ raise RuntimeError ('Check internet connection' )
254339
255340# run main program
256341if __name__ == '__main__' :
0 commit comments