Skip to content

Commit aaa01b1

Browse files
candytaco and Copilot authored
Multiple-threaded upload/downloads (#109)
* add `threads` argument to backend interface
* Update S3Client to use TransferConfig in up/downloads
  TransferConfig allows multi-threaded uploads, and also hides multipart uploads, so we don't implement that on our layer anymore.
* add threads to options file
* remove unused import in options
* add optional `threads` argument to I/O functions in cottoncandy interfaces to allow setting thread count dynamically at runtime
* update gdriveclient to have `threads` argument for interface consistency
* update localclient to have `threads` for interface consistency
* threads in docstrings
* remove print debug statement
* fix missing comma
* tabs -> spaces
  Co-authored-by: Copilot <[email protected]>
* Add ACL to upload_file
  Co-authored-by: Copilot <[email protected]>
* Update cottoncandy/s3client.py
  Co-authored-by: Copilot <[email protected]>
* copilot docstring suggestion
  Co-authored-by: Copilot <[email protected]>
---------
Co-authored-by: Copilot <[email protected]>
1 parent fb064fb commit aaa01b1

File tree

8 files changed: +140 -247 lines changed

cottoncandy/backend.py

Lines changed: 16 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -30,11 +30,12 @@ def check_file_exists(self, file_name, bucket_name):
3030
pass
3131

3232
@abstractmethod
33-
def upload_stream(self, stream, cloud_name, metadata, permissions):
33+
def upload_stream(self, stream, cloud_name, metadata, permissions, threads):
3434
"""Uploads a stream object with a .read() function
3535
3636
Parameters
3737
----------
38+
threads
3839
stream : stream
3940
streaming object
4041
cloud_name : str
@@ -43,6 +44,8 @@ def upload_stream(self, stream, cloud_name, metadata, permissions):
4344
custom metadata for this file
4445
permissions : str?
4546
permissions for this file
47+
threads : int
48+
number of threads to use
4649
4750
Returns
4851
-------
@@ -51,57 +54,39 @@ def upload_stream(self, stream, cloud_name, metadata, permissions):
5154
pass
5255

5356
@abstractmethod
54-
def upload_file(self, file_name, cloud_name, permissions):
57+
def upload_file(self, file_name, cloud_name, permissions, threads):
5558
"""Uploads a file from disk
5659
5760
Parameters
5861
----------
62+
threads
5963
file_name : str
6064
name of file to upload
6165
cloud_name : str
6266
name to use on the cloud
6367
permissions : str?
6468
permissions for this file
69+
threads : int
70+
number of threads to use
6571
6672
Returns
6773
-------
6874
bool, upload success
6975
"""
7076
pass
7177

72-
@abstractmethod
73-
def upload_multipart(self, stream, cloud_name, metadata, permissions, buffersize, verbose):
74-
"""Multi-part upload for large stream objects
75-
76-
Parameters
77-
----------
78-
stream : stream
79-
streaming object
80-
cloud_name : str
81-
name to use on cloud
82-
metadata : dict
83-
custom metadata
84-
permissions : str?
85-
permissions for this file
86-
buffersize : int
87-
s3 uploading buffersize
88-
verbose : bool
89-
s3 verbosity
90-
91-
Returns
92-
-------
93-
bool, upload success
94-
"""
95-
pass
9678

9779
@abstractmethod
98-
def download_stream(self, cloud_name):
80+
def download_stream(self, cloud_name, threads):
9981
"""Downloads a object to an in-memory stream
10082
10183
Parameters
10284
----------
85+
threads
10386
cloud_name : str
10487
name of object to download
88+
threads : int
89+
number of threads to use
10590
10691
Returns
10792
-------
@@ -110,15 +95,18 @@ def download_stream(self, cloud_name):
11095
pass
11196

11297
@abstractmethod
113-
def download_to_file(self, cloud_name, file_name):
98+
def download_to_file(self, cloud_name, file_name, threads):
11499
"""Downloads an object directly to disk
115100
116101
Parameters
117102
----------
103+
threads
118104
cloud_name : str
119105
name of object to download
120106
file_name : str
121107
name on disk to use
108+
threads : int
109+
number of threads to use
122110
123111
Returns
124112
-------

cottoncandy/defaults.cfg

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ mandatory_bucket_prefix =
1212
force_bucket_creation = False
1313
path_separator = /
1414
signature_version =
15+
threads = 8
1516

1617
[upload_settings]
1718
# in MB, except max_mpu_size_TB, and max_mpu_parts

cottoncandy/gdriveclient.py

Lines changed: 6 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -411,7 +411,7 @@ def delete(self, file_name, recursive=False, delete=False):
411411

412412
#### File IO functions
413413

414-
def upload_file(self, file_name, cloud_name=None, permissions=None):
414+
def upload_file(self, file_name, cloud_name=None, permissions=None, threads = 1):
415415
"""Uploads from file on disk
416416
417417
Parameters
@@ -468,25 +468,22 @@ def upload_file(self, file_name, cloud_name=None, permissions=None):
468468

469469
return True
470470

471-
def upload_stream(self, stream, name, properties=None, permissions=None):
471+
def upload_stream(self, stream, cloud_name, properties, permissions, threads = 1):
472472
"""Upload a stream with a .read() method
473473
474474
Parameters
475475
----------
476+
threads
476477
stream : stream
477478
stream to upload
478-
name : str
479-
name to use for cloud file
480-
properties : dict
481-
custom metadata
482479
483480
Returns
484481
-------
485482
: bool
486483
success of operation
487484
"""
488485
# cloudName formatting
489-
name = re.sub('^./', '', name)
486+
name = re.sub('^./', '', cloud_name)
490487

491488
# not current dir
492489
current_directory = None
@@ -524,11 +521,8 @@ def upload_stream(self, stream, name, properties=None, permissions=None):
524521

525522
return True
526523

527-
def upload_multipart(self, stream, cloud_name, properties=None, permissions=None, buffersize = -1, verbose = False):
528-
return self.upload_stream(stream, cloud_name, properties, permissions)
529-
530524

531-
def download_to_file(self, drive_file, local_file=None):
525+
def download_to_file(self, drive_file, local_file=None, threads = 1):
532526
"""Download a file to disk
533527
534528
Parameters
@@ -557,7 +551,7 @@ def download_to_file(self, drive_file, local_file=None):
557551

558552
return True
559553

560-
def download_stream(self, drive_file):
554+
def download_stream(self, drive_file, threads = 1):
561555
"""Downloads a file to memory
562556
563557
Parameters

0 commit comments

Comments
 (0)