diff --git a/CHANGELOG.md b/CHANGELOG.md index 56926bf8..5c40e84a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,10 +13,15 @@ This changelog follows the principles of [Keep a Changelog](https://keepachangel - New Use Case: [Create a Dataset Template](./docs/useCases.md#create-a-dataset-template) under Collections. - New Use Case: [Update Terms of Access](./docs/useCases.md#update-terms-of-access). +- Files: Added `FilesConfig` class for configuring file upload behavior at runtime, including: + - `useS3Tagging`: Option to disable S3 object tagging (`x-amz-tagging` header) for S3-compatible storage that doesn't support tagging. Default: `true`. + - `maxMultipartRetries`: Configurable maximum retries for multipart upload parts. Default: `5`. + - `fileUploadTimeoutMs`: Configurable timeout for file upload operations. Default: `60000`. ### Changed - Add pagination query parameters to Dataset Version Summeries and File Version Summaries use cases +- Files: `DirectUploadClient` constructor now accepts a `DirectUploadClientConfig` object instead of a plain number for `maxMultipartRetries`. ### Fixed diff --git a/src/files/index.ts b/src/files/index.ts index a9d38386..485b2238 100644 --- a/src/files/index.ts +++ b/src/files/index.ts @@ -9,7 +9,7 @@ import { GetFile } from './domain/useCases/GetFile' import { GetFileCitation } from './domain/useCases/GetFileCitation' import { GetFileAndDataset } from './domain/useCases/GetFileAndDataset' import { UploadFile } from './domain/useCases/UploadFile' -import { DirectUploadClient } from './infra/clients/DirectUploadClient' +import { DirectUploadClient, DirectUploadClientConfig } from './infra/clients/DirectUploadClient' import { AddUploadedFilesToDataset } from './domain/useCases/AddUploadedFilesToDataset' import { DeleteFile } from './domain/useCases/DeleteFile' import { ReplaceFile } from './domain/useCases/ReplaceFile' @@ -20,8 +20,40 @@ import { UpdateFileCategories } from './domain/useCases/UpdateFileCategories' import { GetFileVersionSummaries } from './domain/useCases/GetFileVersionSummaries' import { IsFileDeleted } from './domain/useCases/IsFileDeleted' +/** + * Configuration for file upload operations. + * Use FilesConfig.init() to configure upload behavior before using uploadFile. + */ +class FilesConfig { + private static uploadConfig: DirectUploadClientConfig = {} + + /** + * Initialize file upload configuration. + * @param config - Configuration options for file uploads + * @param config.useS3Tagging - Whether to include S3 tagging header (x-amz-tagging: dv-state=temp). + * Set to false if your S3 implementation doesn't support object tagging. Default: true + * @param config.maxMultipartRetries - Maximum number of retries for multipart upload parts. Default: 5 + * @param config.fileUploadTimeoutMs - Timeout in milliseconds for file upload operations. Default: 60000 + */ + static init(config: DirectUploadClientConfig) { + this.uploadConfig = config + } + + static getConfig(): DirectUploadClientConfig { + return this.uploadConfig + } +} + const filesRepository = new FilesRepository() -const directUploadClient = new DirectUploadClient(filesRepository) +// DirectUploadClient is created lazily to allow configuration before first use +let directUploadClientInstance: DirectUploadClient | null = null + +const getDirectUploadClient = (): DirectUploadClient => { + if (!directUploadClientInstance) { + directUploadClientInstance = new DirectUploadClient(filesRepository, FilesConfig.getConfig()) + } + return directUploadClientInstance +} const getDatasetFiles = new GetDatasetFiles(filesRepository) const getDatasetFileCounts = new GetDatasetFileCounts(filesRepository) @@ -32,7 +64,6 @@ const getDatasetFilesTotalDownloadSize = new GetDatasetFilesTotalDownloadSize(fi const getFile = new GetFile(filesRepository) const getFileAndDataset = new GetFileAndDataset(filesRepository) const getFileCitation = new GetFileCitation(filesRepository) -const uploadFile = new UploadFile(directUploadClient) const addUploadedFilesToDataset = new AddUploadedFilesToDataset(filesRepository) const deleteFile = new DeleteFile(filesRepository) const replaceFile = new ReplaceFile(filesRepository) @@ -43,6 +74,27 @@ const updateFileCategories = new UpdateFileCategories(filesRepository) const getFileVersionSummaries = new GetFileVersionSummaries(filesRepository) const isFileDeleted = new IsFileDeleted(filesRepository) +// uploadFile is created lazily to respect FilesConfig settings +let uploadFileInstance: UploadFile | null = null + +/** + * Uploads a file to remote storage and returns the storage identifier. + * Respects FilesConfig settings (call FilesConfig.init() before first upload if you need custom config). + */ +const uploadFile = { + execute: ( + datasetId: number | string, + file: File, + progress: (now: number) => void, + abortController: AbortController + ): Promise => { + if (!uploadFileInstance) { + uploadFileInstance = new UploadFile(getDirectUploadClient()) + } + return uploadFileInstance.execute(datasetId, file, progress, abortController) + } +} + export { getDatasetFiles, getFileDownloadCount, @@ -62,7 +114,8 @@ export { updateFileCategories, replaceFile, getFileVersionSummaries, - isFileDeleted + isFileDeleted, + FilesConfig } export { FileModel as File, FileEmbargo, FileChecksum } from './domain/models/FileModel' diff --git a/src/files/infra/clients/DirectUploadClient.ts b/src/files/infra/clients/DirectUploadClient.ts index 8dfd1a9b..c2120d73 100644 --- a/src/files/infra/clients/DirectUploadClient.ts +++ b/src/files/infra/clients/DirectUploadClient.ts @@ -15,15 +15,27 @@ import { MultipartAbortError } from './errors/MultipartAbortError' import { FileUploadCancelError } from './errors/FileUploadCancelError' import { ApiConstants } from '../../../core/infra/repositories/ApiConstants' +export interface DirectUploadClientConfig { + /** Maximum number of retries for multipart upload parts. Default: 5 */ + maxMultipartRetries?: number + /** Whether to include S3 tagging header (x-amz-tagging: dv-state=temp). Default: true + * Set to false if your S3 implementation doesn't support object tagging. */ + useS3Tagging?: boolean + /** Timeout in milliseconds for file upload operations. Default: 60000 */ + fileUploadTimeoutMs?: number +} + export class DirectUploadClient implements IDirectUploadClient { private filesRepository: IFilesRepository private maxMultipartRetries: number + private useS3Tagging: boolean + private readonly fileUploadTimeoutMs: number - private readonly fileUploadTimeoutMs: number = 60_000 - - constructor(filesRepository: IFilesRepository, maxMultipartRetries = 5) { + constructor(filesRepository: IFilesRepository, config: DirectUploadClientConfig = {}) { this.filesRepository = filesRepository - this.maxMultipartRetries = maxMultipartRetries + this.maxMultipartRetries = config.maxMultipartRetries ?? 5 + this.useS3Tagging = config.useS3Tagging ?? true + this.fileUploadTimeoutMs = config.fileUploadTimeoutMs ?? 60_000 } public async uploadFile( @@ -59,11 +71,15 @@ export class DirectUploadClient implements IDirectUploadClient { ): Promise { try { const arrayBuffer = await file.arrayBuffer() + const headers: Record = { + 'Content-Type': 'application/octet-stream' + } + // Only add S3 tagging header if enabled (some S3 implementations don't support it) + if (this.useS3Tagging) { + headers['x-amz-tagging'] = 'dv-state=temp' + } await axios.put(destination.urls[0], arrayBuffer, { - headers: { - 'Content-Type': 'application/octet-stream', - 'x-amz-tagging': 'dv-state=temp' - }, + headers, timeout: this.fileUploadTimeoutMs, signal: abortController.signal, onUploadProgress: (progressEvent) => diff --git a/test/unit/files/DirectUploadClient.test.ts b/test/unit/files/DirectUploadClient.test.ts index 38f1921a..c0646cca 100644 --- a/test/unit/files/DirectUploadClient.test.ts +++ b/test/unit/files/DirectUploadClient.test.ts @@ -87,6 +87,56 @@ describe('uploadFile', () => { expect(actual).toEqual(testDestination.storageId) }) + + test('should include S3 tagging header by default', async () => { + const filesRepositoryStub: IFilesRepository = {} as IFilesRepository + const testDestination: FileUploadDestination = createSingleFileUploadDestinationModel() + filesRepositoryStub.getFileUploadDestination = jest.fn().mockResolvedValue(testDestination) + + const axiosPutSpy = jest.spyOn(axios, 'put').mockResolvedValue(undefined) + + const sut = new DirectUploadClient(filesRepositoryStub) + + const progressMock = jest.fn() + const abortController = new AbortController() + + await sut.uploadFile(1, testFile, progressMock, abortController) + + expect(axiosPutSpy).toHaveBeenCalledWith( + testDestination.urls[0], + expect.anything(), + expect.objectContaining({ + headers: expect.objectContaining({ + 'x-amz-tagging': 'dv-state=temp' + }) + }) + ) + }) + + test('should not include S3 tagging header when useS3Tagging is false', async () => { + const filesRepositoryStub: IFilesRepository = {} as IFilesRepository + const testDestination: FileUploadDestination = createSingleFileUploadDestinationModel() + filesRepositoryStub.getFileUploadDestination = jest.fn().mockResolvedValue(testDestination) + + const axiosPutSpy = jest.spyOn(axios, 'put').mockResolvedValue(undefined) + + const sut = new DirectUploadClient(filesRepositoryStub, { useS3Tagging: false }) + + const progressMock = jest.fn() + const abortController = new AbortController() + + await sut.uploadFile(1, testFile, progressMock, abortController) + + expect(axiosPutSpy).toHaveBeenCalledWith( + testDestination.urls[0], + expect.anything(), + expect.objectContaining({ + headers: expect.not.objectContaining({ + 'x-amz-tagging': expect.anything() + }) + }) + ) + }) }) describe('Multiple parts file', () => { @@ -113,7 +163,7 @@ describe('uploadFile', () => { jest.spyOn(axios, 'delete').mockResolvedValue(undefined) jest.spyOn(axios, 'put').mockRejectedValue('error') - const sut = new DirectUploadClient(filesRepositoryStub, 1) + const sut = new DirectUploadClient(filesRepositoryStub, { maxMultipartRetries: 1 }) const progressMock = jest.fn() const abortController = new AbortController() @@ -143,7 +193,7 @@ describe('uploadFile', () => { const progressMock = jest.fn() const abortController = new AbortController() - const sut = new DirectUploadClient(filesRepositoryStub, 1) + const sut = new DirectUploadClient(filesRepositoryStub, { maxMultipartRetries: 1 }) await expect(sut.uploadFile(1, testFile, progressMock, abortController)).rejects.toThrow( MultipartAbortError @@ -165,7 +215,7 @@ describe('uploadFile', () => { const progressMock = jest.fn() const abortController = new AbortController() - const sut = new DirectUploadClient(filesRepositoryStub, 1) + const sut = new DirectUploadClient(filesRepositoryStub, { maxMultipartRetries: 1 }) await expect(sut.uploadFile(1, testFile, progressMock, abortController)).rejects.toThrow( MultipartCompletionError ) @@ -187,7 +237,7 @@ describe('uploadFile', () => { .mockResolvedValueOnce(successfulPartResponse) .mockResolvedValueOnce(undefined) - const sut = new DirectUploadClient(filesRepositoryStub, 1) + const sut = new DirectUploadClient(filesRepositoryStub, { maxMultipartRetries: 1 }) const progressMock = jest.fn() const abortController = new AbortController() diff --git a/test/unit/files/FilesConfig.test.ts b/test/unit/files/FilesConfig.test.ts new file mode 100644 index 00000000..c2ae7487 --- /dev/null +++ b/test/unit/files/FilesConfig.test.ts @@ -0,0 +1,59 @@ +import { FilesConfig } from '../../../src/files' + +describe('FilesConfig', () => { + beforeEach(() => { + // Reset config before each test + FilesConfig.init({}) + }) + + describe('init', () => { + test('should set useS3Tagging configuration', () => { + FilesConfig.init({ useS3Tagging: false }) + + const config = FilesConfig.getConfig() + expect(config.useS3Tagging).toBe(false) + }) + + test('should set maxMultipartRetries configuration', () => { + FilesConfig.init({ maxMultipartRetries: 10 }) + + const config = FilesConfig.getConfig() + expect(config.maxMultipartRetries).toBe(10) + }) + + test('should set fileUploadTimeoutMs configuration', () => { + FilesConfig.init({ fileUploadTimeoutMs: 120000 }) + + const config = FilesConfig.getConfig() + expect(config.fileUploadTimeoutMs).toBe(120000) + }) + + test('should set multiple configuration options', () => { + FilesConfig.init({ + useS3Tagging: false, + maxMultipartRetries: 3, + fileUploadTimeoutMs: 30000 + }) + + const config = FilesConfig.getConfig() + expect(config.useS3Tagging).toBe(false) + expect(config.maxMultipartRetries).toBe(3) + expect(config.fileUploadTimeoutMs).toBe(30000) + }) + }) + + describe('getConfig', () => { + test('should return empty config by default', () => { + const config = FilesConfig.getConfig() + expect(config).toEqual({}) + }) + + test('should return previously set config', () => { + const expectedConfig = { useS3Tagging: true, maxMultipartRetries: 5 } + FilesConfig.init(expectedConfig) + + const config = FilesConfig.getConfig() + expect(config).toEqual(expectedConfig) + }) + }) +})