UNPKG

@azure/storage-file-datalake

Version:

Microsoft Azure Storage SDK for JavaScript - DataLake

943 lines 61.1 kB
import { isTokenCredential } from "@azure/core-auth"; import { isNode } from "@azure/core-util"; import { isPipelineLike, newPipeline } from "./Pipeline"; import { BlobClient, BlockBlobClient } from "@azure/storage-blob"; import { AnonymousCredential } from "@azure/storage-blob"; import { StorageSharedKeyCredential } from "./credentials/StorageSharedKeyCredential"; import { BufferScheduler } from "../../storage-common/src"; import { DataLakeLeaseClient } from "./DataLakeLeaseClient"; import { PathOperationsImpl as Path } from "./generated/src/operations"; import { generateDataLakeSASQueryParameters, generateDataLakeSASQueryParametersInternal, } from "./sas/DataLakeSASSignatureValues"; import { StorageClient } from "./StorageClient"; import { toAccessControlChangeFailureArray, toAcl, toAclString, toBlobCpkInfo, toPermissions, toPermissionsString, toProperties, } from "./transforms"; import { Batch } from "./utils/Batch"; import { BLOCK_BLOB_MAX_BLOCKS, DEFAULT_HIGH_LEVEL_CONCURRENCY, ETagAny, FILE_MAX_SINGLE_UPLOAD_THRESHOLD, FILE_MAX_SIZE_BYTES, FILE_UPLOAD_DEFAULT_CHUNK_SIZE, FILE_UPLOAD_MAX_CHUNK_SIZE, } from "./utils/constants"; import { DataLakeAclChangeFailedError } from "./utils/DataLakeAclChangeFailedError"; import { tracingClient } from "./utils/tracing"; import { appendToURLPath, appendToURLQuery, assertResponse, ensureCpkIfSpecified, getURLPathAndQuery, ParsePathGetPropertiesExtraHeaderValues, setURLPath, setURLQueries, } from "./utils/utils.common"; import { fsCreateReadStream, fsStat } from "./utils/utils.node"; /** * A DataLakePathClient represents a URL to the Azure Storage path (directory or file). */ export class DataLakePathClient extends StorageClient { /** * SetAccessControlRecursiveInternal operation sets the Access Control on a path and sub paths. * * @param mode - Mode \"set\" sets POSIX access control rights on files and directories, * Mode \"modify\" modifies one or more POSIX access control rights that pre-exist on files and directories, * Mode \"remove\" removes one or more POSIX access control rights that were present earlier on files and directories. * @param acl - The POSIX access control list for the file or directory. * @param options - Optional. Options */ async setAccessControlRecursiveInternal(mode, acl, options = {}) { if (options.maxBatches !== undefined && options.maxBatches < 1) { throw RangeError(`Options maxBatches must be larger than 0.`); } if (options.batchSize !== undefined && options.batchSize < 1) { throw RangeError(`Options batchSize must be larger than 0.`); } const result = { counters: { failedChangesCount: 0, changedDirectoriesCount: 0, changedFilesCount: 0, }, continuationToken: undefined, }; return tracingClient.withSpan("DataLakePathClient-setAccessControlRecursiveInternal", options, async (updatedOptions) => { var _a, _b, _c, _d, _e, _f; let continuationToken = options.continuationToken; let batchCounter = 0; let reachMaxBatches = false; do { let response; try { response = await this.pathContext.setAccessControlRecursive(mode, Object.assign(Object.assign({}, updatedOptions), { acl: toAclString(acl), maxRecords: options.batchSize, continuation: continuationToken, forceFlag: options.continueOnFailure })); } catch (e) { throw new DataLakeAclChangeFailedError(e, continuationToken); } batchCounter++; continuationToken = response.continuation; // Update result result.continuationToken = continuationToken; result.counters.failedChangesCount += (_a = response.failureCount) !== null && _a !== void 0 ? _a : 0; result.counters.changedDirectoriesCount += (_b = response.directoriesSuccessful) !== null && _b !== void 0 ? _b : 0; result.counters.changedFilesCount += (_c = response.filesSuccessful) !== null && _c !== void 0 ? _c : 0; // Progress event call back if (options.onProgress) { const progress = { batchFailures: toAccessControlChangeFailureArray(response.failedEntries), batchCounters: { failedChangesCount: (_d = response.failureCount) !== null && _d !== void 0 ? _d : 0, changedDirectoriesCount: (_e = response.directoriesSuccessful) !== null && _e !== void 0 ? _e : 0, changedFilesCount: (_f = response.filesSuccessful) !== null && _f !== void 0 ? _f : 0, }, aggregateCounters: result.counters, continuationToken: continuationToken, }; options.onProgress(progress); } reachMaxBatches = options.maxBatches === undefined ? false : batchCounter >= options.maxBatches; } while (continuationToken && !reachMaxBatches); return result; }); } constructor(url, credentialOrPipeline, // Legacy, no way to fix the eslint error without breaking. Disable the rule for this line. /* eslint-disable-next-line @azure/azure-sdk/ts-naming-options */ options) { if (isPipelineLike(credentialOrPipeline)) { super(url, credentialOrPipeline); } else { let credential; if (credentialOrPipeline === undefined) { credential = new AnonymousCredential(); } else { credential = credentialOrPipeline; } const pipeline = newPipeline(credential, options); super(url, pipeline); } this.pathContext = new Path(this.storageClientContext); this.blobClient = new BlobClient(this.blobEndpointUrl, this.pipeline); } /** * Name of current file system. * * @readonly */ get fileSystemName() { return this.blobClient.containerName; } /** * Name of current path (directory or file). * * @readonly */ get name() { return this.blobClient.name; } /** * Convert current DataLakePathClient to DataLakeDirectoryClient if current path is a directory. * */ // Legacy, no way to fix the eslint error without breaking. Disable the rule for this line. /* eslint-disable-next-line @azure/azure-sdk/ts-naming-subclients */ toDirectoryClient() { return new DataLakeDirectoryClient(this.dfsEndpointUrl, this.pipeline); } /** * Convert current DataLakePathClient to DataLakeFileClient if current path is a file. * */ // Legacy, no way to fix the eslint error without breaking. Disable the rule for this line. /* eslint-disable-next-line @azure/azure-sdk/ts-naming-subclients */ toFileClient() { return new DataLakeFileClient(this.dfsEndpointUrl, this.pipeline); } /** * Get a {@link DataLakeLeaseClient} that manages leases on the path (directory or file). * * @param proposeLeaseId - Optional. Initial proposed lease Id. */ getDataLakeLeaseClient(proposeLeaseId) { return new DataLakeLeaseClient(this.blobClient.getBlobLeaseClient(proposeLeaseId)); } /** * Create a directory or path. * * @see https://learn.microsoft.com/en-us/rest/api/storageservices/datalakestoragegen2/path/create * * @param resourceType - Resource type, "directory" or "file". * @param options - Optional. Options when creating path. */ async create(resourceType, options = {}) { options.conditions = options.conditions || {}; return tracingClient.withSpan("DataLakePathClient-create", options, async (updatedOptions) => { ensureCpkIfSpecified(options.customerProvidedKey, this.isHttps); let expiryOptions; let expiresOn; if (typeof options.expiresOn === "number" && Number.isFinite(options.expiresOn)) { expiryOptions = "RelativeToNow"; expiresOn = String(Math.round(options.expiresOn)); } else if (options.expiresOn instanceof Date) { expiryOptions = "Absolute"; expiresOn = options.expiresOn.toUTCString(); } else if (options.expiresOn) { throw new Error(`Value for expiresOn is invalid: ${options.expiresOn}`); } return assertResponse(await this.pathContext.create(Object.assign(Object.assign({}, updatedOptions), { resource: resourceType, leaseAccessConditions: options.conditions, modifiedAccessConditions: options.conditions, properties: toProperties(options.metadata), cpkInfo: options.customerProvidedKey, acl: options.acl ? toAclString(options.acl) : undefined, expiryOptions, expiresOn }))); }); } /** * Create a directory or file. If the resource already exists, it is not changed. * * @see https://learn.microsoft.com/en-us/rest/api/storageservices/datalakestoragegen2/path/create * * @param resourceType - Resource type, "directory" or "file". * @param options - */ async createIfNotExists(resourceType, options = {}) { return tracingClient.withSpan("DataLakePathClient-createIfNotExists", options, async (updatedOptions) => { var _a, _b; try { const conditions = { ifNoneMatch: ETagAny }; const res = await this.create(resourceType, Object.assign(Object.assign({}, options), { conditions, tracingOptions: updatedOptions.tracingOptions })); return Object.assign({ succeeded: true }, res); } catch (e) { if (((_a = e.details) === null || _a === void 0 ? void 0 : _a.errorCode) === "PathAlreadyExists") { return Object.assign(Object.assign({ succeeded: false }, (_b = e.response) === null || _b === void 0 ? void 0 : _b.parsedHeaders), { _response: e.response }); } throw e; } }); } /** * Returns true if the Data Lake file represented by this client exists; false otherwise. * * NOTE: use this function with care since an existing file might be deleted by other clients or * applications. Vice versa new files might be added by other clients or applications after this * function completes. * * @param options - options to Exists operation. */ async exists(options = {}) { return tracingClient.withSpan("DataLakeFileClient-exists", options, async (updatedOptions) => { return this.blobClient.exists(Object.assign(Object.assign({}, updatedOptions), { customerProvidedKey: toBlobCpkInfo(updatedOptions.customerProvidedKey) })); }); } /** * Delete current path (directory or file). * * @see https://learn.microsoft.com/en-us/rest/api/storageservices/datalakestoragegen2/path/delete * * @param recursive - Required and valid only when the resource is a directory. If "true", all paths beneath the directory will be deleted. * @param options - Optional. Options when deleting path. */ async delete(recursive, options = {}) { options.conditions = options.conditions || {}; return tracingClient.withSpan("DataLakePathClient-delete", options, async (updatedOptions) => { if (this.isTokenCredential === undefined) { this.isTokenCredential = false; this.pipeline.factories.forEach((factory) => { if (isTokenCredential(factory.credential)) { this.isTokenCredential = true; } }); if (isTokenCredential(this.pipeline._credential)) { this.isTokenCredential = true; } } const paginated = recursive === true && this.isTokenCredential === true; let continuation; let response; // How to handle long delete loop? do { response = assertResponse(await this.pathContext.delete(Object.assign(Object.assign({}, updatedOptions), { continuation, recursive, leaseAccessConditions: options.conditions, modifiedAccessConditions: options.conditions, abortSignal: options.abortSignal, paginated }))); continuation = response.continuation; } while (continuation); return response; }); } /** * Delete current path (directory or file) if it exists. * * @see https://learn.microsoft.com/en-us/rest/api/storageservices/datalakestoragegen2/path/delete * * @param recursive - Required and valid only when the resource is a directory. If "true", all paths beneath the directory will be deleted. * @param options - */ async deleteIfExists(recursive, options = {}) { options.conditions = options.conditions || {}; return tracingClient.withSpan("DataLakePathClient-deleteIfExists", options, async (updatedOptions) => { var _a, _b; try { const res = await this.delete(recursive, updatedOptions); return Object.assign({ succeeded: true }, res); } catch (e) { if (((_a = e.details) === null || _a === void 0 ? void 0 : _a.errorCode) === "PathNotFound") { return Object.assign(Object.assign({ succeeded: false }, (_b = e.response) === null || _b === void 0 ? void 0 : _b.parsedHeaders), { _response: e.response }); } throw e; } }); } /** * Returns the access control data for a path (directory of file). * * @see https://learn.microsoft.com/en-us/rest/api/storageservices/datalakestoragegen2/path/getproperties * * @param options - Optional. Options when getting file access control. */ async getAccessControl(options = {}) { options.conditions = options.conditions || {}; return tracingClient.withSpan("DataLakePathClient-getAccessControl", options, async (updatedOptions) => { const response = assertResponse(await this.pathContext.getProperties(Object.assign(Object.assign({}, updatedOptions), { action: "getAccessControl", upn: options.userPrincipalName, leaseAccessConditions: options.conditions, modifiedAccessConditions: options.conditions, abortSignal: options.abortSignal }))); return Object.assign(Object.assign({}, response), { _response: response._response, permissions: toPermissions(response.permissions), acl: toAcl(response.acl) }); }); } /** * Set the access control data for a path (directory of file). * * @see https://learn.microsoft.com/en-us/rest/api/storageservices/datalakestoragegen2/path/update * * @param acl - The POSIX access control list for the file or directory. * @param options - Optional. Options when setting path access control. */ async setAccessControl(acl, options = {}) { options.conditions = options.conditions || {}; return tracingClient.withSpan("DataLakePathClient-setAccessControl", options, async (updatedOptions) => { return assertResponse(await this.pathContext.setAccessControl(Object.assign(Object.assign({}, updatedOptions), { acl: toAclString(acl), leaseAccessConditions: options.conditions, modifiedAccessConditions: options.conditions }))); }); } /** * Sets the Access Control on a path and sub paths. * * @see https://learn.microsoft.com/en-us/rest/api/storageservices/datalakestoragegen2/path/update * * @param acl - The POSIX access control list for the file or directory. * @param options - Optional. Options */ async setAccessControlRecursive(acl, options = {}) { return tracingClient.withSpan("DataLakePathClient-setAccessControlRecursive", options, async (updatedOptions) => { return this.setAccessControlRecursiveInternal("set", acl, updatedOptions); }); } /** * Modifies the Access Control on a path and sub paths. * * @see https://learn.microsoft.com/en-us/rest/api/storageservices/datalakestoragegen2/path/update * * @param acl - The POSIX access control list for the file or directory. * @param options - Optional. Options */ async updateAccessControlRecursive(acl, options = {}) { return tracingClient.withSpan("DataLakePathClient-updateAccessControlRecursive", options, async (updatedOptions) => { return this.setAccessControlRecursiveInternal("modify", acl, updatedOptions); }); } /** * Removes the Access Control on a path and sub paths. * * @see https://learn.microsoft.com/en-us/rest/api/storageservices/datalakestoragegen2/path/update * * @param acl - The POSIX access control list for the file or directory. * @param options - Optional. Options */ async removeAccessControlRecursive(acl, options = {}) { return tracingClient.withSpan("DataLakePathClient-removeAccessControlRecursive", options, async (updatedOptions) => { return this.setAccessControlRecursiveInternal("remove", acl, updatedOptions); }); } /** * Sets the file permissions on a path. * * @see https://learn.microsoft.com/en-us/rest/api/storageservices/datalakestoragegen2/path/update * * @param permissions - The POSIX access permissions for the file owner, the file owning group, and others. * @param options - Optional. Options when setting path permissions. */ async setPermissions(permissions, options = {}) { options.conditions = options.conditions || {}; return tracingClient.withSpan("DataLakePathClient-setPermissions", options, async (updatedOptions) => { return assertResponse(await this.pathContext.setAccessControl(Object.assign(Object.assign({}, updatedOptions), { permissions: toPermissionsString(permissions), leaseAccessConditions: options.conditions, modifiedAccessConditions: options.conditions }))); }); } /** * Returns all user-defined metadata, standard HTTP properties, and system properties * for the path (directory or file). * * WARNING: The `metadata` object returned in the response will have its keys in lowercase, even if * they originally contained uppercase characters. This differs from the metadata keys returned by * the methods of {@link DataLakeFileSystemClient} that list paths using the `includeMetadata` option, which * will retain their original casing. * * @see https://learn.microsoft.com/en-us/rest/api/storageservices/get-blob-properties * * @param options - Optional. Options when getting path properties. */ async getProperties(options = {}) { return tracingClient.withSpan("DataLakePathClient-getProperties", options, async (updatedOptions) => { const response = await this.blobClient.getProperties(Object.assign(Object.assign({}, options), { customerProvidedKey: toBlobCpkInfo(options.customerProvidedKey), tracingOptions: updatedOptions.tracingOptions })); return ParsePathGetPropertiesExtraHeaderValues(response); }); } /** * Sets system properties on the path (directory or file). * * If no value provided, or no value provided for the specified blob HTTP headers, * these blob HTTP headers without a value will be cleared. * @see https://learn.microsoft.com/en-us/rest/api/storageservices/set-blob-properties * * @param httpHeaders - * @param options - */ async setHttpHeaders(httpHeaders, options = {}) { return tracingClient.withSpan("DataLakePathClient-setHttpHeaders", options, async (updatedOptions) => { return this.blobClient.setHTTPHeaders({ blobCacheControl: httpHeaders.cacheControl, blobContentType: httpHeaders.contentType, blobContentMD5: httpHeaders.contentMD5, blobContentEncoding: httpHeaders.contentEncoding, blobContentLanguage: httpHeaders.contentLanguage, blobContentDisposition: httpHeaders.contentDisposition, }, updatedOptions); }); } /** * Sets user-defined metadata for the specified path (directory of file) as one or more name-value pairs. * * If no option provided, or no metadata defined in the parameter, the path * metadata will be removed. * * @see https://learn.microsoft.com/en-us/rest/api/storageservices/set-blob-metadata * * @param metadata - Optional. Replace existing metadata with this value. * If no value provided the existing metadata will be removed. * @param options - Optional. Options when setting path metadata. */ async setMetadata(metadata, options = {}) { return tracingClient.withSpan("DataLakePathClient-setMetadata", options, async (updatedOptions) => { return this.blobClient.setMetadata(metadata, Object.assign(Object.assign({}, options), { customerProvidedKey: toBlobCpkInfo(options.customerProvidedKey), tracingOptions: updatedOptions.tracingOptions })); }); } async move(destinationPathOrFileSystem, destinationPathOrOptions, options) { let destinationFileSystem = this.fileSystemName; let destinationPath = destinationPathOrFileSystem; let pathMoveOptions; if (typeof destinationPathOrOptions === "string") { destinationFileSystem = destinationPathOrFileSystem; destinationPath = destinationPathOrOptions; pathMoveOptions = options !== null && options !== void 0 ? options : {}; } else { pathMoveOptions = destinationPathOrOptions !== null && destinationPathOrOptions !== void 0 ? destinationPathOrOptions : {}; } const renameSource = getURLPathAndQuery(this.dfsEndpointUrl); const split = destinationPath.split("?"); let destinationUrl; if (split.length === 2) { const renameDestination = `/${destinationFileSystem}/${split[0]}`; destinationUrl = setURLPath(this.dfsEndpointUrl, renameDestination); destinationUrl = setURLQueries(destinationUrl, split[1]); } else if (split.length === 1) { const renameDestination = `/${destinationFileSystem}/${destinationPath}`; destinationUrl = setURLPath(this.dfsEndpointUrl, renameDestination); } else { throw new RangeError("Destination path should not contain more than one query string"); } const destPathClient = new DataLakePathClient(destinationUrl, this.pipeline); return tracingClient.withSpan("DataLakePathClient-move", pathMoveOptions, async (updatedOptions) => { var _a, _b, _c, _d, _e; return assertResponse(await destPathClient.pathContext.create(Object.assign(Object.assign({}, updatedOptions), { mode: "legacy", // By default renameSource, sourceLeaseId: (_a = pathMoveOptions.conditions) === null || _a === void 0 ? void 0 : _a.leaseId, leaseAccessConditions: pathMoveOptions.destinationConditions, sourceModifiedAccessConditions: { sourceIfMatch: (_b = pathMoveOptions.conditions) === null || _b === void 0 ? void 0 : _b.ifMatch, sourceIfNoneMatch: (_c = pathMoveOptions.conditions) === null || _c === void 0 ? void 0 : _c.ifNoneMatch, sourceIfModifiedSince: (_d = pathMoveOptions.conditions) === null || _d === void 0 ? void 0 : _d.ifModifiedSince, sourceIfUnmodifiedSince: (_e = pathMoveOptions.conditions) === null || _e === void 0 ? void 0 : _e.ifUnmodifiedSince, }, modifiedAccessConditions: pathMoveOptions.destinationConditions, abortSignal: pathMoveOptions.abortSignal }))); }); } } /** * A DataLakeDirectoryClient represents a URL to the Azure Storage directory. */ export class DataLakeDirectoryClient extends DataLakePathClient { async create(resourceTypeOrOptions, options = {}) { if (resourceTypeOrOptions === "file") { throw TypeError(`DataLakeDirectoryClient:create() resourceType cannot be ${resourceTypeOrOptions}. Refer to DataLakeFileClient for file creation.`); } let pathCreateOptions; if (resourceTypeOrOptions === "directory") { pathCreateOptions = options; } else { pathCreateOptions = resourceTypeOrOptions !== null && resourceTypeOrOptions !== void 0 ? resourceTypeOrOptions : {}; } return tracingClient.withSpan("DataLakeDirectoryClient-create", pathCreateOptions, async (updatedOptions) => { return super.create("directory", updatedOptions); }); } async createIfNotExists(resourceTypeOrOptions, options = {}) { if (resourceTypeOrOptions === "file") { throw TypeError(`DataLakeDirectoryClient:createIfNotExists() resourceType cannot be ${resourceTypeOrOptions}. Refer to DataLakeFileClient for file creation.`); } if (resourceTypeOrOptions !== "directory") { options = resourceTypeOrOptions || {}; } return tracingClient.withSpan("DataLakeDirectoryClient-createIfNotExists", options, async (updatedOptions) => { return super.createIfNotExists("directory", Object.assign({}, updatedOptions)); }); } /** * Creates a {@link DataLakeDirectoryClient} object under current directory. * * @param subdirectoryName - Subdirectory name. */ getSubdirectoryClient(subdirectoryName) { return new DataLakeDirectoryClient(appendToURLPath(this.url, encodeURIComponent(subdirectoryName)), this.pipeline); } /** * Creates a {@link DataLakeFileClient} object under current directory. * * @param fileName - */ // Legacy, no way to fix the eslint error without breaking. Disable the rule for this line. /* eslint-disable-next-line @azure/azure-sdk/ts-naming-subclients */ getFileClient(fileName) { return new DataLakeFileClient(appendToURLPath(this.url, encodeURIComponent(fileName)), this.pipeline); } /** * Only available for clients constructed with a shared key credential. * * Generates a Service Shared Access Signature (SAS) URI based on the client properties * and parameters passed in. The SAS is signed by the shared key credential of the client. * * @see https://learn.microsoft.com/en-us/rest/api/storageservices/constructing-a-service-sas * * @param options - Optional parameters. * @returns The SAS URI consisting of the URI to the resource represented by this client, followed by the generated SAS token. */ generateSasUrl(options) { return new Promise((resolve) => { if (!(this.credential instanceof StorageSharedKeyCredential)) { throw RangeError("Can only generate the SAS when the client is initialized with a shared key credential"); } const sas = generateDataLakeSASQueryParameters(Object.assign({ fileSystemName: this.fileSystemName, pathName: this.name, isDirectory: true }, options), this.credential).toString(); resolve(appendToURLQuery(this.url, sas)); }); } /** * Generates string to sign for a Service Shared Access Signature (SAS) URI based on the client properties * and parameters passed in. * * @see https://learn.microsoft.com/en-us/rest/api/storageservices/constructing-a-service-sas * * @param options - Optional parameters. * @returns The SAS URI consisting of the URI to the resource represented by this client, followed by the generated SAS token. */ /* eslint-disable-next-line @azure/azure-sdk/ts-naming-options*/ generateSasStringToSign(options) { if (!(this.credential instanceof StorageSharedKeyCredential)) { throw RangeError("Can only generate the SAS when the client is initialized with a shared key credential"); } return generateDataLakeSASQueryParametersInternal(Object.assign({ fileSystemName: this.fileSystemName, pathName: this.name, isDirectory: true }, options), this.credential).stringToSign; } /** * Generates a Service Shared Access Signature (SAS) URI based on the client properties * and parameters passed in. The SAS is signed by the input user delegation key. * * @see https://learn.microsoft.com/en-us/rest/api/storageservices/constructing-a-service-sas * * @param options - Optional parameters. * @param userDelegationKey - Return value of `blobServiceClient.getUserDelegationKey()` * @returns The SAS URI consisting of the URI to the resource represented by this client, followed by the generated SAS token. */ generateUserDelegationSasUrl(options, userDelegationKey) { return new Promise((resolve) => { const sas = generateDataLakeSASQueryParameters(Object.assign({ fileSystemName: this.fileSystemName, pathName: this.name, isDirectory: true }, options), userDelegationKey, this.accountName).toString(); resolve(appendToURLQuery(this.url, sas)); }); } /** * Generates string to sign for a Service Shared Access Signature (SAS) URI based on the client properties * and parameters passed in The SAS is signed by the input user delegation key. * * @see https://learn.microsoft.com/en-us/rest/api/storageservices/constructing-a-service-sas * * @param options - Optional parameters. * @param userDelegationKey - Return value of `blobServiceClient.getUserDelegationKey()` * @returns The SAS URI consisting of the URI to the resource represented by this client, followed by the generated SAS token. */ generateUserDelegationSasStringToSign(options, userDelegationKey) { return generateDataLakeSASQueryParametersInternal(Object.assign({ fileSystemName: this.fileSystemName, pathName: this.name, isDirectory: true }, options), userDelegationKey, this.accountName).stringToSign; } } /** * A DataLakeFileClient represents a URL to the Azure Storage file. */ export class DataLakeFileClient extends DataLakePathClient { constructor(url, credentialOrPipeline, // Legacy, no way to fix the eslint error without breaking. Disable the rule for this line. /* eslint-disable-next-line @azure/azure-sdk/ts-naming-options */ options) { if (isPipelineLike(credentialOrPipeline)) { super(url, credentialOrPipeline); } else { let credential; if (credentialOrPipeline === undefined) { credential = new AnonymousCredential(); } else { credential = credentialOrPipeline; } const pipeline = newPipeline(credential, options); super(url, pipeline); } this.pathContextInternal = new Path(this.storageClientContext); this.blockBlobClientInternal = new BlockBlobClient(this.blobEndpointUrl, this.pipeline); this.pathContextInternalToBlobEndpoint = new Path(this.storageClientContextToBlobEndpoint); } async create(resourceTypeOrOptions, options = {}) { if (resourceTypeOrOptions === "directory") { throw TypeError(`DataLakeFileClient:create() resourceType cannot be ${resourceTypeOrOptions}. Refer to DataLakeDirectoryClient for directory creation.`); } let pathCreateOptions; if (resourceTypeOrOptions === "file") { pathCreateOptions = options; } else { pathCreateOptions = resourceTypeOrOptions !== null && resourceTypeOrOptions !== void 0 ? resourceTypeOrOptions : {}; } return tracingClient.withSpan("DataLakeFileClient-create", pathCreateOptions, async (updatedOptions) => { return super.create("file", updatedOptions); }); } async createIfNotExists(resourceTypeOrOptions, options = {}) { if (resourceTypeOrOptions === "directory") { throw TypeError(`DataLakeFileClient:createIfNotExists() resourceType cannot be ${resourceTypeOrOptions}. Refer to DataLakeDirectoryClient for directory creation.`); } if (resourceTypeOrOptions !== "file") { options = resourceTypeOrOptions || {}; } return tracingClient.withSpan("DataLakeFileClient-createIfNotExists", options, async (updatedOptions) => { return super.createIfNotExists("file", updatedOptions); }); } /** * Downloads a file from the service, including its metadata and properties. * * * In Node.js, data returns in a Readable stream readableStreamBody * * In browsers, data returns in a promise contentAsBlob * * @see https://learn.microsoft.com/en-us/rest/api/storageservices/get-blob * * * Example usage (Node.js): * * ```js * // Download and convert a file to a string * const downloadResponse = await fileClient.read(); * const downloaded = await streamToBuffer(downloadResponse.readableStreamBody); * console.log("Downloaded file content:", downloaded.toString()); * * async function streamToBuffer(readableStream) { * return new Promise((resolve, reject) => { * const chunks = []; * readableStream.on("data", (data) => { * chunks.push(typeof data === "string" ? Buffer.from(data) : data); * }); * readableStream.on("end", () => { * resolve(Buffer.concat(chunks)); * }); * readableStream.on("error", reject); * }); * } * ``` * * Example usage (browser): * * ```js * // Download and convert a file to a string * const downloadResponse = await fileClient.read(); * const downloaded = await blobToString(await downloadResponse.contentAsBlob); * console.log("Downloaded file content", downloaded); * * async function blobToString(blob: Blob): Promise<string> { * const fileReader = new FileReader(); * return new Promise<string>((resolve, reject) => { * fileReader.onloadend = (ev: any) => { * resolve(ev.target!.result); * }; * fileReader.onerror = reject; * fileReader.readAsText(blob); * }); * } * ``` * * @param offset - Optional. Offset to read file, default value is 0. * @param count - Optional. How many bytes to read, default will read from offset to the end. * @param options - Optional. Options when reading file. */ async read(offset = 0, count, options = {}) { return tracingClient.withSpan("DataLakeFileClient-read", options, async (updatedOptions) => { const rawResponse = await this.blockBlobClientInternal.download(offset, count, Object.assign(Object.assign({}, updatedOptions), { customerProvidedKey: toBlobCpkInfo(updatedOptions.customerProvidedKey) })); const response = ParsePathGetPropertiesExtraHeaderValues(rawResponse); if (!isNode && !response.contentAsBlob) { response.contentAsBlob = rawResponse.blobBody; } response.fileContentMD5 = rawResponse.blobContentMD5; response._response.parsedHeaders.fileContentMD5 = rawResponse._response.parsedHeaders.blobContentMD5; delete rawResponse.blobContentMD5; delete rawResponse._response.parsedHeaders.blobContentMD5; return response; }); } /** * Uploads data to be appended to a file. Data can only be appended to a file. * To apply perviously uploaded data to a file, call flush. * * @see https://learn.microsoft.com/en-us/rest/api/storageservices/datalakestoragegen2/path/update * * @param body - Content to be uploaded. * @param offset - Append offset in bytes. * @param length - Length of content to append in bytes. * @param options - Optional. Options when appending data. */ async append(body, offset, length, options = {}) { options.conditions = options.conditions || {}; return tracingClient.withSpan("DataLakeFileClient-append", options, async (updatedOptions) => { ensureCpkIfSpecified(options.customerProvidedKey, this.isHttps); return assertResponse(await this.pathContextInternal.appendData(body, Object.assign(Object.assign({}, updatedOptions), { pathHttpHeaders: { contentMD5: options.transactionalContentMD5, }, abortSignal: options.abortSignal, position: offset, contentLength: length, leaseAccessConditions: options.conditions, requestOptions: { onUploadProgress: options.onProgress, }, cpkInfo: options.customerProvidedKey, flush: options.flush, proposedLeaseId: options.proposedLeaseId, leaseDuration: options.leaseDurationInSeconds, leaseAction: options.leaseAction }))); }); } /** * Flushes (writes) previously appended data to a file. * * @param position - File position to flush. * This parameter allows the caller to upload data in parallel and control the order in which it is appended to the file. * It is required when uploading data to be appended to the file and when flushing previously uploaded data to the file. * The value must be the position where the data is to be appended. Uploaded data is not immediately flushed, or written, * to the file. To flush, the previously uploaded data must be contiguous, the position parameter must be specified and * equal to the length of the file after all data has been written, and there must not be a request entity body included * with the request. * @param options - Optional. Options when flushing data. */ async flush(position, options = {}) { options.conditions = options.conditions || {}; return tracingClient.withSpan("DataLakeFileClient-flush", options, async (updatedOptions) => { ensureCpkIfSpecified(options.customerProvidedKey, this.isHttps); return assertResponse(await this.pathContextInternal.flushData(Object.assign(Object.assign({}, updatedOptions), { position, contentLength: 0, leaseAccessConditions: options.conditions, modifiedAccessConditions: options.conditions, cpkInfo: options.customerProvidedKey, proposedLeaseId: options.proposedLeaseId, leaseDuration: options.leaseDurationInSeconds, leaseAction: options.leaseAction }))); }); } // high level functions /** * ONLY AVAILABLE IN NODE.JS RUNTIME. * * Uploads a local file to a Data Lake file. * * @param filePath - Full path of the local file * @param options - */ async uploadFile(filePath, // Legacy, no way to fix the eslint error without breaking. Disable the rule for this line. /* eslint-disable-next-line @azure/azure-sdk/ts-naming-options */ options = {}) { return tracingClient.withSpan("DataLakeFileClient-uploadFile", options, async (updatedOptions) => { const size = (await fsStat(filePath)).size; return this.uploadSeekableInternal((offset, contentSize) => { return () => fsCreateReadStream(filePath, { autoClose: true, end: offset + contentSize - 1, start: offset, }); }, size, updatedOptions); }); } /** * Uploads a Buffer(Node.js)/Blob/ArrayBuffer/ArrayBufferView to a File. * * @param data - Buffer(Node), Blob, ArrayBuffer or ArrayBufferView * @param options - */ async upload(data, options = {}) { return tracingClient.withSpan("DataLakeFileClient-upload", options, async (updatedOptions) => { if (isNode) { let buffer; if (data instanceof Buffer) { buffer = data; } else if (data instanceof ArrayBuffer) { buffer = Buffer.from(data); } else { data = data; buffer = Buffer.from(data.buffer, data.byteOffset, data.byteLength); } return this.uploadSeekableInternal((offset, size) => buffer.slice(offset, offset + size), buffer.length, updatedOptions); } else { const browserBlob = new Blob([data]); return this.uploadSeekableInternal((offset, size) => browserBlob.slice(offset, offset + size), browserBlob.size, updatedOptions); } }); } async uploadSeekableInternal(bodyFactory, size, options = {}) { return tracingClient.withSpan("DataLakeFileClient-uploadData", options, async (updatedOptions) => { var _a; if (size > FILE_MAX_SIZE_BYTES) { throw new RangeError(`size must be <= ${FILE_MAX_SIZE_BYTES}.`); } // Create the file. const createRes = this.create({ abortSignal: options.abortSignal, metadata: options.metadata, permissions: options.permissions, umask: options.umask, conditions: options.conditions, pathHttpHeaders: options.pathHttpHeaders, customerProvidedKey: updatedOptions.customerProvidedKey, tracingOptions: updatedOptions.tracingOptions, encryptionContext: updatedOptions.encryptionContext, }); // append() with empty data would return error, so do not continue if (size === 0) { return createRes; } else { await createRes; } // After the File is Create, Lease ID is the only valid request parameter. options.conditions = { leaseId: (_a = options.conditions) === null || _a === void 0 ? void 0 : _a.leaseId }; if (!options.chunkSize) { options.chunkSize = Math.ceil(size / BLOCK_BLOB_MAX_BLOCKS); if (options.chunkSize < FILE_UPLOAD_DEFAULT_CHUNK_SIZE) { options.chunkSize = FILE_UPLOAD_DEFAULT_CHUNK_SIZE; } } if (options.chunkSize < 1 || options.chunkSize > FILE_UPLOAD_MAX_CHUNK_SIZE) { throw new RangeError(`chunkSize option must be >= 1 and <= ${FILE_UPLOAD_MAX_CHUNK_SIZE}`); } if (!options.maxConcurrency) { options.maxConcurrency = DEFAULT_HIGH_LEVEL_CONCURRENCY; } if (options.maxConcurrency <= 0) { throw new RangeError(`maxConcurrency must be > 0.`); } if (!options.singleUploadThreshold) { options.singleUploadThreshold = FILE_MAX_SINGLE_UPLOAD_THRESHOLD; } if (options.singleUploadThreshold < 1 || options.singleUploadThreshold > FILE_MAX_SINGLE_UPLOAD_THRESHOLD) { throw new RangeError(`singleUploadThreshold option must be >= 1 and <= ${FILE_MAX_SINGLE_UPLOAD_THRESHOLD}`); } // When buffer length <= singleUploadThreshold, this method will use one append/flush call to finish the upload. if (size <= options.singleUploadThreshold) { await this.append(bodyFactory(0, size), 0, size, { abortSignal: options.abortSignal, conditions: options.conditions, customerProvidedKey: updatedOptions.customerProvidedKey, onProgress: options.onProgress, tracingOptions: updatedOptions.tracingOptions, }); return this.flush(size, { abortSignal: options.abortSignal, conditions: options.conditions, close: options.close, pathHttpHeaders: options.pathHttpHeaders, customerProvidedKey: updatedOptions.customerProvidedKey, tracingOptions: updatedOptions.tracingOptions, }); } const numBlocks = Math.floor((size - 1) / options.chunkSize) + 1; if (numBlocks > BLOCK_BLOB_MAX_BLOCKS) { throw new RangeError(`The data's size is too big or the chunkSize is too small;` + `the number of chunks must be <= ${BLOCK_BLOB_MAX_BLOCKS}`); } let transferProgress = 0; const batch = new Batch(options.maxConcurrency); for (let i = 0; i < numBlocks; i++) { batch.addOperation(async () => { const start = options.chunkSize * i; const end = i === numBlocks - 1 ? size : start + options.chunkSize; const contentLength = end - start; await this.append(bodyFactory(start, contentLength), start, contentLength, { abortSignal: options.abortSignal, conditions: options.conditions, customerProvidedKey: updatedOptions.customerProvidedKey, tracingOptions: updatedOptions.tracingOptions, }); transferProgress += contentLength; if (options.onProgress) { options.onProgress({ loadedBytes: transferProgress }); } }); } await batch.do(); return this.flush(size, { abortSignal: options.abortSignal, conditions: options.conditions, close: options.close, pathHttpHeaders: options.pathHttpHeaders, customerProvidedKey: updatedOptions.customerProvidedKey, tracingOptions: updatedOptions.tracingOptions, }); }); } /** * ONLY AVAILABLE IN NODE.JS RUNTIME. * * Uploads a Node.js Readable stream into a Data Lake file. * This method will try to create a file, then starts uploading chunk by chunk. * Please make sure potential size of stream doesn't exceed FILE_MAX_SIZE_BYTES and * potential number of chunks doesn't exceed BLOCK_BLOB_MAX_BLOCKS. * * PERFORMANCE IMPROVEMENT TIPS: * * Input stream highWaterMark is better to set a same value with options.chunkSize * parameter, which will avoid Buffer.concat() operations. * * @param stream - Node.js Readable stream. * @param options - */ async uploadStream(stream, options = {}) { return tracingClient.withSpan("DataLakeFileClient-uploadStream", options, async (updatedOptions) => { var _a; // Create the file await this.create({ abortSignal: options.abortSignal, metadata: options.metadata, permissions: options.permissions, umask: options.umask, conditions: options.conditions, pathHttpHeaders: options.pathHttpHeaders, customerProvidedKey: options.customerProvidedKey, tracingOptions: updatedOptions.tracingOptions, encryptionContext: updatedOptions.encryptionContext, }); // After the File is Create, Lease ID is the only valid request parameter. options.conditions = { leaseId: (_a = options.conditions) === null || _a === void 0 ? void 0 : _a.leaseId }; if (!options.chunkSize) { options.chunkSize = FILE_UPLOAD_DEFAULT_CHUNK_SIZE; } if (options.chunkSize < 1 || options.chunkSize > FILE_UPLOAD_MAX_CHUNK_SIZE) { throw new RangeError(`chunkSize option must be >= 1 and <= ${FILE_UPLOAD_MAX_CHUNK_SIZE}`); } if (!options.maxConcurrency) { options.maxConcurrency = DEFAULT_HIGH_LEVEL_CONCURRENCY; } if (options.maxConcurrency <= 0) { throw new RangeError(`maxConcurrency must be > 0.`); } let transferProgress = 0; const scheduler = new BufferScheduler(stream, options.chunkSize, options.maxConcurrency, async (body, length, offset) => { await this.append(body, offset, length, { abortSignal: options.abortSignal, conditions: options.conditions, customerProvidedKey: options.customerProvidedKey, tracingOptions: updatedOptions.tracingOptions, }); // Update progress after block is successfully uploaded to server, in case of block trying transferProgress += length; if (options.onProgress) { options.onProgress({ loadedBytes: transferProgress }); } }, // concurrency should set a smaller value than maxConcurrency, which is helpful to // reduce the possibility