@azure/storage-file-datalake
Microsoft Azure Storage SDK for JavaScript - DataLake
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
import { isTokenCredential } from "@azure/core-auth";
import { isNodeLike } from "@azure/core-util";
import { isPipelineLike, newPipeline } from "./Pipeline.js";
import { BlobClient, BlockBlobClient } from "@azure/storage-blob";
import { AnonymousCredential } from "@azure/storage-blob";
import { StorageSharedKeyCredential } from "./credentials/StorageSharedKeyCredential.js";
import { BufferScheduler } from "@azure/storage-common";
import { DataLakeLeaseClient } from "./DataLakeLeaseClient.js";
import { PathOperationsImpl as Path } from "./generated/src/operations/index.js";
import { generateDataLakeSASQueryParameters, generateDataLakeSASQueryParametersInternal, } from "./sas/DataLakeSASSignatureValues.js";
import { StorageClient } from "./StorageClient.js";
import { toAccessControlChangeFailureArray, toAcl, toAclString, toBlobCpkInfo, toPermissions, toPermissionsString, toProperties, } from "./transforms.js";
import { Batch } from "./utils/Batch.js";
import { BLOCK_BLOB_MAX_BLOCKS, DEFAULT_HIGH_LEVEL_CONCURRENCY, ETagAny, FILE_MAX_SINGLE_UPLOAD_THRESHOLD, FILE_MAX_SIZE_BYTES, FILE_UPLOAD_DEFAULT_CHUNK_SIZE, FILE_UPLOAD_MAX_CHUNK_SIZE, } from "./utils/constants.js";
import { DataLakeAclChangeFailedError } from "./utils/DataLakeAclChangeFailedError.js";
import { tracingClient } from "./utils/tracing.js";
import { appendToURLPath, appendToURLQuery, assertResponse, ensureCpkIfSpecified, getURLPathAndQuery, ParsePathGetPropertiesExtraHeaderValues, setURLPath, setURLQueries, } from "./utils/utils.common.js";
import { fsCreateReadStream, fsStat } from "./utils/utils.js";
/**
* A DataLakePathClient represents a URL to the Azure Storage path (directory or file).
*/
export class DataLakePathClient extends StorageClient {
/**
* pathContext provided by protocol layer.
*/
pathContext;
/**
* blobClient provided by `@azure/storage-blob` package.
*/
blobClient;
isTokenCredential;
/**
* SetAccessControlRecursiveInternal operation sets the Access Control on a path and sub paths.
*
* @param mode - Mode \"set\" sets POSIX access control rights on files and directories,
* Mode \"modify\" modifies one or more POSIX access control rights that pre-exist on files and directories,
* Mode \"remove\" removes one or more POSIX access control rights that were present earlier on files and directories.
* @param acl - The POSIX access control list for the file or directory.
* @param options - Optional. Options
*/
async setAccessControlRecursiveInternal(mode, acl, options = {}) {
if (options.maxBatches !== undefined && options.maxBatches < 1) {
throw RangeError(`Options maxBatches must be larger than 0.`);
}
if (options.batchSize !== undefined && options.batchSize < 1) {
throw RangeError(`Options batchSize must be larger than 0.`);
}
const result = {
counters: {
failedChangesCount: 0,
changedDirectoriesCount: 0,
changedFilesCount: 0,
},
continuationToken: undefined,
};
return tracingClient.withSpan("DataLakePathClient-setAccessControlRecursiveInternal", options, async (updatedOptions) => {
let continuationToken = options.continuationToken;
let batchCounter = 0;
let reachMaxBatches = false;
do {
let response;
try {
response = await this.pathContext.setAccessControlRecursive(mode, {
...updatedOptions,
acl: toAclString(acl),
maxRecords: options.batchSize,
continuation: continuationToken,
forceFlag: options.continueOnFailure,
});
}
catch (e) {
throw new DataLakeAclChangeFailedError(e, continuationToken);
}
batchCounter++;
continuationToken = response.continuation;
// Update result
result.continuationToken = continuationToken;
result.counters.failedChangesCount += response.failureCount ?? 0;
result.counters.changedDirectoriesCount += response.directoriesSuccessful ?? 0;
result.counters.changedFilesCount += response.filesSuccessful ?? 0;
// Progress event call back
if (options.onProgress) {
const progress = {
batchFailures: toAccessControlChangeFailureArray(response.failedEntries),
batchCounters: {
failedChangesCount: response.failureCount ?? 0,
changedDirectoriesCount: response.directoriesSuccessful ?? 0,
changedFilesCount: response.filesSuccessful ?? 0,
},
aggregateCounters: result.counters,
continuationToken: continuationToken,
};
options.onProgress(progress);
}
reachMaxBatches =
options.maxBatches === undefined ? false : batchCounter >= options.maxBatches;
} while (continuationToken && !reachMaxBatches);
return result;
});
}
constructor(url, credentialOrPipeline,
// Legacy, no way to fix the eslint error without breaking. Disable the rule for this line.
/* eslint-disable-next-line @azure/azure-sdk/ts-naming-options */
options) {
if (isPipelineLike(credentialOrPipeline)) {
super(url, credentialOrPipeline);
}
else {
let credential;
if (credentialOrPipeline === undefined) {
credential = new AnonymousCredential();
}
else {
credential = credentialOrPipeline;
}
const pipeline = newPipeline(credential, options);
super(url, pipeline);
}
this.pathContext = new Path(this.storageClientContext);
this.blobClient = new BlobClient(this.blobEndpointUrl, this.pipeline);
}
/**
* Name of current file system.
*
* @readonly
*/
get fileSystemName() {
return this.blobClient.containerName;
}
/**
* Name of current path (directory or file).
*
* @readonly
*/
get name() {
return this.blobClient.name;
}
/**
* Convert current DataLakePathClient to DataLakeDirectoryClient if current path is a directory.
*
*/
// Legacy, no way to fix the eslint error without breaking. Disable the rule for this line.
/* eslint-disable-next-line @azure/azure-sdk/ts-naming-subclients */
toDirectoryClient() {
return new DataLakeDirectoryClient(this.dfsEndpointUrl, this.pipeline);
}
/**
* Convert current DataLakePathClient to DataLakeFileClient if current path is a file.
*
*/
// Legacy, no way to fix the eslint error without breaking. Disable the rule for this line.
/* eslint-disable-next-line @azure/azure-sdk/ts-naming-subclients */
toFileClient() {
return new DataLakeFileClient(this.dfsEndpointUrl, this.pipeline);
}
/**
* Get a {@link DataLakeLeaseClient} that manages leases on the path (directory or file).
*
* @param proposeLeaseId - Optional. Initial proposed lease Id.
*/
getDataLakeLeaseClient(proposeLeaseId) {
return new DataLakeLeaseClient(this.blobClient.getBlobLeaseClient(proposeLeaseId));
}
/**
* Create a directory or file.
*
* @see https://learn.microsoft.com/rest/api/storageservices/datalakestoragegen2/path/create
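*
* Example usage (a minimal sketch; the endpoint URL and path names are placeholders):
*
* ```ts
* import { DataLakePathClient } from "@azure/storage-file-datalake";
* import { DefaultAzureCredential } from "@azure/identity";
*
* const pathClient = new DataLakePathClient(
*   "https://<account>.dfs.core.windows.net/<file system>/<path>",
*   new DefaultAzureCredential(),
* );
* // Create the path as a file. A numeric expiresOn is treated as milliseconds
* // relative to now; a Date is treated as an absolute expiry time.
* await pathClient.create("file", { expiresOn: 24 * 60 * 60 * 1000 });
* ```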
*
* @param resourceType - Resource type, "directory" or "file".
* @param options - Optional. Options when creating path.
*/
async create(resourceType, options = {}) {
options.conditions = options.conditions || {};
return tracingClient.withSpan("DataLakePathClient-create", options, async (updatedOptions) => {
ensureCpkIfSpecified(options.customerProvidedKey, this.isHttps);
let expiryOptions;
let expiresOn;
if (typeof options.expiresOn === "number" && Number.isFinite(options.expiresOn)) {
expiryOptions = "RelativeToNow";
expiresOn = String(Math.round(options.expiresOn));
}
else if (options.expiresOn instanceof Date) {
expiryOptions = "Absolute";
expiresOn = options.expiresOn.toUTCString();
}
else if (options.expiresOn) {
throw new Error(`Value for expiresOn is invalid: ${options.expiresOn}`);
}
return assertResponse(await this.pathContext.create({
...updatedOptions,
resource: resourceType,
leaseAccessConditions: options.conditions,
modifiedAccessConditions: options.conditions,
properties: toProperties(options.metadata),
cpkInfo: options.customerProvidedKey,
acl: options.acl ? toAclString(options.acl) : undefined,
expiryOptions,
expiresOn,
}));
});
}
/**
* Create a directory or file. If the resource already exists, it is not changed.
*
* @see https://learn.microsoft.com/rest/api/storageservices/datalakestoragegen2/path/create
*
* @param resourceType - Resource type, "directory" or "file".
* @param options -
*/
async createIfNotExists(resourceType, options = {}) {
return tracingClient.withSpan("DataLakePathClient-createIfNotExists", options, async (updatedOptions) => {
try {
const conditions = { ifNoneMatch: ETagAny };
const res = await this.create(resourceType, {
...options,
conditions,
tracingOptions: updatedOptions.tracingOptions,
});
return {
succeeded: true,
...res,
};
}
catch (e) {
if (e.details?.errorCode === "PathAlreadyExists") {
return {
succeeded: false,
...e.response?.parsedHeaders,
_response: e.response,
};
}
throw e;
}
});
}
/**
* Returns true if the Data Lake file represented by this client exists; false otherwise.
*
* NOTE: use this function with care since an existing file might be deleted by other clients or
* applications. Vice versa, new files might be added by other clients or applications after this
* function completes.
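*
* Example usage (a minimal sketch; assumes `pathClient` is the DataLakePathClient from the create() example above):
*
* ```ts
* // Another client may create or delete the path between this check and any
* // follow-up operation, so treat the result as a point-in-time answer.
* if (await pathClient.exists()) {
*   console.log("The path exists.");
* }
* ```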
*
* @param options - options to Exists operation.
*/
async exists(options = {}) {
return tracingClient.withSpan("DataLakeFileClient-exists", options, async (updatedOptions) => {
return this.blobClient.exists({
...updatedOptions,
customerProvidedKey: toBlobCpkInfo(updatedOptions.customerProvidedKey),
});
});
}
/**
* Delete current path (directory or file).
*
* @see https://learn.microsoft.com/rest/api/storageservices/datalakestoragegen2/path/delete
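*
* Example usage (a minimal sketch; assumes `pathClient` is the DataLakePathClient from the create() example above):
*
* ```ts
* // For a directory, pass true to also delete all paths beneath it;
* // for a file, call delete() without arguments.
* await pathClient.delete(true);
* ```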
*
* @param recursive - Required and valid only when the resource is a directory. If "true", all paths beneath the directory will be deleted.
* @param options - Optional. Options when deleting path.
*/
async delete(recursive, options = {}) {
options.conditions = options.conditions || {};
return tracingClient.withSpan("DataLakePathClient-delete", options, async (updatedOptions) => {
if (this.isTokenCredential === undefined) {
this.isTokenCredential = false;
this.pipeline.factories.forEach((factory) => {
if (isTokenCredential(factory.credential)) {
this.isTokenCredential = true;
}
});
if (isTokenCredential(this.pipeline._credential)) {
this.isTokenCredential = true;
}
}
const paginated = recursive === true && this.isTokenCredential === true;
let continuation;
let response;
// Paginated deletes can require multiple service calls; loop while the service returns a continuation token.
do {
response = assertResponse(await this.pathContext.delete({
...updatedOptions,
continuation,
recursive,
leaseAccessConditions: options.conditions,
modifiedAccessConditions: options.conditions,
abortSignal: options.abortSignal,
paginated,
}));
continuation = response.continuation;
} while (continuation);
return response;
});
}
/**
* Delete current path (directory or file) if it exists.
*
* @see https://learn.microsoft.com/rest/api/storageservices/datalakestoragegen2/path/delete
*
* @param recursive - Required and valid only when the resource is a directory. If "true", all paths beneath the directory will be deleted.
* @param options -
*/
async deleteIfExists(recursive, options = {}) {
options.conditions = options.conditions || {};
return tracingClient.withSpan("DataLakePathClient-deleteIfExists", options, async (updatedOptions) => {
try {
const res = await this.delete(recursive, updatedOptions);
return {
succeeded: true,
...res,
};
}
catch (e) {
if (e.details?.errorCode === "PathNotFound") {
return {
succeeded: false,
...e.response?.parsedHeaders,
_response: e.response,
};
}
throw e;
}
});
}
/**
* Returns the access control data for a path (directory or file).
*
* @see https://learn.microsoft.com/rest/api/storageservices/datalakestoragegen2/path/getproperties
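*
* Example usage (a minimal sketch; assumes `pathClient` is the DataLakePathClient from the create() example above):
*
* ```ts
* const aclResponse = await pathClient.getAccessControl();
* console.log(`owner: ${aclResponse.owner}, group: ${aclResponse.group}`);
* for (const aclEntry of aclResponse.acl) {
*   console.log(aclEntry.accessControlType, aclEntry.entityId, aclEntry.permissions);
* }
* ```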
*
* @param options - Optional. Options when getting file access control.
*/
async getAccessControl(options = {}) {
options.conditions = options.conditions || {};
return tracingClient.withSpan("DataLakePathClient-getAccessControl", options, async (updatedOptions) => {
const response = assertResponse(await this.pathContext.getProperties({
...updatedOptions,
action: "getAccessControl",
upn: options.userPrincipalName,
leaseAccessConditions: options.conditions,
modifiedAccessConditions: options.conditions,
abortSignal: options.abortSignal,
}));
return {
...response,
_response: response._response,
permissions: toPermissions(response.permissions),
acl: toAcl(response.acl),
};
});
}
/**
* Set the access control data for a path (directory or file).
*
* @see https://learn.microsoft.com/rest/api/storageservices/datalakestoragegen2/path/update
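*
* Example usage (a minimal sketch; assumes `pathClient` is the DataLakePathClient from the create() example above):
*
* ```ts
* // Grant the owning user rwx, the owning group r-x, and others no access.
* await pathClient.setAccessControl([
*   { accessControlType: "user", entityId: "", defaultScope: false, permissions: { read: true, write: true, execute: true } },
*   { accessControlType: "group", entityId: "", defaultScope: false, permissions: { read: true, write: false, execute: true } },
*   { accessControlType: "other", entityId: "", defaultScope: false, permissions: { read: false, write: false, execute: false } },
* ]);
* ```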
*
* @param acl - The POSIX access control list for the file or directory.
* @param options - Optional. Options when setting path access control.
*/
async setAccessControl(acl, options = {}) {
options.conditions = options.conditions || {};
return tracingClient.withSpan("DataLakePathClient-setAccessControl", options, async (updatedOptions) => {
return assertResponse(await this.pathContext.setAccessControl({
...updatedOptions,
acl: toAclString(acl),
leaseAccessConditions: options.conditions,
modifiedAccessConditions: options.conditions,
}));
});
}
/**
* Sets the Access Control on a path and sub paths.
*
* @see https://learn.microsoft.com/rest/api/storageservices/datalakestoragegen2/path/update
*
* @param acl - The POSIX access control list for the file or directory.
* @param options - Optional. Options
*/
async setAccessControlRecursive(acl, options = {}) {
return tracingClient.withSpan("DataLakePathClient-setAccessControlRecursive", options, async (updatedOptions) => {
return this.setAccessControlRecursiveInternal("set", acl, updatedOptions);
});
}
/**
* Modifies the Access Control on a path and sub paths.
*
* @see https://learn.microsoft.com/rest/api/storageservices/datalakestoragegen2/path/update
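*
* Example usage (a minimal sketch; assumes `directoryClient` is a DataLakeDirectoryClient and the object ID is a placeholder):
*
* ```ts
* const result = await directoryClient.updateAccessControlRecursive(
*   [
*     {
*       accessControlType: "user",
*       entityId: "<AAD object ID>",
*       defaultScope: false,
*       permissions: { read: true, write: true, execute: false },
*     },
*   ],
*   {
*     onProgress: (progress) => {
*       console.log(`${progress.aggregateCounters.changedFilesCount} files changed so far`);
*     },
*   },
* );
* console.log(`Failed changes: ${result.counters.failedChangesCount}`);
* ```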
*
* @param acl - The POSIX access control list for the file or directory.
* @param options - Optional. Options
*/
async updateAccessControlRecursive(acl, options = {}) {
return tracingClient.withSpan("DataLakePathClient-updateAccessControlRecursive", options, async (updatedOptions) => {
return this.setAccessControlRecursiveInternal("modify", acl, updatedOptions);
});
}
/**
* Removes the Access Control on a path and sub paths.
*
* @see https://learn.microsoft.com/rest/api/storageservices/datalakestoragegen2/path/update
*
* @param acl - The POSIX access control list for the file or directory.
* @param options - Optional. Options
*/
async removeAccessControlRecursive(acl, options = {}) {
return tracingClient.withSpan("DataLakePathClient-removeAccessControlRecursive", options, async (updatedOptions) => {
return this.setAccessControlRecursiveInternal("remove", acl, updatedOptions);
});
}
/**
* Sets the file permissions on a path.
*
* @see https://learn.microsoft.com/rest/api/storageservices/datalakestoragegen2/path/update
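*
* Example usage (a minimal sketch; assumes `pathClient` is the DataLakePathClient from the create() example above):
*
* ```ts
* // Equivalent of POSIX mode 750.
* await pathClient.setPermissions({
*   owner: { read: true, write: true, execute: true },
*   group: { read: true, write: false, execute: true },
*   other: { read: false, write: false, execute: false },
*   stickyBit: false,
*   extendedAcls: false,
* });
* ```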
*
* @param permissions - The POSIX access permissions for the file owner, the file owning group, and others.
* @param options - Optional. Options when setting path permissions.
*/
async setPermissions(permissions, options = {}) {
options.conditions = options.conditions || {};
return tracingClient.withSpan("DataLakePathClient-setPermissions", options, async (updatedOptions) => {
return assertResponse(await this.pathContext.setAccessControl({
...updatedOptions,
permissions: toPermissionsString(permissions),
leaseAccessConditions: options.conditions,
modifiedAccessConditions: options.conditions,
}));
});
}
/**
* Returns all user-defined metadata, standard HTTP properties, and system properties
* for the path (directory or file).
*
* WARNING: The `metadata` object returned in the response will have its keys in lowercase, even if
* they originally contained uppercase characters. This differs from the metadata keys returned by
* the methods of {@link DataLakeFileSystemClient} that list paths using the `includeMetadata` option, which
* will retain their original casing.
*
* @see https://learn.microsoft.com/rest/api/storageservices/get-blob-properties
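*
* Example usage (a minimal sketch; assumes `pathClient` is the DataLakePathClient from the create() example above):
*
* ```ts
* const properties = await pathClient.getProperties();
* // Note that metadata keys are returned lowercased here.
* console.log(properties.contentLength, properties.lastModified, properties.metadata);
* ```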
*
* @param options - Optional. Options when getting path properties.
*/
async getProperties(options = {}) {
return tracingClient.withSpan("DataLakePathClient-getProperties", options, async (updatedOptions) => {
const response = await this.blobClient.getProperties({
...options,
customerProvidedKey: toBlobCpkInfo(options.customerProvidedKey),
tracingOptions: updatedOptions.tracingOptions,
});
return ParsePathGetPropertiesExtraHeaderValues(response);
});
}
/**
* Sets system properties on the path (directory or file).
*
* If no value is provided, or no value is provided for a specified blob HTTP header,
* the blob HTTP headers without a value will be cleared.
* @see https://learn.microsoft.com/rest/api/storageservices/set-blob-properties
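*
* Example usage (a minimal sketch; assumes `fileClient` is a DataLakeFileClient for an existing file):
*
* ```ts
* // Headers omitted here (for example contentMD5) are cleared.
* await fileClient.setHttpHeaders({
*   contentType: "text/plain",
*   cacheControl: "max-age=3600",
* });
* ```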
*
* @param httpHeaders -
* @param options -
*/
async setHttpHeaders(httpHeaders, options = {}) {
return tracingClient.withSpan("DataLakePathClient-setHttpHeaders", options, async (updatedOptions) => {
return this.blobClient.setHTTPHeaders({
blobCacheControl: httpHeaders.cacheControl,
blobContentType: httpHeaders.contentType,
blobContentMD5: httpHeaders.contentMD5,
blobContentEncoding: httpHeaders.contentEncoding,
blobContentLanguage: httpHeaders.contentLanguage,
blobContentDisposition: httpHeaders.contentDisposition,
}, updatedOptions);
});
}
/**
* Sets user-defined metadata for the specified path (directory or file) as one or more name-value pairs.
*
* If no options are provided, or if no metadata is defined in the parameter, the path
* metadata will be removed.
*
* @see https://learn.microsoft.com/rest/api/storageservices/set-blob-metadata
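*
* Example usage (a minimal sketch; assumes `pathClient` is the DataLakePathClient from the create() example above):
*
* ```ts
* // Replaces all existing metadata on the path.
* await pathClient.setMetadata({ project: "alpha", owner: "bob" });
* // Calling with no arguments removes the existing metadata.
* await pathClient.setMetadata();
* ```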
*
* @param metadata - Optional. Replace existing metadata with this value.
* If no value is provided, the existing metadata will be removed.
* @param options - Optional. Options when setting path metadata.
*/
async setMetadata(metadata, options = {}) {
return tracingClient.withSpan("DataLakePathClient-setMetadata", options, async (updatedOptions) => {
return this.blobClient.setMetadata(metadata, {
...options,
customerProvidedKey: toBlobCpkInfo(options.customerProvidedKey),
tracingOptions: updatedOptions.tracingOptions,
});
});
}
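/**
* Move a directory or file within the same account, optionally across file systems.
*
* @see https://learn.microsoft.com/rest/api/storageservices/datalakestoragegen2/path/create
*
* Example usage (a minimal sketch; assumes `pathClient` is the DataLakePathClient from the create() example above and the destination names are placeholders):
*
* ```ts
* // Rename within the current file system.
* await pathClient.move("<new path>");
* // Or move into another file system.
* await pathClient.move("<destination file system>", "<new path>");
* ```
*
* @param destinationPathOrFileSystem - Destination path, or the destination file system when a destination path is also given.
* @param destinationPathOrOptions - Destination path, or options when the first argument is the destination path.
* @param options - Optional. Options when moving the path.
*/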
async move(destinationPathOrFileSystem, destinationPathOrOptions, options) {
let destinationFileSystem = this.fileSystemName;
let destinationPath = destinationPathOrFileSystem;
let pathMoveOptions;
if (typeof destinationPathOrOptions === "string") {
destinationFileSystem = destinationPathOrFileSystem;
destinationPath = destinationPathOrOptions;
pathMoveOptions = options ?? {};
}
else {
pathMoveOptions = destinationPathOrOptions ?? {};
}
const renameSource = getURLPathAndQuery(this.dfsEndpointUrl);
const split = destinationPath.split("?");
let destinationUrl;
if (split.length === 2) {
const renameDestination = `/${destinationFileSystem}/${split[0]}`;
destinationUrl = setURLPath(this.dfsEndpointUrl, renameDestination);
destinationUrl = setURLQueries(destinationUrl, split[1]);
}
else if (split.length === 1) {
const renameDestination = `/${destinationFileSystem}/${destinationPath}`;
destinationUrl = setURLPath(this.dfsEndpointUrl, renameDestination);
}
else {
throw new RangeError("Destination path should not contain more than one query string");
}
const destPathClient = new DataLakePathClient(destinationUrl, this.pipeline);
return tracingClient.withSpan("DataLakePathClient-move", pathMoveOptions, async (updatedOptions) => {
return assertResponse(await destPathClient.pathContext.create({
...updatedOptions,
mode: "legacy", // By default
renameSource,
sourceLeaseId: pathMoveOptions.conditions?.leaseId,
leaseAccessConditions: pathMoveOptions.destinationConditions,
sourceModifiedAccessConditions: {
sourceIfMatch: pathMoveOptions.conditions?.ifMatch,
sourceIfNoneMatch: pathMoveOptions.conditions?.ifNoneMatch,
sourceIfModifiedSince: pathMoveOptions.conditions?.ifModifiedSince,
sourceIfUnmodifiedSince: pathMoveOptions.conditions?.ifUnmodifiedSince,
},
modifiedAccessConditions: pathMoveOptions.destinationConditions,
abortSignal: pathMoveOptions.abortSignal,
}));
});
}
}
/**
* A DataLakeDirectoryClient represents a URL to the Azure Storage directory.
*/
export class DataLakeDirectoryClient extends DataLakePathClient {
async create(resourceTypeOrOptions, options = {}) {
if (resourceTypeOrOptions === "file") {
throw TypeError(`DataLakeDirectoryClient:create() resourceType cannot be ${resourceTypeOrOptions}. Refer to DataLakeFileClient for file creation.`);
}
let pathCreateOptions;
if (resourceTypeOrOptions === "directory") {
pathCreateOptions = options;
}
else {
pathCreateOptions = resourceTypeOrOptions ?? {};
}
return tracingClient.withSpan("DataLakeDirectoryClient-create", pathCreateOptions, async (updatedOptions) => {
return super.create("directory", updatedOptions);
});
}
async createIfNotExists(resourceTypeOrOptions, options = {}) {
if (resourceTypeOrOptions === "file") {
throw TypeError(`DataLakeDirectoryClient:createIfNotExists() resourceType cannot be ${resourceTypeOrOptions}. Refer to DataLakeFileClient for file creation.`);
}
if (resourceTypeOrOptions !== "directory") {
options = resourceTypeOrOptions || {};
}
return tracingClient.withSpan("DataLakeDirectoryClient-createIfNotExists", options, async (updatedOptions) => {
return super.createIfNotExists("directory", {
...updatedOptions,
});
});
}
/**
* Creates a {@link DataLakeDirectoryClient} object under current directory.
*
* @param subdirectoryName - Subdirectory name.
*/
getSubdirectoryClient(subdirectoryName) {
return new DataLakeDirectoryClient(appendToURLPath(this.url, encodeURIComponent(subdirectoryName)), this.pipeline);
}
/**
* Creates a {@link DataLakeFileClient} object under current directory.
*
* @param fileName -
*/
// Legacy, no way to fix the eslint error without breaking. Disable the rule for this line.
/* eslint-disable-next-line @azure/azure-sdk/ts-naming-subclients */
getFileClient(fileName) {
return new DataLakeFileClient(appendToURLPath(this.url, encodeURIComponent(fileName)), this.pipeline);
}
/**
* Only available for clients constructed with a shared key credential.
*
* Generates a Service Shared Access Signature (SAS) URI based on the client properties
* and parameters passed in. The SAS is signed by the shared key credential of the client.
*
* @see https://learn.microsoft.com/rest/api/storageservices/constructing-a-service-sas
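*
* Example usage (a minimal sketch; assumes `directoryClient` was constructed with a StorageSharedKeyCredential):
*
* ```ts
* import { DataLakeSASPermissions } from "@azure/storage-file-datalake";
*
* // Read-only SAS URL for this directory, valid for one hour.
* const sasUrl = await directoryClient.generateSasUrl({
*   permissions: DataLakeSASPermissions.parse("r"),
*   expiresOn: new Date(Date.now() + 60 * 60 * 1000),
* });
* ```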
*
* @param options - Optional parameters.
* @returns The SAS URI consisting of the URI to the resource represented by this client, followed by the generated SAS token.
*/
generateSasUrl(options) {
return new Promise((resolve) => {
if (!(this.credential instanceof StorageSharedKeyCredential)) {
throw RangeError("Can only generate the SAS when the client is initialized with a shared key credential");
}
const sas = generateDataLakeSASQueryParameters({
fileSystemName: this.fileSystemName,
pathName: this.name,
isDirectory: true,
...options,
}, this.credential).toString();
resolve(appendToURLQuery(this.url, sas));
});
}
/**
* Generates string to sign for a Service Shared Access Signature (SAS) URI based on the client properties
* and parameters passed in.
*
* @see https://learn.microsoft.com/rest/api/storageservices/constructing-a-service-sas
*
* @param options - Optional parameters.
* @returns The string to sign for the SAS URI.
*/
/* eslint-disable-next-line @azure/azure-sdk/ts-naming-options*/
generateSasStringToSign(options) {
if (!(this.credential instanceof StorageSharedKeyCredential)) {
throw RangeError("Can only generate the SAS when the client is initialized with a shared key credential");
}
return generateDataLakeSASQueryParametersInternal({
fileSystemName: this.fileSystemName,
pathName: this.name,
isDirectory: true,
...options,
}, this.credential).stringToSign;
}
/**
* Generates a Service Shared Access Signature (SAS) URI based on the client properties
* and parameters passed in. The SAS is signed by the input user delegation key.
*
* @see https://learn.microsoft.com/rest/api/storageservices/constructing-a-service-sas
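*
* Example usage (a minimal sketch; assumes `dataLakeServiceClient` was constructed with a TokenCredential and `directoryClient` belongs to the same account):
*
* ```ts
* import { DataLakeSASPermissions } from "@azure/storage-file-datalake";
*
* const now = new Date();
* const inOneHour = new Date(now.valueOf() + 60 * 60 * 1000);
* const userDelegationKey = await dataLakeServiceClient.getUserDelegationKey(now, inOneHour);
* const sasUrl = await directoryClient.generateUserDelegationSasUrl(
*   { permissions: DataLakeSASPermissions.parse("r"), expiresOn: inOneHour },
*   userDelegationKey,
* );
* ```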
*
* @param options - Optional parameters.
* @param userDelegationKey - Return value of `dataLakeServiceClient.getUserDelegationKey()`
* @returns The SAS URI consisting of the URI to the resource represented by this client, followed by the generated SAS token.
*/
generateUserDelegationSasUrl(options, userDelegationKey) {
return new Promise((resolve) => {
const sas = generateDataLakeSASQueryParameters({
fileSystemName: this.fileSystemName,
pathName: this.name,
isDirectory: true,
...options,
}, userDelegationKey, this.accountName).toString();
resolve(appendToURLQuery(this.url, sas));
});
}
/**
* Generates string to sign for a Service Shared Access Signature (SAS) URI based on the client properties
* and parameters passed in. The SAS is signed by the input user delegation key.
*
* @see https://learn.microsoft.com/rest/api/storageservices/constructing-a-service-sas
*
* @param options - Optional parameters.
* @param userDelegationKey - Return value of `dataLakeServiceClient.getUserDelegationKey()`
* @returns The string to sign for the SAS URI.
*/
generateUserDelegationSasStringToSign(options, userDelegationKey) {
return generateDataLakeSASQueryParametersInternal({
fileSystemName: this.fileSystemName,
pathName: this.name,
isDirectory: true,
...options,
}, userDelegationKey, this.accountName).stringToSign;
}
}
/**
* A DataLakeFileClient represents a URL to the Azure Storage file.
*/
export class DataLakeFileClient extends DataLakePathClient {
/**
* pathContextInternal provided by protocol layer.
*/
pathContextInternal;
/**
* pathContextInternalToBlobEndpoint provided by protocol layer, with its URL pointing to the Blob endpoint.
*/
pathContextInternalToBlobEndpoint;
/**
* blockBlobClientInternal provided by `@azure/storage-blob` package.
*/
blockBlobClientInternal;
constructor(url, credentialOrPipeline,
// Legacy, no way to fix the eslint error without breaking. Disable the rule for this line.
/* eslint-disable-next-line @azure/azure-sdk/ts-naming-options */
options) {
if (isPipelineLike(credentialOrPipeline)) {
super(url, credentialOrPipeline);
}
else {
let credential;
if (credentialOrPipeline === undefined) {
credential = new AnonymousCredential();
}
else {
credential = credentialOrPipeline;
}
const pipeline = newPipeline(credential, options);
super(url, pipeline);
}
this.pathContextInternal = new Path(this.storageClientContext);
this.blockBlobClientInternal = new BlockBlobClient(this.blobEndpointUrl, this.pipeline);
this.pathContextInternalToBlobEndpoint = new Path(this.storageClientContextToBlobEndpoint);
}
async create(resourceTypeOrOptions, options = {}) {
if (resourceTypeOrOptions === "directory") {
throw TypeError(`DataLakeFileClient:create() resourceType cannot be ${resourceTypeOrOptions}. Refer to DataLakeDirectoryClient for directory creation.`);
}
let pathCreateOptions;
if (resourceTypeOrOptions === "file") {
pathCreateOptions = options;
}
else {
pathCreateOptions = resourceTypeOrOptions ?? {};
}
return tracingClient.withSpan("DataLakeFileClient-create", pathCreateOptions, async (updatedOptions) => {
return super.create("file", updatedOptions);
});
}
async createIfNotExists(resourceTypeOrOptions, options = {}) {
if (resourceTypeOrOptions === "directory") {
throw TypeError(`DataLakeFileClient:createIfNotExists() resourceType cannot be ${resourceTypeOrOptions}. Refer to DataLakeDirectoryClient for directory creation.`);
}
if (resourceTypeOrOptions !== "file") {
options = resourceTypeOrOptions || {};
}
return tracingClient.withSpan("DataLakeFileClient-createIfNotExists", options, async (updatedOptions) => {
return super.createIfNotExists("file", updatedOptions);
});
}
/**
* Downloads a file from the service, including its metadata and properties.
*
* * In Node.js, data is returned in a Readable stream readableStreamBody
* * In browsers, data is returned in a promise contentAsBlob
*
* @see https://learn.microsoft.com/rest/api/storageservices/get-blob
*
* * Example usage (Node.js):
*
* ```ts snippet:ReadmeSampleDownloadFile_Node
* import { DataLakeServiceClient } from "@azure/storage-file-datalake";
* import { DefaultAzureCredential } from "@azure/identity";
*
* const account = "<account>";
* const datalakeServiceClient = new DataLakeServiceClient(
* `https://${account}.dfs.core.windows.net`,
* new DefaultAzureCredential(),
* );
*
* const fileSystemName = "<file system name>";
* const fileName = "<file name>";
* const fileSystemClient = datalakeServiceClient.getFileSystemClient(fileSystemName);
* const fileClient = fileSystemClient.getFileClient(fileName);
*
* // Get file content from position 0 to the end
* // In Node.js, get downloaded data by accessing downloadResponse.readableStreamBody
* const downloadResponse = await fileClient.read();
* if (downloadResponse.readableStreamBody) {
* const downloaded = await streamToBuffer(downloadResponse.readableStreamBody);
* console.log("Downloaded file content:", downloaded.toString());
* }
*
* // [Node.js only] A helper method used to read a Node.js readable stream into a Buffer.
* async function streamToBuffer(readableStream: NodeJS.ReadableStream): Promise<Buffer> {
* return new Promise((resolve, reject) => {
* const chunks: Buffer[] = [];
* readableStream.on("data", (data) => {
* chunks.push(data instanceof Buffer ? data : Buffer.from(data));
* });
* readableStream.on("end", () => {
* resolve(Buffer.concat(chunks));
* });
* readableStream.on("error", reject);
* });
* }
* ```
*
* Example usage (browser):
*
* ```ts snippet:ReadmeSampleDownloadFile_Browser
* import { DataLakeServiceClient } from "@azure/storage-file-datalake";
*
* const account = "<account>";
* const sas = "<sas token>";
* const datalakeServiceClient = new DataLakeServiceClient(
* `https://${account}.dfs.core.windows.net${sas}`,
* );
*
* const fileSystemName = "<file system name>";
* const fileName = "<file name>";
* const fileSystemClient = datalakeServiceClient.getFileSystemClient(fileSystemName);
* const fileClient = fileSystemClient.getFileClient(fileName);
*
* // Get file content from position 0 to the end
* // In browsers, get downloaded data by accessing downloadResponse.contentAsBlob
* const downloadResponse = await fileClient.read();
* if (downloadResponse.contentAsBlob) {
* const blob = await downloadResponse.contentAsBlob;
* const downloaded = await blob.text();
* console.log(`Downloaded file content ${downloaded}`);
* }
* ```
*
* @param offset - Optional. Offset to read file, default value is 0.
* @param count - Optional. How many bytes to read, default will read from offset to the end.
* @param options - Optional. Options when reading file.
*/
async read(offset = 0, count, options = {}) {
return tracingClient.withSpan("DataLakeFileClient-read", options, async (updatedOptions) => {
const rawResponse = await this.blockBlobClientInternal.download(offset, count, {
...updatedOptions,
customerProvidedKey: toBlobCpkInfo(updatedOptions.customerProvidedKey),
});
const response = ParsePathGetPropertiesExtraHeaderValues(rawResponse);
if (!isNodeLike && !response.contentAsBlob) {
response.contentAsBlob = rawResponse.blobBody;
}
response.fileContentMD5 = rawResponse.blobContentMD5;
response._response.parsedHeaders.fileContentMD5 =
rawResponse._response.parsedHeaders.blobContentMD5;
delete rawResponse.blobContentMD5;
delete rawResponse._response.parsedHeaders.blobContentMD5;
return response;
});
}
/**
* Uploads data to be appended to a file. Data can only be appended to a file.
* To apply previously uploaded data to a file, call flush.
*
* @see https://learn.microsoft.com/rest/api/storageservices/datalakestoragegen2/path/update
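*
* Example usage (a minimal sketch; assumes `fileClient` is a DataLakeFileClient):
*
* ```ts
* const content = "Hello world!";
* await fileClient.create();
* await fileClient.append(content, 0, content.length);
* // Appended data is not readable until it is flushed at the final length.
* await fileClient.flush(content.length);
* ```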
*
* @param body - Content to be uploaded.
* @param offset - Append offset in bytes.
* @param length - Length of content to append in bytes.
* @param options - Optional. Options when appending data.
*/
async append(body, offset, length, options = {}) {
options.conditions = options.conditions || {};
return tracingClient.withSpan("DataLakeFileClient-append", options, async (updatedOptions) => {
ensureCpkIfSpecified(options.customerProvidedKey, this.isHttps);
return assertResponse(await this.pathContextInternal.appendData(body, {
...updatedOptions,
pathHttpHeaders: {
contentMD5: options.transactionalContentMD5,
},
abortSignal: options.abortSignal,
position: offset,
contentLength: length,
leaseAccessConditions: options.conditions,
requestOptions: {
onUploadProgress: options.onProgress,
},
cpkInfo: options.customerProvidedKey,
flush: options.flush,
proposedLeaseId: options.proposedLeaseId,
leaseDuration: options.leaseDurationInSeconds,
leaseAction: options.leaseAction,
}));
});
}
/**
* Flushes (writes) previously appended data to a file.
*
* @param position - File position to flush.
* This parameter allows the caller to upload data in parallel and control the order in which it is appended to the file.
* It is required when uploading data to be appended to the file and when flushing previously uploaded data to the file.
* The value must be the position where the data is to be appended. Uploaded data is not immediately flushed, or written,
* to the file. To flush, the previously uploaded data must be contiguous, the position parameter must be specified and
* equal to the length of the file after all data has been written, and there must not be a request entity body included
* with the request.
* @param options - Optional. Options when flushing data.
*/
async flush(position, options = {}) {
options.conditions = options.conditions || {};
return tracingClient.withSpan("DataLakeFileClient-flush", options, async (updatedOptions) => {
ensureCpkIfSpecified(options.customerProvidedKey, this.isHttps);
return assertResponse(await this.pathContextInternal.flushData({
...updatedOptions,
position,
contentLength: 0,
leaseAccessConditions: options.conditions,
modifiedAccessConditions: options.conditions,
cpkInfo: options.customerProvidedKey,
proposedLeaseId: options.proposedLeaseId,
leaseDuration: options.leaseDurationInSeconds,
leaseAction: options.leaseAction,
}));
});
}
// high level functions
/**
* ONLY AVAILABLE IN NODE.JS RUNTIME.
*
* Uploads a local file to a Data Lake file.
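*
* Example usage (a minimal sketch; assumes `fileClient` is a DataLakeFileClient and the local path is a placeholder):
*
* ```ts
* // Creates the destination file, then uploads in parallel chunks as needed.
* await fileClient.uploadFile("<path to local file>", {
*   onProgress: (progress) => console.log(`${progress.loadedBytes} bytes uploaded`),
* });
* ```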
*
* @param filePath - Full path of the local file
* @param options -
*/
async uploadFile(filePath,
// Legacy, no way to fix the eslint error without breaking. Disable the rule for this line.
/* eslint-disable-next-line @azure/azure-sdk/ts-naming-options */
options = {}) {
return tracingClient.withSpan("DataLakeFileClient-uploadFile", options, async (updatedOptions) => {
const size = (await fsStat(filePath)).size;
return this.uploadSeekableInternal((offset, contentSize) => {
return () => fsCreateReadStream(filePath, {
autoClose: true,
end: offset + contentSize - 1,
start: offset,
});
}, size, updatedOptions);
});
}
/**
* Uploads a Buffer(Node.js)/Blob/ArrayBuffer/ArrayBufferView to a File.
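*
* Example usage (a minimal sketch; assumes `fileClient` is a DataLakeFileClient):
*
* ```ts
* // In Node.js a Buffer works; in browsers pass a Blob, ArrayBuffer, or ArrayBufferView.
* await fileClient.upload(Buffer.from("Hello world!"));
* ```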
*
* @param data - Buffer(Node), Blob, ArrayBuffer or ArrayBufferView
* @param options -
*/
async upload(data, options = {}) {
return tracingClient.withSpan("DataLakeFileClient-upload", options, async (updatedOptions) => {
if (isNodeLike) {
let buffer;
if (data instanceof Buffer) {
buffer = data;
}
else if (data instanceof ArrayBuffer) {
buffer = Buffer.from(data);
}
else {
// data is an ArrayBufferView; wrap its underlying ArrayBuffer without copying
buffer = Buffer.from(data.buffer, data.byteOffset, data.byteLength);
}
return this.uploadSeekableInternal((offset, size) => buffer.slice(offset, offset + size), buffer.length, updatedOptions);
}
else {
const browserBlob = new Blob([data]);
return this.uploadSeekableInternal((offset, size) => browserBlob.slice(offset, offset + size), browserBlob.size, updatedOptions);
}
});
}
async uploadSeekableInternal(bodyFactory, size, options = {}) {
return tracingClient.withSpan("DataLakeFileClient-uploadData", options, async (updatedOptions) => {
if (size > FILE_MAX_SIZE_BYTES) {
throw new RangeError(`size must be <= ${FILE_MAX_SIZE_BYTES}.`);
}
// Create the file.
const createRes = this.create({
abortSignal: options.abortSignal,
metadata: options.metadata,
permissions: options.permissions,
umask: options.umask,
conditions: options.conditions,
pathHttpHeaders: options.pathHttpHeaders,
customerProvidedKey: updatedOptions.customerProvidedKey,
tracingOptions: updatedOptions.tracingOptions,
encryptionContext: updatedOptions.encryptionContext,
});
// append() with empty data would return an error, so do not continue
if (size === 0) {
return createRes;
}
else {
await createRes;
}
// After the file is created, the lease ID is the only valid request parameter.
options.conditions = { leaseId: options.conditions?.leaseId };
if (!options.chunkSize) {
options.chunkSize = Math.ceil(size / BLOCK_BLOB_MAX_BLOCKS);
if (options.chunkSize < FILE_UPLOAD_DEFAULT_CHUNK_SIZE) {
options.chunkSize = FILE_UPLOAD_DEFAULT_CHUNK_SIZE;
}
}
if (options.chunkSize < 1 || options.chunkSize > FILE_UPLOAD_MAX_CHUNK_SIZE) {
throw new RangeError(`chunkSize option must be >= 1 and <= ${FILE_UPLOAD_MAX_CHUNK_SIZE}`);
}
if (!options.maxConcurrency) {
options.maxConcurrency = DEFAULT_HIGH_LEVEL_CONCURRENCY;
}
if (options.maxConcurrency <= 0) {
throw new RangeError(`maxConcurrency must be > 0.`);
}
if (!options.singleUploadThreshold) {
options.singleUploadThreshold = FILE_MAX_SINGLE_UPLOAD_THRESHOLD;
}
if (options.singleUploadThreshold < 1 ||
options.singleUploadThreshold > FILE_MAX_SINGLE_UPLOAD_THRESHOLD) {
throw new RangeError(`singleUploadThreshold option must be >= 1 and <= ${FILE_MAX_SINGLE_UPLOAD_THRESHOLD}`);
}
// When buffer length <= singleUploadThreshold, this method will use one append/flush call to finish the upload.
if (size <= options.singleUploadThreshold) {
await this.append(bodyFactory(0, size), 0, size, {
abortSignal: options.abortSignal,
conditions: options.conditions,
customerProvidedKey: updatedOptions.customerProvidedKey,
onProgress: options.onProgress,
tracingOptions: updatedOptions.tracingOptions,
});
return this.flush(size, {
abortSignal: options.abortSignal,
conditions: options.conditions,
close: options.close,
pathHttpHeaders: options.pathHttpHeaders,
customerProvidedKey: updatedOptions.customerProvidedKey,
tracingOptions: updatedOptions.tracingOptions,
});
}
const numBlocks = Math.floor((size - 1) / options.chunkSize) + 1;
if (numBlocks > BLOCK_BLOB_MAX_BLOCKS) {
throw new RangeError(`The data's size is too big or the chunkSize is too small; ` +
`the number of chunks must be <= ${BLOCK_BLOB_MAX_BLOCKS}`);
}
let transferProgress = 0;
const batch = new Batch(options.maxConcurrency);
for (let i = 0; i < numBlocks; i++) {
batch.addOperation(async () => {
const start = options.chunkSize * i;
const end = i === numBlocks - 1 ? size : start + options.chunkSize;
const contentLength = end - start;
await this.append(bodyFactory(start, contentLength), start, contentLength, {
abortSignal: options.abortSignal,
conditions: options.conditions,
customerProvidedKey: updatedOptions.customerProvidedKey,
tracingOptions: updatedOptions.tracingOptions,
});
transferProgress += contentLength;
if (options.onProgress) {
options.onProgress({ loadedBytes: transferProgress });
}
});
}
await batch.do();
return this.flush(size, {
abortSignal: options.abortSignal,
conditions: options.conditions,
close: options.close,
pathHttpHeaders: options.pathHttpHeaders,
customerProvidedKey: updatedOptions.customerProvidedKey,
tracingOptions: updatedOptions.tracingOptions,
});
});
}
/**
* ONLY AVAILABLE IN NODE.JS RUNTIME.
*
* Uploads a Node.js Readable stream into a Data Lake file.
* This method will try to create a file, then start uploading it chunk by chunk.
* Please make sure the potential size of the stream doesn't exceed FILE_MAX_SIZE_BYTES and the
* potential number of chunks doesn't exceed BLOCK_BLOB_MAX_BLOCKS.
*
* PERFORMANCE IMPROVEMENT TIPS:
* * Set the input stream's highWaterMark to the same value as the options.chunkSize
* parameter, which will avoid Buffer.concat() operations.
*
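* Example usage (a minimal sketch; Node.js only; assumes `fileClient` is a DataLakeFileClient and the local path is a placeholder):
*
* ```ts
* import { createReadStream } from "node:fs";
*
* await fileClient.uploadStream(createReadStream("<path to local file>"), {
*   // Matching the stream's highWaterMark to chunkSize avoids extra buffering.
*   chunkSize: 4 * 1024 * 1024,
*   maxConcurrency: 5,
* });
* ```
*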
* @param stream - Node.js Readable stream.
* @param options -
*/
async uploadStream(stream, options = {}) {
return tracingClient.withSpan("DataLakeFileClient-uploadStream", opti