UNPKG

faastjs

Version:

Serverless batch computing made simple.

638 lines (637 loc) 29.4 kB
/// <reference types="node" />
import webpack from "webpack";
import { CostSnapshot } from "./cost";
import { Statistics } from "./shared";
import { CpuMeasurement, FunctionCall } from "./wrapper";
/**
 * The type of all supported cloud providers.
 * @public
 */
export type Provider = "aws" | "local";
/**
 * Options for the {@link CommonOptions.include} option.
 * @public
 */
export interface IncludeOption {
    /**
     * The path to the directory or glob to add to the cloud function.
     */
    path: string;
    /**
     * The working directory if `path` is relative. Defaults to `process.cwd()`.
     * For example, if `cwd` is `"foo"` and `path` is `"bar"`, then the
     * contents of the directory `foo/bar/` will be added to the remote
     * function under the path `bar/`.
     */
    cwd?: string;
}
/**
 * Options common across all faast.js providers. Used as argument to {@link faast}.
 * @remarks
 * There are also more specific options for each provider. See
 * {@link AwsOptions} and {@link LocalOptions}.
 * @public
 */
export interface CommonOptions {
    /**
     * If true, create a child process to isolate user code from faast
     * scaffolding. Default: true.
     * @remarks
     * If a child process is not created, faast runs in the same node instance
     * as the user code and may not execute in a timely fashion because user
     * code may
     * {@link https://nodejs.org/en/docs/guides/dont-block-the-event-loop/ | block the event loop}.
     * Creating a child process for user code allows faast.js to continue
     * executing even if user code never yields. This provides better
     * reliability and functionality:
     *
     * - Detect timeout errors more reliably, even if the function doesn't
     * relinquish the CPU. Not applicable to AWS, which sends separate failure
     * messages in case of timeout. See {@link CommonOptions.timeout}.
     *
     * - CPU metrics used for detecting invocations with high latency, which can
     * be used for automatically retrying calls to reduce tail latency.
     *
     * The cost of creating a child process is mainly in the memory overhead of
     * creating another node process.
     */
    childProcess?: boolean;
    /**
     * When childProcess is true, the child process will be spawned with the
     * value of this property as the setting for --max-old-space-size.
     * @remarks
     * This is useful if a function requires the node process to limit its
     * memory so that another spawned process (e.g. a browser instance) can use
     * the rest.
     * @public
     */
    childProcessMemoryMb?: number;
    /**
     * The maximum number of concurrent invocations to allow. Default: 100,
     * except for the `local` provider, where the default is 10.
     * @remarks
     * The concurrency limit applies to all invocations of all of the faast
     * functions summed together. It is not a per-function limit. To apply a
     * per-function limit, use {@link throttle}. A value of 0 is equivalent to
     * Infinity. A value of 1 ensures mutually exclusive invocations.
     */
    concurrency?: number;
    /**
     * A user-supplied description for this function, which may make it easier
     * to track different functions when multiple functions are created.
     */
    description?: string;
    /**
     * Exclude a subset of files included by {@link CommonOptions.include}.
     * @remarks
     * The exclusion can be a directory or glob. Exclusions apply to all
     * included entries.
     */
    exclude?: string[];
    /**
     * Rate limit invocations (invocations/sec). Default: no rate limit.
     * @remarks
     * Some services cannot handle more than a certain number of requests per
     * second, and it is easy to overwhelm them with a large number of cloud
     * functions. Specify a rate limit in invocation/second to restrict how
     * faast.js issues requests.
     */
    rate?: number;
    /**
     * Environment variables available during serverless function execution.
     * Default: \{\}.
     */
    env?: {
        [key: string]: string;
    };
    /**
     * Garbage collector mode. Default: `"auto"`.
     * @remarks
     * Garbage collection deletes resources that were created by previous
     * instantiations of faast that were not cleaned up by
     * {@link FaastModule.cleanup}, either because it was not called or because
     * the process terminated and did not execute this cleanup step. In `"auto"`
     * mode, garbage collection may be throttled to run up to once per hour no
     * matter how many faast.js instances are created. In `"force"` mode,
     * garbage collection is run without regard to whether another gc has
     * already been performed recently. In `"off"` mode, garbage collection is
     * skipped entirely. This can be useful for performance-sensitive tests, or
     * for more control over when gc is performed.
     *
     * Garbage collection is cloud-specific, but in general garbage collection
     * should not interfere with the behavior or performance of faast cloud
     * functions. When {@link FaastModule.cleanup} runs, it waits for garbage
     * collection to complete. Therefore the cleanup step can in some
     * circumstances take a significant amount of time even after all
     * invocations have returned.
     *
     * It is generally recommended to leave garbage collection in `"auto"` mode,
     * otherwise garbage resources may accumulate over time and you will
     * eventually hit resource limits on your account.
     *
     * Also see {@link CommonOptions.retentionInDays}.
     */
    gc?: "auto" | "force" | "off";
    /**
     * Include files to make available in the remote function. See
     * {@link IncludeOption}.
     * @remarks
     * Each include entry is a directory or glob pattern. Paths can be specified
     * as relative or absolute paths. Relative paths are resolved relative to
     * the current working directory, or relative to the `cwd` option.
     *
     * If the include entry is a directory `"foo/bar"`, the directory
     * `"./foo/bar"` will be available in the cloud function. Directories are
     * recursively added.
     *
     * Glob patterns use the syntax of
     * {@link https://github.com/isaacs/node-glob | node glob}.
     *
     * Also see {@link CommonOptions.exclude} for file exclusions.
     */
    include?: (string | IncludeOption)[];
    /**
     * Maximum number of times that faast will retry each invocation. Default: 2
     * (invocations can therefore be attempted 3 times in total).
     * @remarks
     * Retries are automatically attempted for transient infrastructure-level
     * failures such as rate limits or network failures. User-level exceptions
     * are not retried automatically. In addition to retries performed by faast,
     * some providers automatically attempt retries. These are not controllable
     * by faast. But as a result, your function may be retried many more times
     * than this setting suggests.
     */
    maxRetries?: number;
    /**
     * Memory limit for each function in MB. This setting has an effect on
     * pricing. Default varies by provider.
     * @remarks
     * Each provider has different settings for memory size, and performance
     * varies depending on the setting. By default faast picks a likely optimal
     * value for each provider.
     *
     * - aws: 1728MB
     *
     * - local: 512MB (however, memory size limits aren't reliable in local mode.)
     */
    memorySize?: number;
    /**
     * Specify invocation mode. Default: `"auto"`.
     * @remarks
     * Modes specify how invocations are triggered. In https mode, the functions
     * are invoked through an https request or the provider's API. In queue
     * mode, a provider-specific queue is used to invoke functions. Queue mode
     * adds additional latency and (usually negligible) cost, but may scale
     * better for some providers. In auto mode the best default is chosen for
     * each provider depending on its particular performance characteristics.
     *
     * The defaults are:
     *
     * - aws: `"auto"` is `"https"`. In https mode, the AWS SDK api
     * is used to invoke functions. In queue mode, an AWS SNS topic is created
     * and triggers invocations. The AWS API Gateway service is never used by
     * faast, as it incurs a higher cost and is not needed to trigger
     * invocations.
     *
     * - local: The local provider ignores the mode setting and always uses an
     * internal asynchronous queue to schedule calls.
     *
     * Size limits are affected by the choice of mode. On AWS the limit is 256kb
     * for arguments and return values in `"queue"` mode, and 6MB for `"https"`
     * mode.
     *
     * Note that no matter which mode is selected, faast.js always creates a
     * queue for sending back intermediate results for bookkeeping and
     * performance monitoring.
     */
    mode?: "https" | "queue" | "auto";
    /**
     * Specify a package.json file to include with the code package.
     * @remarks
     * By default, faast.js will use webpack to bundle dependencies your remote
     * module imports. In normal usage there is no need to specify a separate
     * package.json, as webpack will statically analyze your imports and
     * determine which files to bundle.
     *
     * However, there are some use cases where this is not enough. For example,
     * some dependencies contain native code compiled during installation, and
     * webpack cannot bundle these native modules; other dependencies are
     * specifically not designed to work with webpack. In these cases, you can
     * create a separate `package.json` for these dependencies and pass the
     * filename as the `packageJson` option. If `packageJson` is an `object`,
     * it is assumed to be a parsed JSON object with the same structure as a
     * package.json file (useful for specifying a synthetic `package.json`
     * directly in code).
     *
     * The way the `packageJson` is handled varies by provider:
     *
     * - local: Runs `npm install` in a temporary directory it prepares for the
     * function.
     *
     * - aws: Recursively calls faast.js to run `npm install` inside a separate
     * lambda function specifically created for this purpose. Faast.js uses
     * lambda to install dependencies to ensure that native dependencies are
     * compiled in an environment that can produce binaries linked against
     * lambda's
     * {@link https://aws.amazon.com/blogs/compute/running-executables-in-aws-lambda/ | execution environment}.
     * Packages are saved in a Lambda Layer.
     *
     * For AWS, if {@link CommonOptions.useDependencyCaching} is `true` (which
     * is the default), then the Lambda Layer created will be reused in future
     * function creation requests if the contents of `packageJson` are the same.
     *
     * The `FAAST_PACKAGE_DIR` environment variable can be useful for debugging
     * `packageJson` issues.
     */
    packageJson?: string | object;
    /**
     * Cache installed dependencies from {@link CommonOptions.packageJson}. Only
     * applies to AWS. Default: true.
     * @remarks
     * If `useDependencyCaching` is `true`, the resulting `node_modules` folder
     * is cached in a Lambda Layer with the name `faast-${key}`, where `key` is
     * the SHA1 hash of the `packageJson` contents. These cache entries are
     * removed by garbage collection, by default after 24h. Using caching
     * reduces the need to install and upload dependencies every time a function
     * is created. This is important for AWS because it creates an entirely
     * separate lambda function to install dependencies remotely, which can
     * substantially increase function deployment time.
     *
     * If `useDependencyCaching` is false, the lambda layer is created with the
     * same name as the lambda function, and then is deleted when cleanup is
     * run.
     */
    useDependencyCaching?: boolean;
    /**
     * Specify how many days to wait before reclaiming cloud garbage. Default:
     * 1.
     * @remarks
     * Garbage collection only deletes resources after they age beyond a certain
     * number of days. This option specifies how many days old a resource needs
     * to be before being considered garbage by the collector. Note that this
     * setting is not recorded when the resources are created. For example,
     * suppose this is the sequence of events:
     *
     * - Day 0: `faast()` is called with `retentionInDays` set to 5. Then, the
     * function crashes (or omits the call to {@link FaastModule.cleanup}).
     *
     * - Day 1: `faast()` is called with `retentionInDays` set to 1.
     *
     * In this sequence of events, on Day 0 the garbage collector runs and
     * removes resources with age older than 5 days. Then the function leaves
     * new garbage behind because it crashed or did not complete cleanup. On Day
     * 1, the garbage collector runs and deletes resources at least 1 day old,
     * which includes garbage left behind from Day 0 (based on the creation
     * timestamp of the resources). This deletion occurs even though retention
     * was set to 5 days when resources were created on Day 0.
     *
     * Note that if `retentionInDays` is set to 0, garbage collection will
     * remove all resources, even ones that may be in use by other running faast
     * instances. Not recommended.
     *
     * See {@link CommonOptions.gc}.
     */
    retentionInDays?: number;
    /**
     * Reduce tail latency by retrying invocations that take substantially
     * longer than other invocations of the same function. Default: 3.
     * @remarks
     * faast.js automatically measures the mean and standard deviation (σ) of
     * the time taken by invocations of each function. Retries are attempted
     * when the time for an invocation exceeds the mean time by a certain
     * threshold. `speculativeRetryThreshold` specifies how many multiples of σ
     * an invocation needs to exceed the mean for a given function before retry
     * is attempted.
     *
     * The default value of `speculativeRetryThreshold` is 3. This means a call
     * to a function is retried when the time to execute exceeds three standard
     * deviations from the mean of all prior executions of the same function.
     *
     * This feature is experimental.
     * @beta
     */
    speculativeRetryThreshold?: number;
    /**
     * Execution time limit for each invocation, in seconds. Default: 60.
     * @remarks
     * Each provider has a maximum time limit for how long invocations can run
     * before being automatically terminated (or frozen). The following are the
     * maximum time limits as of February 2019:
     *
     * - aws:
     * {@link https://docs.aws.amazon.com/lambda/latest/dg/limits.html | 15 minutes}
     *
     * - local: unlimited
     *
     * Faast.js has a proactive timeout detection feature. It automatically
     * attempts to detect when the time limit is about to be reached and
     * proactively sends a timeout exception. Faast does this because not all
     * providers reliably send timely feedback when timeouts occur, leaving
     * developers to look through cloud logs. In general faast.js' timeout will
     * be up to 5s earlier than the timeout specified, in order to give time to
     * allow faast.js to send a timeout message. Proactive timeout detection
     * only works with {@link CommonOptions.childProcess} set to `true` (the
     * default).
     */
    timeout?: number;
    /**
     * Extra webpack options to use to bundle the code package.
     * @remarks
     * By default, faast.js uses webpack to bundle the code package. Webpack
     * automatically handles finding and bundling dependencies, adding source
     * mappings, etc. If you need specialized bundling, use this option to add
     * or override the default webpack configuration. The library
     * {@link https://github.com/survivejs/webpack-merge | webpack-merge} is
     * used to combine configurations.
     *
     * ```typescript
     * const config: webpack.Configuration = merge({
     *     entry,
     *     mode: "development",
     *     output: {
     *         path: "/",
     *         filename: outputFilename,
     *         libraryTarget: "commonjs2"
     *     },
     *     target: "node",
     *     resolveLoader: { modules: [__dirname, `${__dirname}/dist`] },
     *     node: { global: true, __dirname: false, __filename: false }
     * },
     *     webpackOptions);
     * ```
     *
     * Take care when setting the values of `entry`, `output`, or
     * `resolveLoader`. If these options are overwritten, faast.js may fail to
     * bundle your code. In particular, setting `entry` to an array value will
     * help `webpack-merge` to concatenate its value instead of replacing the
     * value that faast.js inserts for you.
     *
     * Default:
     *
     * - aws: `{ externals: [new RegExp("^aws-sdk/?")] }`. In the lambda
     * environment `"aws-sdk"` is available in the ambient environment and
     * does not need to be bundled.
     *
     * - other providers: `{}`
     *
     * The `FAAST_PACKAGE_DIR` environment variable can be useful for debugging
     * webpack issues.
     */
    webpackOptions?: webpack.Configuration;
    /**
     * Check arguments and return values from cloud functions are serializable
     * without losing information. Default: true.
     * @remarks
     * Arguments to cloud functions are automatically serialized with
     * `JSON.stringify` with a custom replacer that handles built-in JavaScript
     * types such as `Date` and `Buffer`. Return values go through the same
     * process. Some JavaScript objects cannot be serialized. By default
     * `validateSerialization` will verify that every argument and return value
     * can be serialized and deserialized without losing information. A
     * `FaastError` will be thrown if faast.js detects a problem according to
     * the following procedure:
     *
     * 1. Serialize arguments and return values with `JSON.stringify` using a
     * special `replacer` function.
     *
     * 2. Deserialize the values with `JSON.parse` with a special `reviver`
     * function.
     *
     * 3. Use
     * {@link https://nodejs.org/api/assert.html#assert_assert_deepstrictequal_actual_expected_message | assert.deepStrictEqual}
     * to compare the original object with the deserialized object from step
     * 2.
     *
     * There is some overhead to this process because each argument is
     * serialized and deserialized, which can be costly if arguments or return
     * values are large.
     */
    validateSerialization?: boolean;
    /**
     * Debugging output options.
     * @internal
     */
    debugOptions?: {
        [key: string]: boolean;
    };
}
export declare const commonDefaults: Required<CommonOptions>;
/**
 * Options that apply to the {@link FaastModule.cleanup} method.
 * @public
 */
export interface CleanupOptions {
    /**
     * If true, delete provider cloud resources. Default: true.
     * @remarks
     * The cleanup operation has two functions: stopping the faast.js runtime
     * and deleting cloud resources that were instantiated. If `deleteResources`
     * is false, then only the runtime is stopped and no cloud resources are
     * deleted. This can be useful for debugging and examining the state of
     * resources created by faast.js.
     *
     * It is supported to call {@link FaastModule.cleanup} twice: once with
     * `deleteResources` set to `false`, which only stops the runtime, and then
     * again set to `true` to delete resources. This can be useful for testing.
     */
    deleteResources?: boolean;
    /**
     * If true, delete cached resources. Default: false.
     * @remarks
     * Some resources are cached persistently between calls for performance
     * reasons. If this option is set to true, these cached resources are
     * deleted when cleanup occurs, instead of being left behind for future use.
     * For example, on AWS this includes the Lambda Layers that are created for
     * {@link CommonOptions.packageJson} dependencies. Note that only the cached
     * resources created by this instance of FaastModule are deleted, not cached
     * resources from other FaastModules. This is similar to setting
     * `useDependencyCaching` to `false` during function construction, except
     * `deleteCaches` can be set at function cleanup time, and any other
     * FaastModules created before cleanup may use the cached Layers.
     */
    deleteCaches?: boolean;
    /**
     * Number of seconds to wait for garbage collection. Default: 10.
     * @remarks
     * Garbage collection can still be operating when cleanup is called; this
     * option limits the amount of time faast waits for the garbage collector.
     * If set to 0, the wait is unlimited.
     */
    gcTimeout?: number;
}
export declare const CleanupOptionDefaults: Required<CleanupOptions>;
/**
 * Summary statistics for function invocations.
 * @remarks
 * ```
 *                localStartLatency      remoteStartLatency        executionTime
 *              ◀──────────────────▶◁ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ▷◀──────────▶
 *
 * ┌───────────────────────────────────┬──────────────────────────────────────┐
 * │                                   │                                      │
 * │               Local               │            Cloud Provider            │
 * │                                   │                                      │
 * │                    ┌─────────┐    │   ┌──────────┐         ┌──────────┐  │
 * │                    │         │    │   │          │         │          │  │
 * │                    │  local  │    │   │ request  │         │          │  │
 * │     invoke ───────▶│  queue  │────┼──▶│  queue   ├────────▶│          │  │
 * │                    │         │    │   │          │         │          │  │
 * │                    └─────────┘    │   └──────────┘         │  cloud   │  │
 * │                                   │                        │ function │  │
 * │                    ┌─────────┐    │   ┌──────────┐         │          │  │
 * │                    │         │    │   │          │         │          │  │
 * │     result ◀───────│  local  │◀───┼───│ response │◀────────│          │  │
 * │                    │ polling │    │   │  queue   │         │          │  │
 * │                    │         │    │   │          │         │          │  │
 * │                    └─────────┘    │   └──────────┘         └──────────┘  │
 * │                                   │                                      │
 * └───────────────────────────────────┴──────────────────────────────────────┘
 *
 *              ◁ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ▷
 *                                       returnLatency            ◀───────▶
 *                                                             sendResponseLatency
 * ```
 *
 * `localStartLatency` and `executionTime` are measured on one machine and are
 * free of clock skew. `remoteStartLatency` and `returnLatency` are measured as
 * time differences between machines and are subject to much more uncertainty,
 * and effects like clock skew.
 *
 * All times are in milliseconds.
 *
 * @public
 */
export declare class FunctionStats {
    /**
     * Statistics for how long invocations stay in the local queue before being
     * sent to the cloud provider.
     */
    localStartLatency: Statistics;
    /**
     * Statistics for how long requests take to start execution after being sent
     * to the cloud provider. This typically includes remote queueing and cold
     * start times. Because this measurement requires comparing timestamps from
     * different machines, it is subject to clock skew and other effects, and
     * should not be considered highly accurate. It can be useful for detecting
     * excessively high latency problems. Faast.js attempts to correct for clock
     * skew heuristically.
     */
    remoteStartLatency: Statistics;
    /**
     * Statistics for function execution time in milliseconds. This is measured
     * as wall clock time inside the cloud function, and does not include the
     * time taken to send the response to the response queue. Note that most
     * cloud providers round up to the next 100ms for pricing.
     */
    executionTime: Statistics;
    /**
     * Statistics for how long it takes to send the response to the response
     * queue.
     */
    sendResponseLatency: Statistics;
    /**
     * Statistics for how long it takes to return a response from the end of
     * execution time to the receipt of the response locally. This measurement
     * requires comparing timestamps from different machines, and is subject to
     * clock skew and other effects. It should not be considered highly
     * accurate. It can be useful for detecting excessively high latency
     * problems. Faast.js attempts to correct for clock skew heuristically.
     */
    returnLatency: Statistics;
    /**
     * Statistics for amount of time billed. This is similar to
     * {@link FunctionStats.executionTime} except each sampled time is rounded
     * up to the next 100ms.
     */
    estimatedBilledTime: Statistics;
    /**
     * The number of invocations attempted. If an invocation is retried, this
     * only counts the invocation once.
     */
    invocations: number;
    /**
     * The number of invocations that were successfully completed.
     */
    completed: number;
    /**
     * The number of invocation retries attempted. This counts retries
     * attempted by faast.js to recover from transient errors, but does not
     * count retries by the cloud provider.
     */
    retries: number;
    /**
     * The number of invocations that resulted in an error. If an invocation is
     * retried, an error is only counted once, no matter how many retries were
     * attempted.
     */
    errors: number;
    /**
     * Summarize the function stats as a string.
     * @returns a string showing the value of completed, retries, errors, and
     * mean execution time. This string excludes invocations by default because
     * it is often fixed.
     */
    toString(): string;
    /** @internal */
    clone(): FunctionStats;
}
/** Per-second execution metrics. @internal */
export declare class FunctionExecutionMetrics {
    secondMetrics: Statistics[];
}
/** Unique identifier for a single function call. */
export type CallId = string;
/**
 * Fields shared by response messages sent back for an invocation.
 * NOTE(review): field semantics inferred from names where not shown here —
 * confirm against the wrapper implementation.
 */
export interface ResponseContext {
    /** Whether the call's promise was fulfilled or rejected. */
    type: "fulfill" | "reject";
    /** Serialized result or error value. */
    value: string;
    callId: CallId;
    isErrorObject?: boolean;
    remoteExecutionStartTime?: number;
    remoteExecutionEndTime?: number;
    logUrl?: string;
    instanceId?: string;
    executionId?: string;
    memoryUsage?: NodeJS.MemoryUsage;
    timestamp?: number;
}
/** Response for a promise-returning remote function. */
export interface PromiseResponseMessage extends ResponseContext {
    kind: "promise";
}
/** One element of an async-iterator response; `sequence` orders the elements. */
export interface IteratorResponseMessage extends ResponseContext {
    kind: "iterator";
    sequence: number;
}
/** Notification that the remote function began executing. */
export interface FunctionStartedMessage {
    kind: "functionstarted";
    callId: CallId;
}
/** CPU usage metrics reported for a call. */
export interface CpuMetricsMessage {
    kind: "cpumetrics";
    callId: CallId;
    metrics: CpuMeasurement;
}
/** Result of polling the response queue. */
export interface PollResult {
    Messages: Message[];
    isFullMessageBatch?: boolean;
}
/** Union of all queue message types, discriminated by `kind`. */
export type Message = PromiseResponseMessage | IteratorResponseMessage | FunctionStartedMessage | CpuMetricsMessage;
export type Kind = Message["kind"];
export type UUID = string;
/** Filters `messages` down to those whose `kind` matches the given `kind`. */
export declare function filterMessages<K extends Kind>(messages: Message[], kind: K): (Extract<PromiseResponseMessage, {
    kind: K;
}> | Extract<IteratorResponseMessage, {
    kind: K;
}> | Extract<FunctionStartedMessage, {
    kind: K;
}> | Extract<CpuMetricsMessage, {
    kind: K;
}>)[];
/**
 * Contract each provider backend implements.
 * `O` is the provider's options type; `S` is its opaque runtime state.
 */
export interface ProviderImpl<O extends CommonOptions, S> {
    name: Provider;
    defaults: Required<O>;
    initialize(serverModule: string, nonce: UUID, options: Required<O>): Promise<S>;
    costSnapshot(state: S, stats: FunctionStats): Promise<CostSnapshot>;
    cleanup(state: S, options: Required<CleanupOptions>): Promise<void>;
    logUrl(state: S): string;
    invoke(state: S, request: FunctionCall, cancel: Promise<void>): Promise<void>;
    poll(state: S, cancel: Promise<void>): Promise<PollResult>;
    responseQueueId(state: S): string;
}