@tensorflow/tfjs-layers: TensorFlow layers API in JavaScript
/**
* @license
* Copyright 2023 Google LLC.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* =============================================================================
*/
/// <amd-module name="@tensorflow/tfjs-layers/dist/layers/nlp/models/gpt2/gpt2_backbone" />
/**
 * GPT-2 backbone model.
 */
import { serialization } from '@tensorflow/tfjs-core';
import { Embedding } from '../../../embeddings';
import { Backbone } from '../backbone';
export interface GPT2BackboneArgs {
/**
* Integer. The size of the token vocabulary.
*/
vocabularySize: number;
/**
* Integer. The number of transformer layers.
*/
numLayers: number;
/**
* Integer. The number of attention heads for each transformer.
* The hidden size must be divisible by the number of attention heads.
*/
numHeads: number;
/**
* Integer. The size of the transformer encoding and pooler layers.
*/
hiddenDim: number;
/**
* Integer. The output dimension of the first Dense layer in a two-layer
* feedforward network for each transformer.
*/
intermediateDim: number;
/**
* Float. Dropout probability for the Transformer decoder layers.
* Defaults to 0.2.
*/
dropout?: number;
/**
* Integer. The maximum sequence length this decoder can consume.
* If `null`, `maxSequenceLength` defaults to the length of the input
* sequence. This determines the variable shape for positional embeddings.
* Defaults to 1024.
*/
maxSequenceLength?: number;
}
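/*
 * Example of a `GPT2BackboneArgs` config (a minimal sketch; the values below
 * are hypothetical, not a preset). Note that `hiddenDim` must be divisible
 * by `numHeads`, e.g. 256 / 8 = 32 dimensions per attention head.
 *
 * ```js
 * const smallConfig = {
 *   vocabularySize: 50257,   // size of the GPT-2 BPE token vocabulary
 *   numLayers: 4,            // number of transformer layers
 *   numHeads: 8,             // 256 % 8 === 0, so 32 dims per head
 *   hiddenDim: 256,          // size of the transformer encoding/pooler layers
 *   intermediateDim: 1024,   // first Dense layer of each feedforward block
 *   dropout: 0.2,            // optional; defaults to 0.2
 *   maxSequenceLength: 512,  // optional; defaults to 1024
 * };
 * ```
 */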
/**
* GPT-2 core network with hyperparameters.
*
* This network implements a Transformer-based decoder network,
* Generative Pretrained Transformer-2 (GPT-2), as described in
* ["Language Models are Unsupervised Multitask Learners"](https://cdn.openai.com/better-language-models/language_models_are_unsupervised_multitask_learners.pdf).
* It includes the embedding lookups and transformer layers.
*
* The default constructor gives a fully customizable, randomly initialized
* GPT-2 model with any number of layers, heads, and embedding
* dimensions. To load preset architectures and weights, use the `fromPreset`
* constructor.
*
* Disclaimer: Pre-trained models are provided on an "as is" basis, without
* warranties or conditions of any kind. The underlying model is provided by a
* third party and subject to a separate license, available
* [here](https://github.com/openai/gpt-2).
*
* Example usage:
* ```js
* const tokenIds = tf.ones([1, 12], 'int32');
* const paddingMask = tf.tensor(
*     [[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0]], [1, 12], 'int32');
*
* // Pretrained GPT-2 decoder.
* const model = GPT2Backbone.fromPreset('gpt2_base_en');
* model.apply(tokenIds, {paddingMask});
*
* // Randomly initialized GPT-2 decoder with custom config.
* const customModel = new GPT2Backbone({
*   vocabularySize: 50257,
*   numLayers: 12,
*   numHeads: 12,
*   hiddenDim: 768,
*   intermediateDim: 3072,
*   maxSequenceLength: 1024,
* });
* customModel.apply(tokenIds, {paddingMask});
* ```
*/
export declare class GPT2Backbone extends Backbone {
/** @nocollapse */
static className: string;
private vocabularySize;
private numLayers;
private numHeads;
private hiddenDim;
private intermediateDim;
private dropout;
private maxSequenceLength;
constructor(args: GPT2BackboneArgs);
getConfig(): serialization.ConfigDict;
get tokenEmbedding(): Embedding;
}
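/*
 * Usage sketch for the declared members (assuming a backbone constructed as
 * in the example above): `getConfig()` returns the hyperparameters as a
 * serializable dict, and `tokenEmbedding` exposes the `Embedding` layer that
 * maps token ids to `hiddenDim`-sized vectors.
 *
 * ```js
 * const backbone = new GPT2Backbone({
 *   vocabularySize: 50257,
 *   numLayers: 12,
 *   numHeads: 12,
 *   hiddenDim: 768,
 *   intermediateDim: 3072,
 * });
 * const config = backbone.getConfig();       // e.g. {vocabularySize: 50257, ...}
 * const embedding = backbone.tokenEmbedding; // Embedding layer instance
 * ```
 */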