@tensorflow/tfjs-layers: TensorFlow layers API in JavaScript
/**
* @license
* Copyright 2023 Google LLC.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* =============================================================================
*/
/// <amd-module name="@tensorflow/tfjs-layers/dist/layers/nlp/models/gpt2/gpt2_backbone" />
/**
 * GPT-2 backbone model.
 */
import { serialization } from '@tensorflow/tfjs-core';
import { Embedding } from '../../../embeddings';
import { Backbone } from '../backbone';
export interface GPT2BackboneArgs {
/**
* Integer. The size of the token vocabulary.
*/
vocabularySize: number;
/**
* Integer. The number of transformer layers.
*/
numLayers: number;
/**
* Integer. The number of attention heads for each transformer.
* The hidden size must be divisible by the number of attention heads.
*/
numHeads: number;
/**
* Integer. The size of the transformer encoding and pooler layers.
*/
hiddenDim: number;
/**
* Integer. The output dimension of the first Dense layer in a two-layer
* feedforward network for each transformer.
*/
intermediateDim: number;
/**
* Float. Dropout probability for the Transformer decoder layers.
* Defaults to 0.2.
*/
dropout?: number;
/**
* Integer. The maximum sequence length this decoder can consume.
* If `null`, `maxSequenceLength` defaults to the length of the input
* sequence. This determines the variable shape for positional embeddings.
* Defaults to 1024.
*/
maxSequenceLength?: number;
}
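/*
 * Example of a `GPT2BackboneArgs` config (a minimal sketch; the values below
 * are hypothetical, not a preset). Note that `hiddenDim` must be divisible
 * by `numHeads`, e.g. 256 / 8 = 32 dimensions per attention head.
 *
 * ```js
 * const smallConfig = {
 *   vocabularySize: 50257,   // size of the GPT-2 BPE token vocabulary
 *   numLayers: 4,            // number of transformer layers
 *   numHeads: 8,             // 256 % 8 === 0, so 32 dims per head
 *   hiddenDim: 256,          // size of the transformer encoding/pooler layers
 *   intermediateDim: 1024,   // first Dense layer of each feedforward block
 *   dropout: 0.2,            // optional; defaults to 0.2
 *   maxSequenceLength: 512,  // optional; defaults to 1024
 * };
 * ```
 */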
/**
* GPT-2 core network with hyperparameters.
*
* This network implements a Transformer-based decoder network,
* Generative Pretrained Transformer-2 (GPT-2), as described in
* ["Language Models are Unsupervised Multitask Learners"](https://cdn.openai.com/better-language-models/language_models_are_unsupervised_multitask_learners.pdf).
* It includes the embedding lookups and transformer layers.
*
* The default constructor gives a fully customizable, randomly initialized
* GPT-2 model with any number of layers, heads, and embedding
* dimensions. To load preset architectures and weights, use the `fromPreset`
* constructor.
*
* Disclaimer: Pre-trained models are provided on an "as is" basis, without
* warranties or conditions of any kind. The underlying model is provided by a
* third party and subject to a separate license, available
* [here](https://github.com/openai/gpt-2).
*
* Example usage:
* ```js
* const tokenIds = tf.ones([1, 12], 'int32');
* const paddingMask = tf.tensor(
*     [[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0]], [1, 12], 'int32');
*
* // Pretrained GPT-2 decoder.
* const model = GPT2Backbone.fromPreset('gpt2_base_en');
* model.apply(tokenIds, {paddingMask});
*
* // Randomly initialized GPT-2 decoder with custom config.
* const customModel = new GPT2Backbone({
*   vocabularySize: 50257,
*   numLayers: 12,
*   numHeads: 12,
*   hiddenDim: 768,
*   intermediateDim: 3072,
*   maxSequenceLength: 1024,
* });
* customModel.apply(tokenIds, {paddingMask});
* ```
*/
export declare class GPT2Backbone extends Backbone {
/** @nocollapse */
static className: string;
private vocabularySize;
private numLayers;
private numHeads;
private hiddenDim;
private intermediateDim;
private dropout;
private maxSequenceLength;
constructor(args: GPT2BackboneArgs);
getConfig(): serialization.ConfigDict;
get tokenEmbedding(): Embedding;
}
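/*
 * Usage sketch for the declared members (assuming a backbone constructed as
 * in the example above): `getConfig()` returns the hyperparameters as a
 * serializable dict, and `tokenEmbedding` exposes the `Embedding` layer that
 * maps token ids to `hiddenDim`-sized vectors.
 *
 * ```js
 * const backbone = new GPT2Backbone({
 *   vocabularySize: 50257,
 *   numLayers: 12,
 *   numHeads: 12,
 *   hiddenDim: 768,
 *   intermediateDim: 3072,
 * });
 * const config = backbone.getConfig();       // e.g. {vocabularySize: 50257, ...}
 * const embedding = backbone.tokenEmbedding; // Embedding layer instance
 * ```
 */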