@tensorflow/tfjs-layers
Version:
TensorFlow layers API in JavaScript
156 lines • 21.3 kB
JavaScript
/**
* @license
* Copyright 2023 Google LLC.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* =============================================================================
*/
/**
* GPT-2 backbone model.
*/
/* Original source: keras_nlp/models/gpt2/gpt2_backbone.py */
import { serialization } from '@tensorflow/tfjs-core';
import { RandomNormal } from '../../../../initializers';
import { input } from '../../../../exports';
import { Embedding } from '../../../embeddings';
import { PositionEmbedding } from '../../modeling/position_embedding';
import { add } from '../../../../exports_layers';
import { Dropout } from '../../../core';
import { TransformerDecoder } from '../../modeling/transformer_decoder';
import { getActivation } from '../../../../activations';
import { LayerNormalization } from '../../../normalization';
import { Backbone } from '../backbone';
function gpt2KernelInitializer(stddev = 0.02) {
return new RandomNormal({ stddev });
}
/**
* GPT-2 core network with hyperparameters.
*
* This network implements a Transformer-based decoder network,
* Generative Pretrained Transformer-2 (GPT-2), as described in
* ["Language Models are Unsupervised Multitask Learners"](https://cdn.openai.com/better-language-models/language_models_are_unsupervised_multitask_learners.pdf).
* It includes the embedding lookups and transformer layers.
*
* The default constructor gives a fully customizable, randomly initialized
* GPT-2 model with any number of layers, heads, and embedding
* dimensions. To load preset architectures and weights, use the `fromPreset`
* constructor.
*
* Disclaimer: Pre-trained models are provided on an "as is" basis, without
* warranties or conditions of any kind. The underlying model is provided by a
* third party and subject to a separate license, available
* [here](https://github.com/openai/gpt-2).
*
*
* Example usage:
* ```js
* const tokenIds = tf.ones([1, 12], 'int32');
* const paddingMask = tf.tensor(
* [[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0]], [1, 12], 'int32');
*
* // Pretrained GPT-2 decoder.
* let model = GPT2Backbone.fromPreset("gpt2_base_en");
* model.apply(inputData, {paddingMask});
*
* // Randomly initialized GPT-2 decoder with custom config.
* model = new GPT2Backbone({
* vocabularySize: 50257,
* numLayers: 12,
* numHeads: 12,
* hiddenDim: 768,
* intermediateDim: 3072,
* maxSequenceLength: 1024,
* });
* model.apply(inputData, {paddingMask});
* ```
*/
class GPT2Backbone extends Backbone {
    /**
     * Builds the GPT-2 layer graph from two symbolic inputs (`token_ids` and
     * `padding_mask`) and records the hyperparameters on the instance.
     *
     * @param args Configuration object. Reads `vocabularySize`, `numLayers`,
     *   `numHeads`, `hiddenDim`, `intermediateDim`, and the optional
     *   `dropout` (defaults to 0.1) and `maxSequenceLength` (defaults to
     *   1024). The object is only read, never written to.
     */
    constructor(args) {
        // Resolve the optional hyperparameters once into locals instead of
        // writing the defaults back into `args`: the caller's config object
        // may be shared between models or frozen, and assigning to a frozen
        // object throws in module (strict-mode) code.
        const dropout = args.dropout != null ? args.dropout : 0.1;
        const maxSequenceLength = args.maxSequenceLength != null ? args.maxSequenceLength : 1024;
        // Inputs
        const tokenIds = input({ shape: [null], dtype: 'int32', name: 'token_ids' });
        const paddingMask = input({ shape: [null], dtype: 'int32', name: 'padding_mask' });
        // Embed tokens, positions.
        // The token embedding uses stddev 0.01; every other initializer in
        // this constructor uses 0.02.
        const tokenEmbedding = new Embedding({
            inputDim: args.vocabularySize,
            outputDim: args.hiddenDim,
            embeddingsInitializer: gpt2KernelInitializer(0.01),
            name: 'token_embedding',
        }).apply(tokenIds);
        const positionEmbedding = new PositionEmbedding({
            initializer: gpt2KernelInitializer(0.02),
            sequenceLength: maxSequenceLength,
            name: 'position_embedding',
        }).apply(tokenEmbedding);
        // Sum and apply dropout to embeddings.
        let x = add({ name: 'embeddings_add' })
            .apply([tokenEmbedding, positionEmbedding]);
        x = new Dropout({ rate: dropout, name: 'embeddings_dropout' })
            .apply(x);
        // Apply successive transformer decoder blocks; each block receives
        // the padding mask input as `decoderPaddingMask`.
        for (let i = 0; i < args.numLayers; i++) {
            x = new TransformerDecoder({
                intermediateDim: args.intermediateDim,
                numHeads: args.numHeads,
                dropout,
                layerNormEpsilon: 1e-05,
                activation: getActivation('gelu'),
                kernelInitializer: gpt2KernelInitializer(0.02),
                normalizeFirst: true,
                name: `transformer_layer_${i}`,
            }).apply(x, { decoderPaddingMask: paddingMask });
        }
        // Final layer normalization over the last axis produces the output.
        const sequenceOutput = new LayerNormalization({
            name: 'layer_norm',
            axis: -1,
            epsilon: 1e-05,
            dtype: 'float32',
        }).apply(x);
        // Instantiate using Functional API Model constructor.
        // `this` may only be touched after this call, which is why the whole
        // graph above is built from locals.
        super({
            inputs: [tokenIds, paddingMask],
            outputs: sequenceOutput,
            name: 'gpt2_backbone'
        });
        this.vocabularySize = args.vocabularySize;
        this.numLayers = args.numLayers;
        this.numHeads = args.numHeads;
        this.hiddenDim = args.hiddenDim;
        this.intermediateDim = args.intermediateDim;
        this.dropout = dropout;
        this.maxSequenceLength = maxSequenceLength;
    }
    /**
     * Returns the hyperparameters stored by the constructor merged with the
     * base class config.
     *
     * The base config is assigned last, so on a key collision the base class
     * value wins.
     *
     * @returns A plain config object.
     */
    getConfig() {
        const config = {
            vocabularySize: this.vocabularySize,
            numLayers: this.numLayers,
            numHeads: this.numHeads,
            hiddenDim: this.hiddenDim,
            intermediateDim: this.intermediateDim,
            dropout: this.dropout,
            maxSequenceLength: this.maxSequenceLength,
        };
        const baseConfig = super.getConfig();
        Object.assign(config, baseConfig);
        return config;
    }
    /**
     * The layer named `'token_embedding'` that the constructor adds to the
     * graph, looked up via `getLayer`.
     */
    get tokenEmbedding() {
        return this.getLayer('token_embedding');
    }
}
// Static class name, attached after the class body by the compiler output.
// NOTE(review): presumably the key `registerClass` below files the class
// under — confirm against tfjs-core's serialization module.
/** @nocollapse */
GPT2Backbone.className = 'GPT2Backbone';
export { GPT2Backbone };
// Register the class with the tfjs-core `serialization` registry. This runs
// as a side effect of importing this module.
serialization.registerClass(GPT2Backbone);
//# sourceMappingURL=data:application/json;base64,{"version":3,"file":"gpt2_backbone.js","sourceRoot":"","sources":["../../../../../../../../../tfjs-layers/src/layers/nlp/models/gpt2/gpt2_backbone.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;GAeG;AAEH;;GAEG;AAEH,6DAA6D;AAC7D,OAAO,EAAE,aAAa,EAAE,MAAM,uBAAuB,CAAC;AAEtD,OAAO,EAAE,YAAY,EAAE,MAAM,0BAA0B,CAAC;AACxD,OAAO,EAAE,KAAK,EAAE,MAAM,qBAAqB,CAAC;AAC5C,OAAO,EAAE,SAAS,EAAE,MAAM,qBAAqB,CAAC;AAEhD,OAAO,EAAE,iBAAiB,EAAE,MAAM,mCAAmC,CAAC;AACtE,OAAO,EAAE,GAAG,EAAE,MAAM,4BAA4B,CAAC;AACjD,OAAO,EAAE,OAAO,EAAE,MAAM,eAAe,CAAC;AACxC,OAAO,EAAE,kBAAkB,EAAE,MAAM,oCAAoC,CAAC;AACxE,OAAO,EAAE,aAAa,EAAE,MAAM,yBAAyB,CAAC;AACxD,OAAO,EAAE,kBAAkB,EAAE,MAAM,wBAAwB,CAAC;AAC5D,OAAO,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AAEvC,SAAS,qBAAqB,CAAC,MAAM,GAAG,IAAI;IAC1C,OAAO,IAAI,YAAY,CAAC,EAAC,MAAM,EAAC,CAAC,CAAC;AACpC,CAAC;AA6CD;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAwCG;AACH,MAAa,YAAa,SAAQ,QAAQ;IAYxC,YAAY,IAAsB;;QAChC,IAAI,CAAC,OAAO,GAAG,MAAA,IAAI,CAAC,OAAO,mCAAI,GAAG,CAAC;QACnC,IAAI,CAAC,iBAAiB,GAAG,MAAA,IAAI,CAAC,iBAAiB,mCAAI,IAAI,CAAC;QAExD,SAAS;QACT,MAAM,QAAQ,GAAG,KAAK,CAAC,EAAC,KAAK,EAAE,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,OAAO,EAAE,IAAI,EAAE,WAAW,EAAC,CAAC,CAAC;QAC3E,MAAM,WAAW,GACf,KAAK,CAAC,EAAC,KAAK,EAAE,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,OAAO,EAAE,IAAI,EAAE,cAAc,EAAC,CAAC,CAAC;QAE/D,2BAA2B;QAC3B,MAAM,cAAc,GAAG,IAAI,SAAS,CAAC;YACnC,QAAQ,EAAE,IAAI,CAAC,cAAc;YAC7B,SAAS,EAAE,IAAI,CAAC,SAAS;YACzB,qBAAqB,EAAE,qBAAqB,CAAC,IAAI,CAAC;YAClD,IAAI,EAAE,iBAAiB;SACxB,CAAC,CAAC,KAAK,CAAC,QAAQ,CAAmB,CAAC;QAErC,MAAM,iBAAiB,GAAG,IAAI,iBAAiB,CAAC;YAC9C,WAAW,EAAE,qBAAqB,CAAC,IAAI,CAAC;YACxC,cAAc,EAAE,IAAI,CAAC,iBAAiB;YACtC,IAAI,EAAE,oBAAoB;SAC3B,CAAC,CAAC,KAAK,CAAC,cAAc,CAAmB,CAAC;QAE3C,uCAAuC;QACvC,IAAI,CAAC,GAAG,GAAG,CAAC,EAAC,IAAI,EAAE,gBAAgB,EAAC,CAAC;aAClC,KAAK,CAAC,CAAC,cAAc,EAAE,iBAAiB,CAAC,CAAmB,CAAC;QAChE,CAAC,GAAG,IAAI,OAAO,CAAC,EAAC,IAAI,EAAE,IAAI,CAAC,OAAO,EAAE,IAAI,EAAE,oBAAoB,EAAC,CAAC;aAC9D,KAAK,CAAC,CAAC,CAAmB,CAAC;QAE9B,+CAA+C;QAC/C,KAAI,IAAI,CAAC,GAAG,CAA
C,EAAE,CAAC,GAAG,IAAI,CAAC,SAAS,EAAE,CAAC,EAAE,EAAE;YACtC,CAAC,GAAG,IAAI,kBAAkB,CAAC;gBACzB,eAAe,EAAE,IAAI,CAAC,eAAe;gBACrC,QAAQ,EAAE,IAAI,CAAC,QAAQ;gBACvB,OAAO,EAAE,IAAI,CAAC,OAAO;gBACrB,gBAAgB,EAAE,KAAK;gBACvB,UAAU,EAAE,aAAa,CAAC,MAAM,CAAC;gBACjC,iBAAiB,EAAE,qBAAqB,CAAC,IAAI,CAAC;gBAC9C,cAAc,EAAE,IAAI;gBACpB,IAAI,EAAE,qBAAqB,CAAC,EAAE;aAC/B,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,EAAC,kBAAkB,EAAE,WAAW,EAAC,CAAmB,CAAC;SAClE;QAED,MAAM,cAAc,GAAG,IAAI,kBAAkB,CAAC;YAC5C,IAAI,EAAE,YAAY;YAClB,IAAI,EAAE,CAAC,CAAC;YACR,OAAO,EAAE,KAAK;YACd,KAAK,EAAE,SAAS;SACjB,CAAC,CAAC,KAAK,CAAC,CAAC,CAAmB,CAAC;QAE9B,sDAAsD;QACtD,KAAK,CAAC;YACJ,MAAM,EAAE,CAAC,QAAQ,EAAE,WAAW,CAAC;YAC/B,OAAO,EAAE,cAAc;YACvB,IAAI,EAAE,eAAe;SACtB,CAAC,CAAC;QACH,IAAI,CAAC,cAAc,GAAG,IAAI,CAAC,cAAc,CAAC;QAC1C,IAAI,CAAC,SAAS,GAAG,IAAI,CAAC,SAAS,CAAC;QAChC,IAAI,CAAC,QAAQ,GAAG,IAAI,CAAC,QAAQ,CAAC;QAC9B,IAAI,CAAC,SAAS,GAAG,IAAI,CAAC,SAAS,CAAC;QAChC,IAAI,CAAC,eAAe,GAAG,IAAI,CAAC,eAAe,CAAC;QAC5C,IAAI,CAAC,OAAO,GAAG,MAAA,IAAI,CAAC,OAAO,mCAAI,GAAG,CAAC;QACnC,IAAI,CAAC,iBAAiB,GAAG,MAAA,IAAI,CAAC,iBAAiB,mCAAI,IAAI,CAAC;IAC1D,CAAC;IAEQ,SAAS;QAChB,MAAM,MAAM,GAA6B;YACvC,cAAc,EAAE,IAAI,CAAC,cAAc;YACnC,SAAS,EAAE,IAAI,CAAC,SAAS;YACzB,QAAQ,EAAE,IAAI,CAAC,QAAQ;YACvB,SAAS,EAAE,IAAI,CAAC,SAAS;YACzB,eAAe,EAAE,IAAI,CAAC,eAAe;YACrC,OAAO,EAAE,IAAI,CAAC,OAAO;YACrB,iBAAiB,EAAE,IAAI,CAAC,iBAAiB;SAC1C,CAAC;QACF,MAAM,UAAU,GAAG,KAAK,CAAC,SAAS,EAAE,CAAC;QACrC,MAAM,CAAC,MAAM,CAAC,MAAM,EAAE,UAAU,CAAC,CAAC;QAClC,OAAO,MAAM,CAAC;IAChB,CAAC;IAED,IAAa,cAAc;QACzB,OAAO,IAAI,CAAC,QAAQ,CAAC,iBAAiB,CAAc,CAAC;IACvD,CAAC;;AA7FD,kBAAkB;AACF,sBAAS,GAAG,cAAc,CAAC;SAFhC,YAAY;AAgGzB,aAAa,CAAC,aAAa,CAAC,YAAY,CAAC,CAAC","sourcesContent":["/**\n * @license\n * Copyright 2023 Google LLC.\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use this file except in compliance with the License.\n * You may obtain a copy of the License at\n *\n * http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in 
writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n * =============================================================================\n */\n\n/**\n *  Base class for Backbone models.\n */\n\n/* Original source: keras_nlp/models/gpt2/gpt2_backbone.py */\nimport { serialization } from '@tensorflow/tfjs-core';\n\nimport { RandomNormal } from '../../../../initializers';\nimport { input } from '../../../../exports';\nimport { Embedding } from '../../../embeddings';\nimport { SymbolicTensor } from '../../../../engine/topology';\nimport { PositionEmbedding } from '../../modeling/position_embedding';\nimport { add } from '../../../../exports_layers';\nimport { Dropout } from '../../../core';\nimport { TransformerDecoder } from '../../modeling/transformer_decoder';\nimport { getActivation } from '../../../../activations';\nimport { LayerNormalization } from '../../../normalization';\nimport { Backbone } from '../backbone';\n\nfunction gpt2KernelInitializer(stddev = 0.02) {\n  return new RandomNormal({stddev});\n}\n\nexport interface GPT2BackboneArgs  {\n  /**\n   * Integer. The size of the token vocabulary.\n   */\n  vocabularySize: number;\n\n  /**\n   * Integer. The number of transformer layers.\n   */\n  numLayers: number;\n\n  /**\n   * Integer. The number of attention heads for each transformer.\n   * The hidden size must be divisible by the number of attention heads.\n   */\n  numHeads: number;\n\n  /**\n   * Integer. The size of the transformer encoding and pooler layers.\n   */\n  hiddenDim: number;\n\n  /**\n   * Integer. The output dimension of the first Dense layer in a two-layer\n   * feedforward network for each transformer.\n   */\n  intermediateDim: number;\n\n  /**\n   * Float. 
Dropout probability for the Transformer encoder.\n   * Defaults to 0.2.\n   */\n  dropout?: number;\n\n  /**\n   * Integer. The maximum sequence length that this encoder can consume.\n   * If `null`, `maxSequenceLength` uses the value from sequence length.\n   * This determines the variable shape for positional embeddings.\n   * Defaults to 1024.\n   */\n  maxSequenceLength?: number;\n}\n\n/**\n * GPT-2 core network with hyperparameters.\n *\n * This network implements a Transformer-based decoder network,\n * Generative Pretrained Transformer-2 (GPT-2), as described in\n * [\"Language Models are Unsupervised Multitask Learners\"](https://cdn.openai.com/better-language-models/language_models_are_unsupervised_multitask_learners.pdf).\n * It includes the embedding lookups and transformer layers.\n *\n * The default constructor gives a fully customizable, randomly initialized\n * GPT-2 model with any number of layers, heads, and embedding\n * dimensions. To load preset architectures and weights, use the `fromPreset`\n * constructor.\n *\n * Disclaimer: Pre-trained models are provided on an \"as is\" basis, without\n * warranties or conditions of any kind. 
The underlying model is provided by a\n * third party and subject to a separate license, available\n * [here](https://github.com/openai/gpt-2).\n *\n *\n * Example usage:\n * ```js\n * const tokenIds = tf.ones([1, 12]), dtype=\"int32\");\n * const paddingMask = tf.tensor(\n *  [[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0]], 'int32');\n *\n * # Pretrained GPT-2 decoder.\n * model = GPT2Backbone.fromPreset(\"gpt2_base_en\");\n * model.apply(inputData, {paddingMask});\n *\n * # Randomly initialized GPT-2 decoder with custom config.\n * model = kerasNlp.models.GPT2Backbone({\n *     vocabularySize: 50257,\n *     numLayers: 12,\n *     numHeads: 12,\n *     hiddenDim: 768,\n *     intermediateDim: 3072,\n *     maxSequenceLength: 1024,\n * });\n * model.apply(inputData, {paddingMask});\n * ```\n */\nexport class GPT2Backbone extends Backbone {\n  /** @nocollapse */\n  static override className = 'GPT2Backbone';\n\n  private vocabularySize: number;\n  private numLayers: number;\n  private numHeads: number;\n  private hiddenDim: number;\n  private intermediateDim: number;\n  private dropout: number;\n  private maxSequenceLength: number;\n\n  constructor(args: GPT2BackboneArgs) {\n    args.dropout = args.dropout ?? 0.1;\n    args.maxSequenceLength = args.maxSequenceLength ?? 
1024;\n\n    // Inputs\n    const tokenIds = input({shape: [null], dtype: 'int32', name: 'token_ids'});\n    const paddingMask =\n      input({shape: [null], dtype: 'int32', name: 'padding_mask'});\n\n    // Embed tokens, positions.\n    const tokenEmbedding = new Embedding({\n      inputDim: args.vocabularySize,\n      outputDim: args.hiddenDim,\n      embeddingsInitializer: gpt2KernelInitializer(0.01),\n      name: 'token_embedding',\n    }).apply(tokenIds) as SymbolicTensor;\n\n    const positionEmbedding = new PositionEmbedding({\n      initializer: gpt2KernelInitializer(0.02),\n      sequenceLength: args.maxSequenceLength,\n      name: 'position_embedding',\n    }).apply(tokenEmbedding) as SymbolicTensor;\n\n    // Sum and apply dropout to embeddings.\n    let x = add({name: 'embeddings_add'})\n      .apply([tokenEmbedding, positionEmbedding]) as SymbolicTensor;\n    x = new Dropout({rate: args.dropout, name: 'embeddings_dropout'})\n      .apply(x) as SymbolicTensor;\n\n    // Apply successive transformer decoder blocks.\n    for(let i = 0; i < args.numLayers; i++) {\n      x = new TransformerDecoder({\n        intermediateDim: args.intermediateDim,\n        numHeads: args.numHeads,\n        dropout: args.dropout,\n        layerNormEpsilon: 1e-05,\n        activation: getActivation('gelu'),\n        kernelInitializer: gpt2KernelInitializer(0.02),\n        normalizeFirst: true,\n        name: `transformer_layer_${i}`,\n      }).apply(x, {decoderPaddingMask: paddingMask}) as SymbolicTensor;\n    }\n\n    const sequenceOutput = new LayerNormalization({\n      name: 'layer_norm',\n      axis: -1,\n      epsilon: 1e-05,\n      dtype: 'float32',\n    }).apply(x) as SymbolicTensor;\n\n    // Instantiate using Functional API Model constructor.\n    super({\n      inputs: [tokenIds, paddingMask],\n      outputs: sequenceOutput,\n      name: 'gpt2_backbone'\n    });\n    this.vocabularySize = args.vocabularySize;\n    this.numLayers = args.numLayers;\n    this.numHeads 
= args.numHeads;\n    this.hiddenDim = args.hiddenDim;\n    this.intermediateDim = args.intermediateDim;\n    this.dropout = args.dropout ?? 0.1;\n    this.maxSequenceLength = args.maxSequenceLength ?? 1024;\n  }\n\n  override getConfig(): serialization.ConfigDict {\n    const config: serialization.ConfigDict = {\n      vocabularySize: this.vocabularySize,\n      numLayers: this.numLayers,\n      numHeads: this.numHeads,\n      hiddenDim: this.hiddenDim,\n      intermediateDim: this.intermediateDim,\n      dropout: this.dropout,\n      maxSequenceLength: this.maxSequenceLength,\n    };\n    const baseConfig = super.getConfig();\n    Object.assign(config, baseConfig);\n    return config;\n  }\n\n  override get tokenEmbedding(): Embedding {\n    return this.getLayer('token_embedding') as Embedding;\n  }\n}\nserialization.registerClass(GPT2Backbone);\n"]}