/**
 * @license
 * Copyright 2023 Google LLC.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * =============================================================================
 */
/**
 * GPT-2 backbone model.
 */
/* Original source: keras_nlp/models/gpt2/gpt2_backbone.py */
import { serialization } from '@tensorflow/tfjs-core';
import { RandomNormal } from '../../../../initializers';
import { input } from '../../../../exports';
import { Embedding } from '../../../embeddings';
import { PositionEmbedding } from '../../modeling/position_embedding';
import { add } from '../../../../exports_layers';
import { Dropout } from '../../../core';
import { TransformerDecoder } from '../../modeling/transformer_decoder';
import { getActivation } from '../../../../activations';
import { LayerNormalization } from '../../../normalization';
import { Backbone } from '../backbone';

/**
 * Creates the `RandomNormal` initializer used for the weights in this file.
 *
 * @param stddev Standard deviation handed to `RandomNormal`. Defaults to 0.02.
 * @returns A new `RandomNormal` initializer instance.
 */
function gpt2KernelInitializer(stddev = 0.02) {
  return new RandomNormal({ stddev });
}

/**
 * GPT-2 core network with hyperparameters.
 *
 * This network implements a Transformer-based decoder network,
 * Generative Pretrained Transformer-2 (GPT-2), as described in
 * ["Language Models are Unsupervised Multitask Learners"](https://cdn.openai.com/better-language-models/language_models_are_unsupervised_multitask_learners.pdf).
 * It includes the embedding lookups and transformer layers.
 *
 * The default constructor gives a fully customizable, randomly initialized
 * GPT-2 model with any number of layers, heads, and embedding
 * dimensions. To load preset architectures and weights, use the `fromPreset`
 * constructor.
 *
 * Disclaimer: Pre-trained models are provided on an "as is" basis, without
 * warranties or conditions of any kind. The underlying model is provided by a
 * third party and subject to a separate license, available
 * [here](https://github.com/openai/gpt-2).
 *
 * The model is built with two inputs, in this order: `token_ids` and
 * `padding_mask` (both `int32`).
 *
 * Example usage:
 * ```js
 * const tokenIds = tf.ones([1, 12], 'int32');
 * const paddingMask = tf.tensor(
 *   [[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0]], [1, 12], 'int32');
 *
 * // Pretrained GPT-2 decoder.
 * let model = GPT2Backbone.fromPreset('gpt2_base_en');
 * model.apply([tokenIds, paddingMask]);
 *
 * // Randomly initialized GPT-2 decoder with custom config.
 * model = new GPT2Backbone({
 *   vocabularySize: 50257,
 *   numLayers: 12,
 *   numHeads: 12,
 *   hiddenDim: 768,
 *   intermediateDim: 3072,
 *   maxSequenceLength: 1024,
 * });
 * model.apply([tokenIds, paddingMask]);
 * ```
 */
class GPT2Backbone extends Backbone {
  /**
   * Builds the functional graph (embeddings -> decoder stack -> layer norm)
   * and hands it to the `Backbone` constructor.
   *
   * @param args Hyperparameters:
   *   - `vocabularySize`: input dimension of the token embedding.
   *   - `numLayers`: number of `TransformerDecoder` blocks to stack.
   *   - `numHeads`: `numHeads` passed to every decoder block.
   *   - `hiddenDim`: output dimension of the token embedding.
   *   - `intermediateDim`: `intermediateDim` passed to every decoder block.
   *   - `dropout` (optional): rate for the embedding dropout and for every
   *     decoder block. Defaults to 0.1 when `null`/`undefined`.
   *   - `maxSequenceLength` (optional): `sequenceLength` of the position
   *     embedding. Defaults to 1024 when `null`/`undefined`.
   */
  constructor(args) {
    // Resolve the optional hyperparameters once, into locals, so that the
    // caller's `args` object is never mutated.
    const dropout = args.dropout != null ? args.dropout : 0.1;
    const maxSequenceLength =
      args.maxSequenceLength != null ? args.maxSequenceLength : 1024;

    // Inputs
    const tokenIds = input({ shape: [null], dtype: 'int32', name: 'token_ids' });
    const paddingMask =
      input({ shape: [null], dtype: 'int32', name: 'padding_mask' });

    // Embed tokens, positions.
    const tokenEmbedding = new Embedding({
      inputDim: args.vocabularySize,
      outputDim: args.hiddenDim,
      embeddingsInitializer: gpt2KernelInitializer(0.01),
      name: 'token_embedding',
    }).apply(tokenIds);
    const positionEmbedding = new PositionEmbedding({
      initializer: gpt2KernelInitializer(0.02),
      sequenceLength: maxSequenceLength,
      name: 'position_embedding',
    }).apply(tokenEmbedding);

    // Sum and apply dropout to embeddings.
    let x = add({ name: 'embeddings_add' })
      .apply([tokenEmbedding, positionEmbedding]);
    x = new Dropout({ rate: dropout, name: 'embeddings_dropout' })
      .apply(x);

    // Apply successive transformer decoder blocks.
    for (let i = 0; i < args.numLayers; i++) {
      x = new TransformerDecoder({
        intermediateDim: args.intermediateDim,
        numHeads: args.numHeads,
        dropout,
        layerNormEpsilon: 1e-05,
        // TODO(pforderique): Implement gelu.
        activation: getActivation('relu'),
        kernelInitializer: gpt2KernelInitializer(0.02),
        normalizeFirst: true,
        name: `transformer_layer_${i}`,
      }).apply(x, { decoderPaddingMask: paddingMask });
    }

    const sequenceOutput = new LayerNormalization({
      name: 'layer_norm',
      axis: -1,
      epsilon: 1e-05,
      dtype: 'float32',
    }).apply(x);

    // Instantiate using Functional API Model constructor.
    super({
      inputs: [tokenIds, paddingMask],
      outputs: sequenceOutput,
      name: 'gpt2_backbone'
    });

    // Keep the resolved hyperparameters so getConfig() can report them.
    this.vocabularySize = args.vocabularySize;
    this.numLayers = args.numLayers;
    this.numHeads = args.numHeads;
    this.hiddenDim = args.hiddenDim;
    this.intermediateDim = args.intermediateDim;
    this.dropout = dropout;
    this.maxSequenceLength = maxSequenceLength;
  }

  /**
   * Returns the hyperparameters this backbone was constructed with, merged
   * with the config of the base class.
   *
   * The base config is assigned last, so on a key collision the base class's
   * value wins.
   *
   * @returns The merged config object.
   */
  getConfig() {
    const config = {
      vocabularySize: this.vocabularySize,
      numLayers: this.numLayers,
      numHeads: this.numHeads,
      hiddenDim: this.hiddenDim,
      intermediateDim: this.intermediateDim,
      dropout: this.dropout,
      maxSequenceLength: this.maxSequenceLength,
    };
    const baseConfig = super.getConfig();
    Object.assign(config, baseConfig);
    return config;
  }

  /**
   * The layer registered under the name `'token_embedding'`.
   */
  get tokenEmbedding() {
    return this.getLayer('token_embedding');
  }
}
/** @nocollapse */
GPT2Backbone.className = 'GPT2Backbone';
export { GPT2Backbone };
serialization.registerClass(GPT2Backbone);
//# sourceMappingURL=data:application/json;base64,{"version":3,"file":"gpt2_backbone.js","sourceRoot":"","sources":["../../../../../../../../../tfjs-layers/src/layers/nlp/models/gpt2/gpt2_backbone.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;GAeG;AAEH;;GAEG;AAEH,6DAA6D;AAC7D,OAAO,EAAE,aAAa,EAAE,MAAM,uBAAuB,CAAC;AAEtD,OAAO,EAAE,YAAY,EAAE,MAAM,0BAA0B,CAAC;AACxD,OAAO,EAAE,KAAK,EAAE,MAAM,qBAAqB,CAAC;AAC5C,OAAO,EAAE,SAAS,EAAE,MAAM,qBAAqB,CAAC;AAEhD,OAAO,EAAE,iBAAiB,EAAE,MAAM,mCAAmC,CAAC;AACtE,OAAO,EAAE,GAAG,EAAE,MAAM,4BAA4B,CAAC;AACjD,OAAO,EAAE,OAAO,EAAE,MAAM,eAAe,CAAC;AACxC,OAAO,EAAE,kBAAkB,EAAE,MAAM,oCAAoC,CAAC;AACxE,OAAO,EAAE,aAAa,EAAE,MAAM,yBAAyB,CAAC;AACxD,OAAO,EAAE,kBAAkB,EAAE,MAAM,wBAAwB,CAAC;AAC5D,OAAO,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AAEvC,SAAS,qBAAqB,CAAC,MAAM,GAAG,IAAI;IAC1C,OAAO,IAAI,YAAY,CAAC,EAAC,MAAM,EAAC,CAAC,CAAC;AACpC,CAAC;AA6CD;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAwCG;AACH,MAAa,YAAa,SAAQ,QAAQ;IAYxC,YAAY,IAAsB;;QAChC,IAAI,CAAC,OAAO,GAAG,MAAA,IAAI,CAAC,OAAO,mCAAI,GAAG,CAAC;QACnC,IAAI,CAAC,iBAAiB,GAAG,MAAA,IAAI,CAAC,iBAAiB,mCAAI,IAAI,CAAC;QAExD,SAAS;QACT,MAAM,QAAQ,GAAG,KAAK,CAAC,EAAC,KAAK,EAAE,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,OAAO,EAAE,IAAI,EAAE,WAAW,EAAC,CAAC,CAAC;QAC3E,MAAM,WAAW,GACf,KAAK,CAAC,EAAC,KAAK,EAAE,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,OAAO,EAAE,IAAI,EAAE,cAAc,EAAC,CAAC,CAAC;QAE/D,2BAA2B;QAC3B,MAAM,cAAc,GAAG,IAAI,SAAS,CAAC;YACnC,QAAQ,EAAE,IAAI,CAAC,cAAc;YAC7B,SAAS,EAAE,IAAI,CAAC,SAAS;YACzB,qBAAqB,EAAE,qBAAqB
,CAAC,IAAI,CAAC;YAClD,IAAI,EAAE,iBAAiB;SACxB,CAAC,CAAC,KAAK,CAAC,QAAQ,CAAmB,CAAC;QAErC,MAAM,iBAAiB,GAAG,IAAI,iBAAiB,CAAC;YAC9C,WAAW,EAAE,qBAAqB,CAAC,IAAI,CAAC;YACxC,cAAc,EAAE,IAAI,CAAC,iBAAiB;YACtC,IAAI,EAAE,oBAAoB;SAC3B,CAAC,CAAC,KAAK,CAAC,cAAc,CAAmB,CAAC;QAE3C,uCAAuC;QACvC,IAAI,CAAC,GAAG,GAAG,CAAC,EAAC,IAAI,EAAE,gBAAgB,EAAC,CAAC;aAClC,KAAK,CAAC,CAAC,cAAc,EAAE,iBAAiB,CAAC,CAAmB,CAAC;QAChE,CAAC,GAAG,IAAI,OAAO,CAAC,EAAC,IAAI,EAAE,IAAI,CAAC,OAAO,EAAE,IAAI,EAAE,oBAAoB,EAAC,CAAC;aAC9D,KAAK,CAAC,CAAC,CAAmB,CAAC;QAE9B,+CAA+C;QAC/C,KAAI,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,SAAS,EAAE,CAAC,EAAE,EAAE;YACtC,CAAC,GAAG,IAAI,kBAAkB,CAAC;gBACzB,eAAe,EAAE,IAAI,CAAC,eAAe;gBACrC,QAAQ,EAAE,IAAI,CAAC,QAAQ;gBACvB,OAAO,EAAE,IAAI,CAAC,OAAO;gBACrB,gBAAgB,EAAE,KAAK;gBACvB,qCAAqC;gBACrC,UAAU,EAAE,aAAa,CAAC,MAAM,CAAC;gBACjC,iBAAiB,EAAE,qBAAqB,CAAC,IAAI,CAAC;gBAC9C,cAAc,EAAE,IAAI;gBACpB,IAAI,EAAE,qBAAqB,CAAC,EAAE;aAC/B,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,EAAC,kBAAkB,EAAE,WAAW,EAAC,CAAmB,CAAC;SAClE;QAED,MAAM,cAAc,GAAG,IAAI,kBAAkB,CAAC;YAC5C,IAAI,EAAE,YAAY;YAClB,IAAI,EAAE,CAAC,CAAC;YACR,OAAO,EAAE,KAAK;YACd,KAAK,EAAE,SAAS;SACjB,CAAC,CAAC,KAAK,CAAC,CAAC,CAAmB,CAAC;QAE9B,sDAAsD;QACtD,KAAK,CAAC;YACJ,MAAM,EAAE,CAAC,QAAQ,EAAE,WAAW,CAAC;YAC/B,OAAO,EAAE,cAAc;YACvB,IAAI,EAAE,eAAe;SACtB,CAAC,CAAC;QACH,IAAI,CAAC,cAAc,GAAG,IAAI,CAAC,cAAc,CAAC;QAC1C,IAAI,CAAC,SAAS,GAAG,IAAI,CAAC,SAAS,CAAC;QAChC,IAAI,CAAC,QAAQ,GAAG,IAAI,CAAC,QAAQ,CAAC;QAC9B,IAAI,CAAC,SAAS,GAAG,IAAI,CAAC,SAAS,CAAC;QAChC,IAAI,CAAC,eAAe,GAAG,IAAI,CAAC,eAAe,CAAC;QAC5C,IAAI,CAAC,OAAO,GAAG,MAAA,IAAI,CAAC,OAAO,mCAAI,GAAG,CAAC;QACnC,IAAI,CAAC,iBAAiB,GAAG,MAAA,IAAI,CAAC,iBAAiB,mCAAI,IAAI,CAAC;IAC1D,CAAC;IAEQ,SAAS;QAChB,MAAM,MAAM,GAA6B;YACvC,cAAc,EAAE,IAAI,CAAC,cAAc;YACnC,SAAS,EAAE,IAAI,CAAC,SAAS;YACzB,QAAQ,EAAE,IAAI,CAAC,QAAQ;YACvB,SAAS,EAAE,IAAI,CAAC,SAAS;YACzB,eAAe,EAAE,IAAI,CAAC,eAAe;YACrC,OAAO,EAAE,IAAI,CAAC,OAAO;YACrB,iBAAiB,EAAE,IAAI,CAAC,iBAAiB;SAC1C,CAAC;QACF,MAAM,UAAU,GAAG,KAAK,CAAC,SAAS,EAAE,CAAC;QACrC,MAAM,CAAC,MAAM,CAAC,MAAM,EAAE,UAAU,CA
AC,CAAC;QAClC,OAAO,MAAM,CAAC;IAChB,CAAC;IAED,IAAa,cAAc;QACzB,OAAO,IAAI,CAAC,QAAQ,CAAC,iBAAiB,CAAc,CAAC;IACvD,CAAC;;AA9FD,kBAAkB;AACF,sBAAS,GAAG,cAAc,CAAC;SAFhC,YAAY;AAiGzB,aAAa,CAAC,aAAa,CAAC,YAAY,CAAC,CAAC","sourcesContent":["/**\n * @license\n * Copyright 2023 Google LLC.\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use this file except in compliance with the License.\n * You may obtain a copy of the License at\n *\n * http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n * =============================================================================\n */\n\n/**\n *  Base class for Backbone models.\n */\n\n/* Original source: keras_nlp/models/gpt2/gpt2_backbone.py */\nimport { serialization } from '@tensorflow/tfjs-core';\n\nimport { RandomNormal } from '../../../../initializers';\nimport { input } from '../../../../exports';\nimport { Embedding } from '../../../embeddings';\nimport { SymbolicTensor } from '../../../../engine/topology';\nimport { PositionEmbedding } from '../../modeling/position_embedding';\nimport { add } from '../../../../exports_layers';\nimport { Dropout } from '../../../core';\nimport { TransformerDecoder } from '../../modeling/transformer_decoder';\nimport { getActivation } from '../../../../activations';\nimport { LayerNormalization } from '../../../normalization';\nimport { Backbone } from '../backbone';\n\nfunction gpt2KernelInitializer(stddev = 0.02) {\n  return new RandomNormal({stddev});\n}\n\nexport interface GPT2BackboneArgs  {\n  /**\n   * Integer. The size of the token vocabulary.\n   */\n  vocabularySize: number;\n\n  /**\n   * Integer. 
The number of transformer layers.\n   */\n  numLayers: number;\n\n  /**\n   * Integer. The number of attention heads for each transformer.\n   * The hidden size must be divisible by the number of attention heads.\n   */\n  numHeads: number;\n\n  /**\n   * Integer. The size of the transformer encoding and pooler layers.\n   */\n  hiddenDim: number;\n\n  /**\n   * Integer. The output dimension of the first Dense layer in a two-layer\n   * feedforward network for each transformer.\n   */\n  intermediateDim: number;\n\n  /**\n   * Float. Dropout probability for the Transformer encoder.\n   * Defaults to 0.2.\n   */\n  dropout?: number;\n\n  /**\n   * Integer. The maximum sequence length that this encoder can consume.\n   * If `null`, `maxSequenceLength` uses the value from sequence length.\n   * This determines the variable shape for positional embeddings.\n   * Defaults to 1024.\n   */\n  maxSequenceLength?: number;\n}\n\n/**\n * GPT-2 core network with hyperparameters.\n *\n * This network implements a Transformer-based decoder network,\n * Generative Pretrained Transformer-2 (GPT-2), as described in\n * [\"Language Models are Unsupervised Multitask Learners\"](https://cdn.openai.com/better-language-models/language_models_are_unsupervised_multitask_learners.pdf).\n * It includes the embedding lookups and transformer layers.\n *\n * The default constructor gives a fully customizable, randomly initialized\n * GPT-2 model with any number of layers, heads, and embedding\n * dimensions. To load preset architectures and weights, use the `fromPreset`\n * constructor.\n *\n * Disclaimer: Pre-trained models are provided on an \"as is\" basis, without\n * warranties or conditions of any kind. 
The underlying model is provided by a\n * third party and subject to a separate license, available\n * [here](https://github.com/openai/gpt-2).\n *\n *\n * Example usage:\n * ```js\n * const tokenIds = tf.ones([1, 12]), dtype=\"int32\");\n * const paddingMask = tf.tensor(\n *  [[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0]], 'int32');\n *\n * # Pretrained GPT-2 decoder.\n * model = GPT2Backbone.fromPreset(\"gpt2_base_en\");\n * model.apply(inputData, {paddingMask});\n *\n * # Randomly initialized GPT-2 decoder with custom config.\n * model = kerasNlp.models.GPT2Backbone({\n *     vocabularySize: 50257,\n *     numLayers: 12,\n *     numHeads: 12,\n *     hiddenDim: 768,\n *     intermediateDim: 3072,\n *     maxSequenceLength: 1024,\n * });\n * model.apply(inputData, {paddingMask});\n * ```\n */\nexport class GPT2Backbone extends Backbone {\n  /** @nocollapse */\n  static override className = 'GPT2Backbone';\n\n  private vocabularySize: number;\n  private numLayers: number;\n  private numHeads: number;\n  private hiddenDim: number;\n  private intermediateDim: number;\n  private dropout: number;\n  private maxSequenceLength: number;\n\n  constructor(args: GPT2BackboneArgs) {\n    args.dropout = args.dropout ?? 0.1;\n    args.maxSequenceLength = args.maxSequenceLength ?? 
1024;\n\n    // Inputs\n    const tokenIds = input({shape: [null], dtype: 'int32', name: 'token_ids'});\n    const paddingMask =\n      input({shape: [null], dtype: 'int32', name: 'padding_mask'});\n\n    // Embed tokens, positions.\n    const tokenEmbedding = new Embedding({\n      inputDim: args.vocabularySize,\n      outputDim: args.hiddenDim,\n      embeddingsInitializer: gpt2KernelInitializer(0.01),\n      name: 'token_embedding',\n    }).apply(tokenIds) as SymbolicTensor;\n\n    const positionEmbedding = new PositionEmbedding({\n      initializer: gpt2KernelInitializer(0.02),\n      sequenceLength: args.maxSequenceLength,\n      name: 'position_embedding',\n    }).apply(tokenEmbedding) as SymbolicTensor;\n\n    // Sum and apply dropout to embeddings.\n    let x = add({name: 'embeddings_add'})\n      .apply([tokenEmbedding, positionEmbedding]) as SymbolicTensor;\n    x = new Dropout({rate: args.dropout, name: 'embeddings_dropout'})\n      .apply(x) as SymbolicTensor;\n\n    // Apply successive transformer decoder blocks.\n    for(let i = 0; i < args.numLayers; i++) {\n      x = new TransformerDecoder({\n        intermediateDim: args.intermediateDim,\n        numHeads: args.numHeads,\n        dropout: args.dropout,\n        layerNormEpsilon: 1e-05,\n        // TODO(pforderique): Implement gelu.\n        activation: getActivation('relu'),\n        kernelInitializer: gpt2KernelInitializer(0.02),\n        normalizeFirst: true,\n        name: `transformer_layer_${i}`,\n      }).apply(x, {decoderPaddingMask: paddingMask}) as SymbolicTensor;\n    }\n\n    const sequenceOutput = new LayerNormalization({\n      name: 'layer_norm',\n      axis: -1,\n      epsilon: 1e-05,\n      dtype: 'float32',\n    }).apply(x) as SymbolicTensor;\n\n    // Instantiate using Functional API Model constructor.\n    super({\n      inputs: [tokenIds, paddingMask],\n      outputs: sequenceOutput,\n      name: 'gpt2_backbone'\n    });\n    this.vocabularySize = args.vocabularySize;\n    
this.numLayers = args.numLayers;\n    this.numHeads = args.numHeads;\n    this.hiddenDim = args.hiddenDim;\n    this.intermediateDim = args.intermediateDim;\n    this.dropout = args.dropout ?? 0.1;\n    this.maxSequenceLength = args.maxSequenceLength ?? 1024;\n  }\n\n  override getConfig(): serialization.ConfigDict {\n    const config: serialization.ConfigDict = {\n      vocabularySize: this.vocabularySize,\n      numLayers: this.numLayers,\n      numHeads: this.numHeads,\n      hiddenDim: this.hiddenDim,\n      intermediateDim: this.intermediateDim,\n      dropout: this.dropout,\n      maxSequenceLength: this.maxSequenceLength,\n    };\n    const baseConfig = super.getConfig();\n    Object.assign(config, baseConfig);\n    return config;\n  }\n\n  override get tokenEmbedding(): Embedding {\n    return this.getLayer('token_embedding') as Embedding;\n  }\n}\nserialization.registerClass(GPT2Backbone);\n"]}