/**
 * @license
 * Copyright 2018 Google LLC. All Rights Reserved.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * =============================================================================
 */
|
import { AdadeltaOptimizer } from './adadelta_optimizer';
|
import { AdagradOptimizer } from './adagrad_optimizer';
|
import { AdamOptimizer } from './adam_optimizer';
|
import { AdamaxOptimizer } from './adamax_optimizer';
|
import { MomentumOptimizer } from './momentum_optimizer';
|
import { RMSPropOptimizer } from './rmsprop_optimizer';
|
import { SGDOptimizer } from './sgd_optimizer';
|
/**
 * Collection of static factory methods, one per optimizer class imported by
 * this module. Each factory forwards its arguments, in order, to the matching
 * optimizer constructor and returns the new instance.
 */
export class OptimizerConstructors {
  /**
   * Creates a `tf.SGDOptimizer`, which performs stochastic gradient descent.
   *
   * ```js
   * // Fit a quadratic function by learning the coefficients a, b, c.
   * const xs = tf.tensor1d([0, 1, 2, 3]);
   * const ys = tf.tensor1d([1.1, 5.9, 16.8, 33.9]);
   *
   * const a = tf.scalar(Math.random()).variable();
   * const b = tf.scalar(Math.random()).variable();
   * const c = tf.scalar(Math.random()).variable();
   *
   * // y = a * x^2 + b * x + c.
   * const f = x => a.mul(x.square()).add(b.mul(x)).add(c);
   * const loss = (pred, label) => pred.sub(label).square().mean();
   *
   * const learningRate = 0.01;
   * const optimizer = tf.train.sgd(learningRate);
   *
   * // Train the model.
   * for (let i = 0; i < 10; i++) {
   *   optimizer.minimize(() => loss(f(xs), ys));
   * }
   *
   * // Make predictions.
   * console.log(
   *     `a: ${a.dataSync()}, b: ${b.dataSync()}, c: ${c.dataSync()}`);
   * const preds = f(xs).dataSync();
   * preds.forEach((pred, i) => {
   *   console.log(`x: ${i}, pred: ${pred}`);
   * });
   * ```
   *
   * @param learningRate Learning rate applied by the SGD algorithm.
   *
   * @doc {heading: 'Training', subheading: 'Optimizers', namespace: 'train'}
   */
  static sgd(learningRate) {
    const optimizer = new SGDOptimizer(learningRate);
    return optimizer;
  }

  /**
   * Creates a `tf.MomentumOptimizer`, which performs momentum gradient
   * descent.
   *
   * Reference:
   * [http://proceedings.mlr.press/v28/sutskever13.pdf](
   * http://proceedings.mlr.press/v28/sutskever13.pdf)
   *
   * @param learningRate Learning rate applied by the momentum gradient
   * descent algorithm.
   * @param momentum Momentum applied by the momentum gradient descent
   * algorithm.
   * @param useNesterov Forwarded unchanged as the third constructor argument;
   * defaults to `false`. Presumably selects Nesterov momentum -- confirm
   * against `MomentumOptimizer`.
   *
   * @doc {heading: 'Training', subheading: 'Optimizers', namespace: 'train'}
   */
  static momentum(learningRate, momentum, useNesterov = false) {
    const optimizer = new MomentumOptimizer(
      learningRate,
      momentum,
      useNesterov
    );
    return optimizer;
  }

  /**
   * Creates a `tf.RMSPropOptimizer`, which performs RMSProp gradient descent.
   * The implementation uses plain momentum and, with the default
   * `centered = false`, is not the centered version of RMSProp.
   *
   * Reference:
   * [http://www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf](
   * http://www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf)
   *
   * @param learningRate Learning rate applied by the RMSProp gradient
   * descent algorithm.
   * @param decay Discounting factor for the history/coming gradient.
   * Defaults to `.9`.
   * @param momentum Momentum applied by the RMSProp gradient descent
   * algorithm. Defaults to `0.0`.
   * @param epsilon Small value that keeps the denominator away from zero.
   * Defaults to `null`, which is handed to the constructor as-is.
   * @param centered When true, gradients are normalized by the estimated
   * variance of the gradient. Defaults to `false`.
   *
   * @doc {heading: 'Training', subheading: 'Optimizers', namespace: 'train'}
   */
  static rmsprop(
    learningRate,
    decay = .9,
    momentum = 0.0,
    epsilon = null,
    centered = false
  ) {
    const optimizer = new RMSPropOptimizer(
      learningRate,
      decay,
      momentum,
      epsilon,
      centered
    );
    return optimizer;
  }

  /**
   * Creates a `tf.AdamOptimizer`, which implements the Adam algorithm.
   * See [https://arxiv.org/abs/1412.6980](https://arxiv.org/abs/1412.6980)
   *
   * @param learningRate Learning rate applied by the Adam gradient descent
   * algorithm. Defaults to `0.001`.
   * @param beta1 Exponential decay rate for the 1st moment estimates.
   * Defaults to `0.9`.
   * @param beta2 Exponential decay rate for the 2nd moment estimates.
   * Defaults to `0.999`.
   * @param epsilon Small constant for numerical stability. Defaults to
   * `null`, which is handed to the constructor as-is.
   *
   * @doc {heading: 'Training', subheading: 'Optimizers', namespace: 'train'}
   */
  static adam(
    learningRate = 0.001,
    beta1 = 0.9,
    beta2 = 0.999,
    epsilon = null
  ) {
    const optimizer = new AdamOptimizer(learningRate, beta1, beta2, epsilon);
    return optimizer;
  }

  /**
   * Creates a `tf.AdadeltaOptimizer`, which implements the Adadelta
   * algorithm.
   * See [https://arxiv.org/abs/1212.5701](https://arxiv.org/abs/1212.5701)
   *
   * @param learningRate Learning rate applied by the Adadelta gradient
   * descent algorithm. Defaults to `.001`.
   * @param rho Learning rate decay over each update. Defaults to `.95`.
   * @param epsilon Constant used to better condition the grad update.
   * Defaults to `null`, which is handed to the constructor as-is.
   *
   * @doc {heading: 'Training', subheading: 'Optimizers', namespace: 'train'}
   */
  static adadelta(learningRate = .001, rho = .95, epsilon = null) {
    const optimizer = new AdadeltaOptimizer(learningRate, rho, epsilon);
    return optimizer;
  }

  /**
   * Creates a `tf.AdamaxOptimizer`, which implements the Adamax algorithm.
   * See [https://arxiv.org/abs/1412.6980](https://arxiv.org/abs/1412.6980)
   *
   * @param learningRate Learning rate applied by the Adamax gradient descent
   * algorithm. Defaults to `0.002`.
   * @param beta1 Exponential decay rate for the 1st moment estimates.
   * Defaults to `0.9`.
   * @param beta2 Exponential decay rate for the 2nd moment estimates.
   * Defaults to `0.999`.
   * @param epsilon Small constant for numerical stability. Defaults to
   * `null`, which is handed to the constructor as-is.
   * @param decay Learning rate decay over each update. Defaults to `0.0`.
   *
   * @doc {heading: 'Training', subheading: 'Optimizers', namespace: 'train'}
   */
  static adamax(
    learningRate = 0.002,
    beta1 = 0.9,
    beta2 = 0.999,
    epsilon = null,
    decay = 0.0
  ) {
    const optimizer = new AdamaxOptimizer(
      learningRate,
      beta1,
      beta2,
      epsilon,
      decay
    );
    return optimizer;
  }

  /**
   * Creates a `tf.AdagradOptimizer`, which implements the Adagrad algorithm.
   *
   * References:
   * [http://www.jmlr.org/papers/volume12/duchi11a/duchi11a.pdf](
   * http://www.jmlr.org/papers/volume12/duchi11a/duchi11a.pdf)
   * or
   * [http://ruder.io/optimizing-gradient-descent/index.html#adagrad](
   * http://ruder.io/optimizing-gradient-descent/index.html#adagrad)
   *
   * @param learningRate Learning rate applied by the Adagrad gradient
   * descent algorithm.
   * @param initialAccumulatorValue Starting value for the accumulators, must
   * be positive. Defaults to `0.1`.
   *
   * @doc {heading: 'Training', subheading: 'Optimizers', namespace: 'train'}
   */
  static adagrad(learningRate, initialAccumulatorValue = 0.1) {
    const optimizer = new AdagradOptimizer(
      learningRate,
      initialAccumulatorValue
    );
    return optimizer;
  }
}
|
//# sourceMappingURL=data:application/json;base64,{"version":3,"file":"optimizer_constructors.js","sourceRoot":"","sources":["../../../../../../tfjs-core/src/optimizers/optimizer_constructors.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;GAeG;AAEH,OAAO,EAAC,iBAAiB,EAAC,MAAM,sBAAsB,CAAC;AACvD,OAAO,EAAC,gBAAgB,EAAC,MAAM,qBAAqB,CAAC;AACrD,OAAO,EAAC,aAAa,EAAC,MAAM,kBAAkB,CAAC;AAC/C,OAAO,EAAC,eAAe,EAAC,MAAM,oBAAoB,CAAC;AACnD,OAAO,EAAC,iBAAiB,EAAC,MAAM,sBAAsB,CAAC;AACvD,OAAO,EAAC,gBAAgB,EAAC,MAAM,qBAAqB,CAAC;AACrD,OAAO,EAAC,YAAY,EAAC,MAAM,iBAAiB,CAAC;AAE7C,MAAM,OAAO,qBAAqB;IAChC;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;OAoCG;IACH,MAAM,CAAC,GAAG,CAAC,YAAoB;QAC7B,OAAO,IAAI,YAAY,CAAC,YAAY,CAAC,CAAC;IACxC,CAAC;IAED;;;;;;;;;;;;;;OAcG;IACH,MAAM,CAAC,QAAQ,CAAC,YAAoB,EAAE,QAAgB,EAAE,WAAW,GAAG,KAAK;QAEzE,OAAO,IAAI,iBAAiB,CAAC,YAAY,EAAE,QAAQ,EAAE,WAAW,CAAC,CAAC;IACpE,CAAC;IAED;;;;;;;;;;;;;;;;;;;OAmBG;IACH,MAAM,CAAC,OAAO,CACV,YAAoB,EAAE,KAAK,GAAG,EAAE,EAAE,QAAQ,GAAG,GAAG,EAAE,UAAkB,IAAI,EACxE,QAAQ,GAAG,KAAK;QAClB,OAAO,IAAI,gBAAgB,CACvB,YAAY,EAAE,KAAK,EAAE,QAAQ,EAAE,OAAO,EAAE,QAAQ,CAAC,CAAC;IACxD,CAAC;IAED;;;;;;;;;;;OAWG;IACH,MAAM,CAAC,IAAI,CACP,YAAY,GAAG,KAAK,EAAE,KAAK,GAAG,GAAG,EAAE,KAAK,GAAG,KAAK,EAChD,UAAkB,IAAI;QACxB,OAAO,IAAI,aAAa,CAAC,YAAY,EAAE,KAAK,EAAE,KAAK,EAAE,OAAO,CAAC,CAAC;IAChE,CAAC;IAED;;;;;;;;;;;OAWG;IACH,MAAM,CAAC,QAAQ,CAAC,YAAY,GAAG,IAAI,EAAE,GAAG,GAAG,GAAG,EAAE,UAAkB,IAAI;QAEpE,OAAO,IAAI,iBAAiB,CAAC,YAAY,EAAE,GAAG,EAAE,OAAO,CAAC,CAAC;IAC3D,CAAC;IAED;;;;;;;;;;;;OAYG;IACH,MAAM,CAAC,MAAM,CACT,YAAY,GAAG,KAAK,EAAE,KAAK,GAAG,GAAG,EAAE,KAAK,GAAG,KAAK,EAAE,UAAkB,IAAI,EACxE,KAAK,GAAG,GAAG;QACb,OAAO,IAAI,eAAe,CAAC,YAAY,EAAE,KAAK,EAAE,KAAK,EAAE,OAAO,EAAE,KAAK,CAAC,CAAC;IACzE,CAAC;IAED;;;;;;;;;;;;;;;OAeG;IACH,MAAM,CAAC,OAAO,CAAC,YAAoB,EAAE,uBAAuB,GAAG,GAAG;QAEhE,OAAO,IAAI,gBAAgB,CAAC,YAAY,EAAE,uBAAuB,CAAC,CAAC;IACrE,CAAC;CACF","sourcesContent":["/**\n * @license\n * Copyright 2018 Google LLC. 
All Rights Reserved.\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use this file except in compliance with the License.\n * You may obtain a copy of the License at\n *\n * http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n * =============================================================================\n */\n\nimport {AdadeltaOptimizer} from './adadelta_optimizer';\nimport {AdagradOptimizer} from './adagrad_optimizer';\nimport {AdamOptimizer} from './adam_optimizer';\nimport {AdamaxOptimizer} from './adamax_optimizer';\nimport {MomentumOptimizer} from './momentum_optimizer';\nimport {RMSPropOptimizer} from './rmsprop_optimizer';\nimport {SGDOptimizer} from './sgd_optimizer';\n\nexport class OptimizerConstructors {\n  /**\n   * Constructs a `tf.SGDOptimizer` that uses stochastic gradient descent.\n   *\n   * ```js\n   * // Fit a quadratic function by learning the coefficients a, b, c.\n   * const xs = tf.tensor1d([0, 1, 2, 3]);\n   * const ys = tf.tensor1d([1.1, 5.9, 16.8, 33.9]);\n   *\n   * const a = tf.scalar(Math.random()).variable();\n   * const b = tf.scalar(Math.random()).variable();\n   * const c = tf.scalar(Math.random()).variable();\n   *\n   * // y = a * x^2 + b * x + c.\n   * const f = x => a.mul(x.square()).add(b.mul(x)).add(c);\n   * const loss = (pred, label) => pred.sub(label).square().mean();\n   *\n   * const learningRate = 0.01;\n   * const optimizer = tf.train.sgd(learningRate);\n   *\n   * // Train the model.\n   * for (let i = 0; i < 10; i++) {\n   *   optimizer.minimize(() => loss(f(xs), ys));\n   * }\n   *\n   * // Make predictions.\n   * console.log(\n   *     `a: ${a.dataSync()}, b: 
${b.dataSync()}, c: ${c.dataSync()}`);\n   * const preds = f(xs).dataSync();\n   * preds.forEach((pred, i) => {\n   *   console.log(`x: ${i}, pred: ${pred}`);\n   * });\n   * ```\n   *\n   * @param learningRate The learning rate to use for the SGD algorithm.\n   *\n   * @doc {heading: 'Training', subheading: 'Optimizers', namespace: 'train'}\n   */\n  static sgd(learningRate: number): SGDOptimizer {\n    return new SGDOptimizer(learningRate);\n  }\n\n  /**\n   * Constructs a `tf.MomentumOptimizer` that uses momentum gradient\n   * descent.\n   *\n   * See\n   * [http://proceedings.mlr.press/v28/sutskever13.pdf](\n   * http://proceedings.mlr.press/v28/sutskever13.pdf)\n   *\n   * @param learningRate The learning rate to use for the Momentum gradient\n   * descent algorithm.\n   * @param momentum The momentum to use for the momentum gradient descent\n   * algorithm.\n   *\n   * @doc {heading: 'Training', subheading: 'Optimizers', namespace: 'train'}\n   */\n  static momentum(learningRate: number, momentum: number, useNesterov = false):\n      MomentumOptimizer {\n    return new MomentumOptimizer(learningRate, momentum, useNesterov);\n  }\n\n  /**\n   * Constructs a `tf.RMSPropOptimizer` that uses RMSProp gradient\n   * descent. 
This implementation uses plain momentum and is not centered\n   * version of RMSProp.\n   *\n   * See\n   * [http://www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf](\n   * http://www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf)\n   *\n   * @param learningRate The learning rate to use for the RMSProp gradient\n   * descent algorithm.\n   * @param decay The discounting factor for the history/coming gradient.\n   * @param momentum The momentum to use for the RMSProp gradient descent\n   * algorithm.\n   * @param epsilon Small value to avoid zero denominator.\n   * @param centered If true, gradients are normalized by the estimated\n   * variance of the gradient.\n   *\n   * @doc {heading: 'Training', subheading: 'Optimizers', namespace: 'train'}\n   */\n  static rmsprop(\n      learningRate: number, decay = .9, momentum = 0.0, epsilon: number = null,\n      centered = false): RMSPropOptimizer {\n    return new RMSPropOptimizer(\n        learningRate, decay, momentum, epsilon, centered);\n  }\n\n  /**\n   * Constructs a `tf.AdamOptimizer` that uses the Adam algorithm.\n   * See [https://arxiv.org/abs/1412.6980](https://arxiv.org/abs/1412.6980)\n   *\n   * @param learningRate The learning rate to use for the Adam gradient\n   * descent algorithm.\n   * @param beta1 The exponential decay rate for the 1st moment estimates.\n   * @param beta2 The exponential decay rate for the 2nd moment estimates.\n   * @param epsilon A small constant for numerical stability.\n   *\n   * @doc {heading: 'Training', subheading: 'Optimizers', namespace: 'train'}\n   */\n  static adam(\n      learningRate = 0.001, beta1 = 0.9, beta2 = 0.999,\n      epsilon: number = null): AdamOptimizer {\n    return new AdamOptimizer(learningRate, beta1, beta2, epsilon);\n  }\n\n  /**\n   * Constructs a `tf.AdadeltaOptimizer` that uses the Adadelta algorithm.\n   * See [https://arxiv.org/abs/1212.5701](https://arxiv.org/abs/1212.5701)\n   *\n   * @param learningRate The 
learning rate to use for the Adadelta gradient\n   * descent algorithm.\n   * @param rho The learning rate decay over each update.\n   * @param epsilon A constant epsilon used to better condition the grad\n   * update.\n   *\n   * @doc {heading: 'Training', subheading: 'Optimizers', namespace: 'train'}\n   */\n  static adadelta(learningRate = .001, rho = .95, epsilon: number = null):\n      AdadeltaOptimizer {\n    return new AdadeltaOptimizer(learningRate, rho, epsilon);\n  }\n\n  /**\n   * Constructs a `tf.AdamaxOptimizer` that uses the Adamax algorithm.\n   * See [https://arxiv.org/abs/1412.6980](https://arxiv.org/abs/1412.6980)\n   *\n   * @param learningRate The learning rate to use for the Adamax gradient\n   * descent algorithm.\n   * @param beta1 The exponential decay rate for the 1st moment estimates.\n   * @param beta2 The exponential decay rate for the 2nd moment estimates.\n   * @param epsilon A small constant for numerical stability.\n   * @param decay The learning rate decay over each update.\n   *\n   * @doc {heading: 'Training', subheading: 'Optimizers', namespace: 'train'}\n   */\n  static adamax(\n      learningRate = 0.002, beta1 = 0.9, beta2 = 0.999, epsilon: number = null,\n      decay = 0.0): AdamaxOptimizer {\n    return new AdamaxOptimizer(learningRate, beta1, beta2, epsilon, decay);\n  }\n\n  /**\n   * Constructs a `tf.AdagradOptimizer` that uses the Adagrad algorithm.\n   * See\n   * [http://www.jmlr.org/papers/volume12/duchi11a/duchi11a.pdf](\n   * http://www.jmlr.org/papers/volume12/duchi11a/duchi11a.pdf)\n   * or\n   * [http://ruder.io/optimizing-gradient-descent/index.html#adagrad](\n   * http://ruder.io/optimizing-gradient-descent/index.html#adagrad)\n   *\n   * @param learningRate The learning rate to use for the Adagrad gradient\n   * descent algorithm.\n   * @param initialAccumulatorValue Starting value for the accumulators, must be\n   * positive.\n   *\n   * @doc {heading: 'Training', subheading: 'Optimizers', namespace: 
'train'}\n   */\n  static adagrad(learningRate: number, initialAccumulatorValue = 0.1):\n      AdagradOptimizer {\n    return new AdagradOptimizer(learningRate, initialAccumulatorValue);\n  }\n}\n"]}
|