"use strict";
/**
 * @license
 * Copyright 2018 Google Inc. All Rights Reserved.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * =============================================================================
 */
Object.defineProperty(exports, "__esModule", { value: true });
var gradients_1 = require("../gradients");
var tensor_util_env_1 = require("../tensor_util_env");
var util_1 = require("../util");
var axis_util_1 = require("./axis_util");
var binary_ops_1 = require("./binary_ops");
var operation_1 = require("./operation");
var tensor_ops_1 = require("./tensor_ops");
var Reduction;
(function (Reduction) {
    Reduction[Reduction["NONE"] = 0] = "NONE";
    Reduction[Reduction["MEAN"] = 1] = "MEAN";
    Reduction[Reduction["SUM"] = 2] = "SUM";
    Reduction[Reduction["SUM_BY_NONZERO_WEIGHTS"] = 3] = "SUM_BY_NONZERO_WEIGHTS";
})(Reduction = exports.Reduction || (exports.Reduction = {}));
/**
 * Computes the weighted loss between two tensors.
 *
 * @param losses Tensor of shape `[batch_size, d1, ... dN]`.
 * @param weights Tensor whose rank is either 0, or the same rank as
 *    `losses`, and must be broadcastable to `losses` (i.e., all
 *    dimensions must be either `1`, or the same as the corresponding
 *    `losses` dimension).
 * @param reduction Type of reduction to apply to loss. Should be of type
 *    `Reduction`.
 */
/** @doc {heading: 'Training', subheading: 'Losses', namespace: 'losses'} */
function computeWeightedLoss_(losses, weights, reduction) {
    if (reduction === void 0) { reduction = Reduction.SUM_BY_NONZERO_WEIGHTS; }
    var $losses = tensor_util_env_1.convertToTensor(losses, 'losses', 'computeWeightedLoss');
    var $weights = null;
    if (weights != null) {
        $weights = tensor_util_env_1.convertToTensor(weights, 'weights', 'computeWeightedLoss');
    }
    var weightedLoss = ($weights == null) ? $losses : $losses.mul($weights);
    if (reduction === Reduction.NONE) {
        return weightedLoss;
    }
    if (reduction === Reduction.SUM) {
        return weightedLoss.sum();
    }
    if (reduction === Reduction.MEAN) {
        if ($weights == null) {
            return weightedLoss.mean();
        }
        else {
            var broadcastFactor = $losses.size / $weights.size;
            var result = weightedLoss.sum().div($weights.sum());
            return broadcastFactor > 1 ?
                result.div(tensor_ops_1.scalar(broadcastFactor)) :
                result;
        }
    }
    if (reduction === Reduction.SUM_BY_NONZERO_WEIGHTS) {
        if ($weights == null) {
            return weightedLoss.sum().div(tensor_ops_1.scalar($losses.size));
        }
        else {
            var broadcastedWeights = $weights.mul(tensor_ops_1.ones($losses.shape));
            var numNonZeros = broadcastedWeights.notEqual(tensor_ops_1.scalar(0)).sum().toFloat();
            return weightedLoss.sum().div(numNonZeros);
        }
    }
    throw Error("Unknown reduction: " + reduction);
}
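// Example usage (a minimal sketch; assumes this module is exposed as the
// public `tf.losses` namespace, per the @doc annotations; values are
// illustrative):
//
//   const losses = tf.tensor1d([1, 2, 3, 4]);
//   const weights = tf.tensor1d([1, 0, 2, 0]);
//   // Weighted losses are [1, 0, 6, 0]. The default
//   // SUM_BY_NONZERO_WEIGHTS reduction divides their sum by the number
//   // of nonzero weights: 7 / 2 = 3.5.
//   tf.losses.computeWeightedLoss(losses, weights).print();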
/**
 * Computes the absolute difference loss between two tensors.
 *
 * @param labels The ground truth output tensor, same dimensions as
 *    `predictions`.
 * @param predictions The predicted outputs.
 * @param weights Tensor whose rank is either 0, or the same rank as
 *    `labels`, and must be broadcastable to `labels` (i.e., all dimensions
 *    must be either `1`, or the same as the corresponding `labels`
 *    dimension).
 * @param reduction Type of reduction to apply to loss. Should be of type
 *    `Reduction`.
 */
/** @doc {heading: 'Training', subheading: 'Losses', namespace: 'losses'} */
function absoluteDifference_(labels, predictions, weights, reduction) {
    if (reduction === void 0) { reduction = Reduction.SUM_BY_NONZERO_WEIGHTS; }
    var $labels = tensor_util_env_1.convertToTensor(labels, 'labels', 'absoluteDifference');
    var $predictions = tensor_util_env_1.convertToTensor(predictions, 'predictions', 'absoluteDifference');
    var $weights = null;
    if (weights != null) {
        $weights = tensor_util_env_1.convertToTensor(weights, 'weights', 'absoluteDifference');
    }
    util_1.assertShapesMatch($labels.shape, $predictions.shape, 'Error in absoluteDifference: ');
    var losses = $labels.sub($predictions).abs();
    return exports.computeWeightedLoss(losses, $weights, reduction);
}
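// Example usage (illustrative, via the public `tf.losses` namespace):
//
//   const labels = tf.tensor1d([1, 2, 3]);
//   const predictions = tf.tensor1d([2, 4, 6]);
//   // |labels - predictions| = [1, 2, 3]; with no weights, the default
//   // reduction averages over all elements: (1 + 2 + 3) / 3 = 2.
//   tf.losses.absoluteDifference(labels, predictions).print();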
/**
 * Computes the mean squared error between two tensors.
 *
 * @param labels The ground truth output tensor, same dimensions as
 *    `predictions`.
 * @param predictions The predicted outputs.
 * @param weights Tensor whose rank is either 0, or the same rank as
 *    `labels`, and must be broadcastable to `labels` (i.e., all dimensions
 *    must be either `1`, or the same as the corresponding `labels`
 *    dimension).
 * @param reduction Type of reduction to apply to loss. Should be of type
 *    `Reduction`.
 */
/** @doc {heading: 'Training', subheading: 'Losses', namespace: 'losses'} */
function meanSquaredError_(labels, predictions, weights, reduction) {
    if (reduction === void 0) { reduction = Reduction.SUM_BY_NONZERO_WEIGHTS; }
    var $labels = tensor_util_env_1.convertToTensor(labels, 'labels', 'meanSquaredError');
    var $predictions = tensor_util_env_1.convertToTensor(predictions, 'predictions', 'meanSquaredError');
    var $weights = null;
    if (weights != null) {
        $weights = tensor_util_env_1.convertToTensor(weights, 'weights', 'meanSquaredError');
    }
    util_1.assertShapesMatch($labels.shape, $predictions.shape, 'Error in meanSquaredError: ');
    var losses = $labels.squaredDifference($predictions);
    return exports.computeWeightedLoss(losses, $weights, reduction);
}
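// Example usage (illustrative):
//
//   const labels = tf.tensor1d([1, 2, 3]);
//   const predictions = tf.tensor1d([3, 3, 3]);
//   // (labels - predictions)^2 = [4, 1, 0]; the default reduction
//   // averages over all elements: (4 + 1 + 0) / 3 ≈ 1.667.
//   tf.losses.meanSquaredError(labels, predictions).print();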
/**
 * Computes the cosine distance loss between two tensors.
 *
 * @param labels The ground truth output tensor, same dimensions as
 *    `predictions`.
 * @param predictions The predicted outputs.
 * @param axis The dimension along which the cosine distance is computed.
 * @param weights Tensor whose rank is either 0, or the same rank as
 *    `labels`, and must be broadcastable to `labels` (i.e., all dimensions
 *    must be either `1`, or the same as the corresponding `labels`
 *    dimension).
 * @param reduction Type of reduction to apply to loss. Should be of type
 *    `Reduction`.
 */
/** @doc {heading: 'Training', subheading: 'Losses', namespace: 'losses'} */
function cosineDistance_(labels, predictions, axis, weights, reduction) {
    if (reduction === void 0) { reduction = Reduction.SUM_BY_NONZERO_WEIGHTS; }
    var $labels = tensor_util_env_1.convertToTensor(labels, 'labels', 'cosineDistance');
    var $predictions = tensor_util_env_1.convertToTensor(predictions, 'predictions', 'cosineDistance');
    var $weights = null;
    if (weights != null) {
        $weights = tensor_util_env_1.convertToTensor(weights, 'weights', 'cosineDistance');
    }
    util_1.assertShapesMatch($labels.shape, $predictions.shape, 'Error in cosineDistance: ');
    var one = tensor_ops_1.scalar(1);
    var losses = one.sub($labels.mul($predictions).sum(axis, true));
    return exports.computeWeightedLoss(losses, $weights, reduction);
}
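// Example usage (illustrative; the op computes
// 1 - sum(labels * predictions, axis), so inputs are expected to be
// unit-normalized along `axis`):
//
//   const labels = tf.tensor2d([[1, 0]]);
//   const predictions = tf.tensor2d([[0, 1]]);
//   // 1 - sum(labels * predictions, axis=1) = 1 - 0 = 1 for orthogonal
//   // unit vectors.
//   tf.losses.cosineDistance(labels, predictions, 1).print();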
/**
 * Computes the hinge loss between two tensors.
 *
 * @param labels The ground truth output tensor, same dimensions as
 *    `predictions`.
 * @param predictions The predicted outputs.
 * @param weights Tensor whose rank is either 0, or the same rank as
 *    `labels`, and must be broadcastable to `labels` (i.e., all dimensions
 *    must be either `1`, or the same as the corresponding `labels`
 *    dimension).
 * @param reduction Type of reduction to apply to loss. Should be of type
 *    `Reduction`.
 */
/** @doc {heading: 'Training', subheading: 'Losses', namespace: 'losses'} */
function hingeLoss_(labels, predictions, weights, reduction) {
    if (reduction === void 0) { reduction = Reduction.SUM_BY_NONZERO_WEIGHTS; }
    var $labels = tensor_util_env_1.convertToTensor(labels, 'labels', 'hingeLoss');
    var $predictions = tensor_util_env_1.convertToTensor(predictions, 'predictions', 'hingeLoss');
    var $weights = null;
    if (weights != null) {
        $weights = tensor_util_env_1.convertToTensor(weights, 'weights', 'hingeLoss');
    }
    util_1.assertShapesMatch($labels.shape, $predictions.shape, 'Error in hingeLoss: ');
    var one = tensor_ops_1.scalar(1);
    // Convert binary labels to (-1, 1).
    $labels = tensor_ops_1.scalar(2).mul($labels).sub(one);
    var losses = one.sub($labels.mul($predictions)).relu();
    return exports.computeWeightedLoss(losses, $weights, reduction);
}
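// Example usage (illustrative; labels are binary and are mapped to
// {-1, 1} internally):
//
//   const labels = tf.tensor1d([0, 1, 1]);
//   const predictions = tf.tensor1d([-0.5, 0.5, 2]);
//   // Per-element loss max(0, 1 - l * p) with l in {-1, 1} gives
//   // [0.5, 0.5, 0]; the default reduction averages to 1 / 3 ≈ 0.333.
//   tf.losses.hingeLoss(labels, predictions).print();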
/**
 * Computes the log loss between two tensors.
 *
 * @param labels The ground truth output tensor, same dimensions as
 *    `predictions`.
 * @param predictions The predicted outputs.
 * @param weights Tensor whose rank is either 0, or the same rank as
 *    `labels`, and must be broadcastable to `labels` (i.e., all dimensions
 *    must be either `1`, or the same as the corresponding `labels`
 *    dimension).
 * @param epsilon A small increment to avoid taking log of zero.
 * @param reduction Type of reduction to apply to loss. Should be of type
 *    `Reduction`.
 */
/** @doc {heading: 'Training', subheading: 'Losses', namespace: 'losses'} */
function logLoss_(labels, predictions, weights, epsilon, reduction) {
    if (epsilon === void 0) { epsilon = 1e-7; }
    if (reduction === void 0) { reduction = Reduction.SUM_BY_NONZERO_WEIGHTS; }
    var $labels = tensor_util_env_1.convertToTensor(labels, 'labels', 'logLoss');
    var $predictions = tensor_util_env_1.convertToTensor(predictions, 'predictions', 'logLoss');
    var $weights = null;
    if (weights != null) {
        $weights = tensor_util_env_1.convertToTensor(weights, 'weights', 'logLoss');
    }
    util_1.assertShapesMatch($labels.shape, $predictions.shape, 'Error in logLoss: ');
    var one = tensor_ops_1.scalar(1);
    var epsilonScalar = tensor_ops_1.scalar(epsilon);
    var losses = $labels.mul($predictions.add(epsilonScalar).log())
        .neg()
        .sub(one.sub($labels).mul(one.sub($predictions).add(epsilonScalar).log()));
    return exports.computeWeightedLoss(losses, $weights, reduction);
}
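// Example usage (illustrative; predictions are probabilities in [0, 1]):
//
//   const labels = tf.tensor1d([1, 0]);
//   const predictions = tf.tensor1d([0.9, 0.2]);
//   // Per-element loss -z*log(p) - (1-z)*log(1-p) (epsilon omitted) is
//   // [-log(0.9), -log(0.8)] ≈ [0.105, 0.223]; the default reduction
//   // averages them to ≈ 0.164.
//   tf.losses.logLoss(labels, predictions).print();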
function sigmoidCrossEntropyWithLogits_(labels, logits) {
    var $labels = tensor_util_env_1.convertToTensor(labels, 'labels', 'sigmoidCrossEntropyWithLogits');
    var $logits = tensor_util_env_1.convertToTensor(logits, 'logits', 'sigmoidCrossEntropyWithLogits');
    util_1.assertShapesMatch($labels.shape, $logits.shape, 'Error in sigmoidCrossEntropyWithLogits: ');
    /**
     * Implementation Details:
     *
     * For brevity, let `x = logits`, `z = labels`. The logistic loss is
     *     z * -log(sigmoid(x)) + (1 - z) * -log(1 - sigmoid(x))
     *   = z * -log(1 / (1 + exp(-x))) + (1 - z) * -log(exp(-x) / (1 + exp(-x)))
     *   = z * log(1 + exp(-x)) + (1 - z) * (-log(exp(-x)) + log(1 + exp(-x)))
     *   = z * log(1 + exp(-x)) + (1 - z) * (x + log(1 + exp(-x)))
     *   = (1 - z) * x + log(1 + exp(-x))
     *   = x - x * z + log(1 + exp(-x))
     *
     * For x < 0, to avoid overflow in exp(-x), we reformulate the above as
     *     x - x * z + log(1 + exp(-x))
     *   = log(exp(x)) - x * z + log(1 + exp(-x))
     *   = -x * z + log(1 + exp(x))
     *
     * Hence, to ensure stability and avoid overflow, the implementation uses
     * this equivalent formulation:
     *     max(x, 0) - x * z + log(1 + exp(-abs(x)))
     */
    var maxOutput = $logits.relu();
    var outputXTarget = $logits.mul($labels);
    var sigmoidOutput = $logits.abs().neg().exp().log1p();
    return maxOutput.sub(outputXTarget).add(sigmoidOutput);
}
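// A quick numeric check of the stable formulation above (illustrative):
// for x = 2, z = 1,
//   max(2, 0) - 2 * 1 + log(1 + exp(-2)) ≈ 0.127,
// which matches -log(sigmoid(2)) ≈ 0.127 computed directly. For a large
// negative logit such as x = -100 with z = 0, the naive form would
// evaluate log(1 + exp(100)) and overflow, while the stable form
// evaluates max(-100, 0) - 0 + log(1 + exp(-100)) ≈ 0.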
/**
 * Computes the sigmoid cross entropy loss between two tensors.
 *
 * If labelSmoothing is nonzero, smooth the labels towards 1/2:
 *
 *   newMulticlassLabels = multiclassLabels * (1 - labelSmoothing)
 *                         + 0.5 * labelSmoothing
 *
 * @param multiClassLabels The ground truth output tensor of shape
 *    `[batch_size, num_classes]`, same dimensions as `logits`.
 * @param logits The predicted outputs.
 * @param weights Tensor whose rank is either 0, or the same rank as
 *    `multiClassLabels`, and must be broadcastable to `multiClassLabels`
 *    (i.e., all dimensions must be either `1`, or the same as the
 *    corresponding `multiClassLabels` dimension).
 * @param labelSmoothing If greater than 0, then smooth the labels.
 * @param reduction Type of reduction to apply to loss. Should be of type
 *    `Reduction`.
 */
/** @doc {heading: 'Training', subheading: 'Losses', namespace: 'losses'} */
function sigmoidCrossEntropy_(multiClassLabels, logits, weights, labelSmoothing, reduction) {
    if (labelSmoothing === void 0) { labelSmoothing = 0; }
    if (reduction === void 0) { reduction = Reduction.SUM_BY_NONZERO_WEIGHTS; }
    var $multiClassLabels = tensor_util_env_1.convertToTensor(multiClassLabels, 'multiClassLabels', 'sigmoidCrossEntropy');
    var $logits = tensor_util_env_1.convertToTensor(logits, 'logits', 'sigmoidCrossEntropy');
    var $weights = null;
    if (weights != null) {
        $weights = tensor_util_env_1.convertToTensor(weights, 'weights', 'sigmoidCrossEntropy');
    }
    util_1.assertShapesMatch($multiClassLabels.shape, $logits.shape, 'Error in sigmoidCrossEntropy: ');
    if (labelSmoothing > 0) {
        var labelSmoothingScalar = tensor_ops_1.scalar(labelSmoothing);
        var one = tensor_ops_1.scalar(1);
        var half = tensor_ops_1.scalar(0.5);
        $multiClassLabels = $multiClassLabels.mul(one.sub(labelSmoothingScalar))
            .add(half.mul(labelSmoothingScalar));
    }
    var losses = sigmoidCrossEntropyWithLogits_($multiClassLabels, $logits);
    return exports.computeWeightedLoss(losses, $weights, reduction);
}
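// Example usage (illustrative; `undefined` is passed for the unused
// weights argument):
//
//   const labels = tf.tensor2d([[0, 1]]);
//   const logits = tf.tensor2d([[1.0, -1.0]]);
//   // With labelSmoothing = 0.2 the labels become
//   // [[0 * 0.8 + 0.1, 1 * 0.8 + 0.1]] = [[0.1, 0.9]] before the
//   // element-wise sigmoid cross entropy is computed and reduced.
//   tf.losses.sigmoidCrossEntropy(labels, logits, undefined, 0.2).print();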
/**
 * Computes the Huber loss between two tensors.
 *
 * @param labels The ground truth output tensor, same dimensions as
 *    `predictions`.
 * @param predictions The predicted outputs.
 * @param weights Tensor whose rank is either 0, or the same rank as
 *    `labels`, and must be broadcastable to `labels` (i.e., all dimensions
 *    must be either `1`, or the same as the corresponding `labels`
 *    dimension).
 * @param delta Point where the Huber loss changes from quadratic to linear.
 * @param reduction Type of reduction to apply to loss. Should be of type
 *    `Reduction`.
 */
/** @doc {heading: 'Training', subheading: 'Losses', namespace: 'losses'} */
function huberLoss_(labels, predictions, weights, delta, reduction) {
    if (delta === void 0) { delta = 1.0; }
    if (reduction === void 0) { reduction = Reduction.SUM_BY_NONZERO_WEIGHTS; }
    var $labels = tensor_util_env_1.convertToTensor(labels, 'labels', 'huberLoss');
    var $predictions = tensor_util_env_1.convertToTensor(predictions, 'predictions', 'huberLoss');
    var $weights = null;
    if (weights != null) {
        $weights = tensor_util_env_1.convertToTensor(weights, 'weights', 'huberLoss');
    }
    util_1.assertShapesMatch($labels.shape, $predictions.shape, 'Error in huberLoss: ');
    var deltaScalar = tensor_ops_1.scalar(delta);
    var error = $predictions.sub($labels).abs();
    var quadratic = binary_ops_1.minimum(error, deltaScalar);
    var linear = error.sub(quadratic);
    var losses = tensor_ops_1.scalar(0.5).mul(quadratic.square()).add(deltaScalar.mul(linear));
    return exports.computeWeightedLoss(losses, $weights, reduction);
}
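// Example usage (illustrative):
//
//   const labels = tf.tensor1d([0, 0]);
//   const predictions = tf.tensor1d([0.5, 3]);
//   // With the default delta = 1, the error 0.5 falls in the quadratic
//   // region (0.5 * 0.5^2 = 0.125) while the error 3 is linear beyond
//   // delta (0.5 * 1^2 + 1 * (3 - 1) = 2.5). The default reduction
//   // averages them: (0.125 + 2.5) / 2 = 1.3125.
//   tf.losses.huberLoss(labels, predictions).print();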
/**
 * Computes softmax cross entropy between logits and labels.
 *
 * Measures the probability error in discrete classification tasks in which
 * the classes are mutually exclusive (each entry is in exactly one class).
 * For example, each CIFAR-10 image is labeled with one and only one label: an
 * image can be a dog or a truck, but not both.
 *
 * `NOTE`: While the classes are mutually exclusive, their probabilities need
 * not be. All that is required is that each row of labels is a valid
 * probability distribution. If they are not, the computation of the gradient
 * will be incorrect.
 *
 * `WARNING`: This op expects unscaled logits, since it performs a softmax on
 * logits internally for efficiency. Do not call this op with the output of
 * softmax, as it will produce incorrect results.
 *
 * logits and labels must have the same shape, e.g. [batch_size, num_classes]
 * and the same dtype.
 * @param labels The labels array.
 * @param logits The logits array.
 * @param dim The dimension softmax would be performed on. Defaults to `-1`
 *    which indicates the last dimension.
 */
function softmaxCrossEntropyWithLogits_(labels, logits, dim) {
    if (dim === void 0) { dim = -1; }
    if (dim === -1) {
        dim = logits.rank - 1;
    }
    if (dim !== logits.rank - 1) {
        throw Error("Softmax cross entropy along a non-last dimension is not yet " +
            ("supported. Labels / logits was rank " + logits.rank + " ") +
            ("and dim was " + dim));
    }
    // Use a custom gradient for numerical stability.
    var customOp = gradients_1.customGrad(function (labels, logits, save) {
        // References:
        //   1. http://cs231n.github.io/linear-classify/#softmax
        //   2. https://blog.feedly.com/tricks-of-the-trade-logsumexp/
        var keepDims = true;
        var lse = logits.logSumExp([dim], keepDims);
        var logResult = logits.toFloat().sub(lse);
        save([labels, logResult]);
        var costVector = logResult.mul(labels).neg();
        var value = costVector.sum([dim]);
        var gradFunc = function (dy, saved) {
            var labels = saved[0], logResult = saved[1];
            var dyShape = axis_util_1.expandShapeToKeepDim(dy.shape, [dim]);
            return [
                dy.reshape(dyShape).mul(labels.toFloat().sub(logResult.exp())),
                dy.reshape(dyShape).mul(logResult.exp().sub(labels.toFloat())),
            ];
        };
        return { value: value, gradFunc: gradFunc };
    });
    return customOp(labels, logits);
}
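// Sanity check of the custom gradient (a standard derivation): with
// logResult = x - logSumExp(x) = log(softmax(x)), the gradient of the
// loss -sum(labels * logResult) with respect to the logits is
// softmax(x) - labels, which is exactly the exp(logResult) - labels term
// in the second gradient returned by gradFunc above (scaled by the
// upstream dy).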
/**
 * Computes the softmax cross entropy loss between two tensors.
 *
 * If labelSmoothing is nonzero, smooth the labels towards 1/2:
 *
 *   newOnehotLabels = onehotLabels * (1 - labelSmoothing)
 *                     + labelSmoothing / numClasses
 *
 * @param onehotLabels One hot encoded labels of shape
 *    `[batch_size, num_classes]`, same dimensions as `logits`.
 * @param logits The predicted outputs.
 * @param weights Tensor whose rank is either 0 or 1, and must be
 *    broadcastable to `loss` of shape `[batch_size]`.
 * @param labelSmoothing If greater than 0, then smooth the labels.
 * @param reduction Type of reduction to apply to loss. Should be of type
 *    `Reduction`.
 */
/** @doc {heading: 'Training', subheading: 'Losses', namespace: 'losses'} */
function softmaxCrossEntropy_(onehotLabels, logits, weights, labelSmoothing, reduction) {
    if (labelSmoothing === void 0) { labelSmoothing = 0; }
    if (reduction === void 0) { reduction = Reduction.SUM_BY_NONZERO_WEIGHTS; }
    var $onehotLabels = tensor_util_env_1.convertToTensor(onehotLabels, 'onehotLabels', 'softmaxCrossEntropy');
    var $logits = tensor_util_env_1.convertToTensor(logits, 'logits', 'softmaxCrossEntropy');
    var $weights = null;
    if (weights != null) {
        $weights = tensor_util_env_1.convertToTensor(weights, 'weights', 'softmaxCrossEntropy');
    }
    util_1.assertShapesMatch($onehotLabels.shape, $logits.shape, 'Error in softmaxCrossEntropy: ');
    if (labelSmoothing > 0) {
        var labelSmoothingScalar = tensor_ops_1.scalar(labelSmoothing);
        var one = tensor_ops_1.scalar(1);
        var numClasses = tensor_ops_1.scalar($onehotLabels.shape[1]);
        $onehotLabels = $onehotLabels.mul(one.sub(labelSmoothingScalar))
            .add(labelSmoothingScalar.div(numClasses));
    }
    var losses = softmaxCrossEntropyWithLogits_($onehotLabels, $logits);
    return exports.computeWeightedLoss(losses, $weights, reduction);
}
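// Example usage (illustrative):
//
//   const labels = tf.tensor2d([[0, 0, 1]]);
//   const logits = tf.tensor2d([[1, 2, 3]]);
//   // Cross entropy for the single example is
//   // -log(softmax(logits)[2]) = log(e^1 + e^2 + e^3) - 3 ≈ 0.408.
//   tf.losses.softmaxCrossEntropy(labels, logits).print();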
exports.absoluteDifference = operation_1.op({ absoluteDifference_: absoluteDifference_ });
exports.computeWeightedLoss = operation_1.op({ computeWeightedLoss_: computeWeightedLoss_ });
exports.cosineDistance = operation_1.op({ cosineDistance_: cosineDistance_ });
exports.hingeLoss = operation_1.op({ hingeLoss_: hingeLoss_ });
exports.huberLoss = operation_1.op({ huberLoss_: huberLoss_ });
exports.logLoss = operation_1.op({ logLoss_: logLoss_ });
exports.meanSquaredError = operation_1.op({ meanSquaredError_: meanSquaredError_ });
exports.sigmoidCrossEntropy = operation_1.op({ sigmoidCrossEntropy_: sigmoidCrossEntropy_ });
exports.softmaxCrossEntropy = operation_1.op({ softmaxCrossEntropy_: softmaxCrossEntropy_ });
//# sourceMappingURL=loss_ops.js.map