"use strict"; /** * @license * Copyright 2017 Google Inc. All Rights Reserved. * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * ============================================================================= */ var __extends = (this && this.__extends) || (function () { var extendStatics = function (d, b) { extendStatics = Object.setPrototypeOf || ({ __proto__: [] } instanceof Array && function (d, b) { d.__proto__ = b; }) || function (d, b) { for (var p in b) if (b.hasOwnProperty(p)) d[p] = b[p]; }; return extendStatics(d, b); }; return function (d, b) { extendStatics(d, b); function __() { this.constructor = d; } d.prototype = b === null ? Object.create(b) : (__.prototype = b.prototype, new __()); }; })(); var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) { return new (P || (P = Promise))(function (resolve, reject) { function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } } function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } } function step(result) { result.done ? resolve(result.value) : new P(function (resolve) { resolve(result.value); }).then(fulfilled, rejected); } step((generator = generator.apply(thisArg, _arguments || [])).next()); }); }; var __generator = (this && this.__generator) || function (thisArg, body) { var _ = { label: 0, sent: function() { if (t[0] & 1) throw t[1]; return t[1]; }, trys: [], ops: [] }, f, y, t, g; return g = { next: verb(0), "throw": verb(1), "return": verb(2) }, typeof Symbol === "function" && (g[Symbol.iterator] = function() { return this; }), g; function verb(n) { return function (v) { return step([n, v]); }; } function step(op) { if (f) throw new TypeError("Generator is already executing."); while (_) try { if (f = 1, y && (t = op[0] & 2 ? y["return"] : op[0] ? y["throw"] || ((t = y["return"]) && t.call(y), 0) : y.next) && !(t = t.call(y, op[1])).done) return t; if (y = 0, t) op = [op[0] & 2, t.value]; switch (op[0]) { case 0: case 1: t = op; break; case 4: _.label++; return { value: op[1], done: false }; case 5: _.label++; y = op[1]; op = [0]; continue; case 7: op = _.ops.pop(); _.trys.pop(); continue; default: if (!(t = _.trys, t = t.length > 0 && t[t.length - 1]) && (op[0] === 6 || op[0] === 2)) { _ = 0; continue; } if (op[0] === 3 && (!t || (op[1] > t[0] && op[1] < t[3]))) { _.label = op[1]; break; } if (op[0] === 6 && _.label < t[1]) { _.label = t[1]; t = op; break; } if (t && _.label < t[2]) { _.label = t[2]; _.ops.push(op); break; } if (t[2]) _.ops.pop(); _.trys.pop(); continue; } op = body.call(thisArg, _); } catch (e) { op = [6, e]; y = 0; } finally { f = t = 0; } if (op[0] & 5) throw op[1]; return { value: op[0] ? op[1] : void 0, done: true }; } }; Object.defineProperty(exports, "__esModule", { value: true }); // Import webgl flags. require("./flags_webgl"); var device_util = require("../../device_util"); var engine_1 = require("../../engine"); var environment_1 = require("../../environment"); var globals_1 = require("../../globals"); var log_1 = require("../../log"); var array_ops_1 = require("../../ops/array_ops"); var array_ops_util = require("../../ops/array_ops_util"); var axis_util = require("../../ops/axis_util"); var complex_ops_1 = require("../../ops/complex_ops"); var concat_util_1 = require("../../ops/concat_util"); var gather_nd_util = require("../../ops/gather_nd_util"); var reduce_util = require("../../ops/reduce_util"); var scatter_nd_util = require("../../ops/scatter_nd_util"); var segment_util = require("../../ops/segment_util"); var slice_util = require("../../ops/slice_util"); var softmax_1 = require("../../ops/softmax"); var tensor_ops_1 = require("../../ops/tensor_ops"); var types_1 = require("../../types"); var util = require("../../util"); var util_1 = require("../../util"); var backend_1 = require("../backend"); var backend_util = require("../backend_util"); var complex_util_1 = require("../complex_util"); var non_max_suppression_impl_1 = require("../non_max_suppression_impl"); var split_shared_1 = require("../split_shared"); var tile_impl_1 = require("../tile_impl"); var topk_impl_1 = require("../topk_impl"); var where_impl_1 = require("../where_impl"); var addn_gpu_1 = require("./addn_gpu"); var addn_packed_gpu_1 = require("./addn_packed_gpu"); var argminmax_gpu_1 = require("./argminmax_gpu"); var argminmax_packed_gpu_1 = require("./argminmax_packed_gpu"); var avg_pool_backprop_gpu_1 = require("./avg_pool_backprop_gpu"); var batchnorm_gpu_1 = require("./batchnorm_gpu"); var batchnorm_packed_gpu_1 = require("./batchnorm_packed_gpu"); var binaryop_complex_gpu = require("./binaryop_complex_gpu"); var binaryop_complex_gpu_1 = require("./binaryop_complex_gpu"); var binaryop_gpu = require("./binaryop_gpu"); var binaryop_gpu_1 = require("./binaryop_gpu"); var binaryop_packed_gpu = require("./binaryop_packed_gpu"); var binaryop_packed_gpu_1 = require("./binaryop_packed_gpu"); var canvas_util_1 = require("./canvas_util"); var clip_gpu_1 = require("./clip_gpu"); var clip_packed_gpu_1 = require("./clip_packed_gpu"); var complex_abs_gpu_1 = require("./complex_abs_gpu"); var concat_gpu_1 = require("./concat_gpu"); var concat_packed_gpu_1 = require("./concat_packed_gpu"); var conv_backprop_gpu_1 = require("./conv_backprop_gpu"); var conv_backprop_gpu_depthwise_1 = require("./conv_backprop_gpu_depthwise"); var conv_gpu_1 = require("./conv_gpu"); var conv_gpu_depthwise_1 = require("./conv_gpu_depthwise"); var conv_packed_gpu_depthwise_1 = require("./conv_packed_gpu_depthwise"); var crop_and_resize_gpu_1 = require("./crop_and_resize_gpu"); var cumsum_gpu_1 = require("./cumsum_gpu"); var decode_matrix_gpu_1 = require("./decode_matrix_gpu"); var decode_matrix_packed_gpu_1 = require("./decode_matrix_packed_gpu"); var depth_to_space_gpu_1 = require("./depth_to_space_gpu"); var diag_gpu_1 = require("./diag_gpu"); var encode_float_gpu_1 = require("./encode_float_gpu"); var encode_float_packed_gpu_1 = require("./encode_float_packed_gpu"); var encode_matrix_gpu_1 = require("./encode_matrix_gpu"); var encode_matrix_packed_gpu_1 = require("./encode_matrix_packed_gpu"); var fft_gpu = require("./fft_gpu"); var fft_gpu_1 = require("./fft_gpu"); var fill_gpu_1 = require("./fill_gpu"); var gather_gpu_1 = require("./gather_gpu"); var gather_nd_gpu_1 = require("./gather_nd_gpu"); var gpgpu_context_1 = require("./gpgpu_context"); var gpgpu_math = require("./gpgpu_math"); var im2col_packed_gpu_1 = require("./im2col_packed_gpu"); var lrn_gpu_1 = require("./lrn_gpu"); var lrn_grad_gpu_1 = require("./lrn_grad_gpu"); var lrn_packed_gpu_1 = require("./lrn_packed_gpu"); var max_pool_backprop_gpu_1 = require("./max_pool_backprop_gpu"); var mulmat_packed_gpu_1 = require("./mulmat_packed_gpu"); var multinomial_gpu_1 = require("./multinomial_gpu"); var onehot_gpu_1 = require("./onehot_gpu"); var pack_gpu_1 = require("./pack_gpu"); var pad_gpu_1 = require("./pad_gpu"); var pad_packed_gpu_1 = require("./pad_packed_gpu"); var pool_gpu_1 = require("./pool_gpu"); var reduce_gpu_1 = require("./reduce_gpu"); var reshape_packed_gpu_1 = require("./reshape_packed_gpu"); var resize_bilinear_backprop_gpu_1 = require("./resize_bilinear_backprop_gpu"); var resize_bilinear_gpu_1 = require("./resize_bilinear_gpu"); var resize_bilinear_packed_gpu_1 = require("./resize_bilinear_packed_gpu"); var resize_nearest_neighbor_backprop_gpu_1 = require("./resize_nearest_neighbor_backprop_gpu"); var resize_nearest_neighbor_gpu_1 = require("./resize_nearest_neighbor_gpu"); var reverse_gpu_1 = require("./reverse_gpu"); var reverse_packed_gpu_1 = require("./reverse_packed_gpu"); var scatter_gpu_1 = require("./scatter_gpu"); var segment_gpu_1 = require("./segment_gpu"); var select_gpu_1 = require("./select_gpu"); var slice_gpu_1 = require("./slice_gpu"); var slice_packed_gpu_1 = require("./slice_packed_gpu"); var strided_slice_gpu_1 = require("./strided_slice_gpu"); var tex_util = require("./tex_util"); var tex_util_1 = require("./tex_util"); var texture_manager_1 = require("./texture_manager"); var tile_gpu_1 = require("./tile_gpu"); var transpose_gpu_1 = require("./transpose_gpu"); var transpose_packed_gpu_1 = require("./transpose_packed_gpu"); var unary_op = require("./unaryop_gpu"); var unaryop_gpu_1 = require("./unaryop_gpu"); var unary_packed_op = require("./unaryop_packed_gpu"); var unaryop_packed_gpu_1 = require("./unaryop_packed_gpu"); var unpack_gpu_1 = require("./unpack_gpu"); var webgl_util = require("./webgl_util"); var binaryCaches = {}; function getBinaryCache(webGLVersion) { if (webGLVersion in binaryCaches) { return binaryCaches[webGLVersion]; } binaryCaches[webGLVersion] = {}; return binaryCaches[webGLVersion]; } exports.getBinaryCache = getBinaryCache; function mapActivationToShaderProgram(activation, packed) { if (packed === void 0) { packed = false; } if (activation === 'linear') { if (packed) { return unary_packed_op.LINEAR; } return unary_op.LINEAR; } else if (activation === 'relu') { if (packed) { return unary_packed_op.RELU; } return unary_op.RELU; } else if (activation === 'elu') { if (packed) { return unary_packed_op.ELU; } return unary_op.ELU; } else if (activation === 'relu6') { if (packed) { return unary_packed_op.RELU6; } return unary_op.RELU6; } else if (activation === 'prelu') { if (packed) { return binaryop_packed_gpu.PRELU; } return binaryop_gpu.PRELU; } throw new Error("Activation " + activation + " has not been implemented for the WebGL backend."); } // Empirically determined constant used to determine size threshold for handing // off execution to the CPU. var CPU_HANDOFF_SIZE_THRESHOLD = 128; // Empirically determined constant used to decide the number of MB on GPU // before we warn about high memory use. The MB are this constant * screen area // * dpi / 1024 / 1024. var BEFORE_PAGING_CONSTANT = 600; function numMBBeforeWarning() { if (environment_1.env().global.screen == null) { return 1024; // 1 GB. } return (environment_1.env().global.screen.height * environment_1.env().global.screen.width * window.devicePixelRatio) * BEFORE_PAGING_CONSTANT / 1024 / 1024; } // Empirically determined minimal shared dimension in matmul before we forward // to a.mul(b).sum() in order to take advantage of GPU parallelism. See // https://github.com/tensorflow/tfjs-core/pull/1379 for benchmarks. exports.MATMUL_SHARED_DIM_THRESHOLD = 1000; var MathBackendWebGL = /** @class */ (function (_super) { __extends(MathBackendWebGL, _super); function MathBackendWebGL(gpgpu) { var _this = _super.call(this) || this; // Maps data ids that have a pending read operation, to list of subscribers. _this.pendingRead = new WeakMap(); // List of data ids that are scheduled for disposal, but are waiting on a // pending read operation. _this.pendingDisposal = new WeakSet(); // Used to count the number of 'shallow' sliced tensors that point to the // same data id. _this.dataRefCount = new WeakMap(); _this.numBytesInGPU = 0; // Accumulated time spent (including blocking) in uploading data to webgl. _this.uploadWaitMs = 0; // Accumulated time spent (including blocking in downloading data from webgl. _this.downloadWaitMs = 0; _this.warnedAboutMemory = false; _this.pendingDeletes = 0; _this.disposed = false; if (!environment_1.env().getBool('HAS_WEBGL')) { throw new Error('WebGL is not supported on this device'); } if (gpgpu == null) { var gl = canvas_util_1.getWebGLContext(environment_1.env().getNumber('WEBGL_VERSION')); _this.binaryCache = getBinaryCache(environment_1.env().getNumber('WEBGL_VERSION')); _this.gpgpu = new gpgpu_context_1.GPGPUContext(gl); _this.canvas = gl.canvas; _this.gpgpuCreatedLocally = true; } else { _this.gpgpu = gpgpu; _this.binaryCache = {}; _this.gpgpuCreatedLocally = false; _this.canvas = gpgpu.gl.canvas; } _this.textureManager = new texture_manager_1.TextureManager(_this.gpgpu); _this.numMBBeforeWarning = numMBBeforeWarning(); _this.texData = new backend_1.DataStorage(_this, engine_1.ENGINE); return _this; } MathBackendWebGL.prototype.numDataIds = function () { return this.texData.numDataIds() + (this.cpuBackend ? this.cpuBackend.numDataIds() : 0) - this.pendingDeletes; }; MathBackendWebGL.prototype.write = function (values, shape, dtype) { if (environment_1.env().getBool('DEBUG')) { this.checkNumericalProblems(values); } if (dtype === 'complex64' && values != null) { throw new Error("Cannot write to a complex64 dtype. " + "Please use tf.complex(real, imag)."); } var dataId = {}; this.texData.set(dataId, { shape: shape, dtype: dtype, values: values, usage: tex_util_1.TextureUsage.UPLOAD }); return dataId; }; MathBackendWebGL.prototype.move = function (dataId, values, shape, dtype) { if (environment_1.env().getBool('DEBUG')) { this.checkNumericalProblems(values); } if (dtype === 'complex64') { throw new Error("Cannot write to a complex64 dtype. " + "Please use tf.complex(real, imag)."); } this.texData.set(dataId, { shape: shape, dtype: dtype, values: values, usage: tex_util_1.TextureUsage.UPLOAD }); }; MathBackendWebGL.prototype.readSync = function (dataId) { var texData = this.texData.get(dataId); var values = texData.values, dtype = texData.dtype, complexTensors = texData.complexTensors, slice = texData.slice, shape = texData.shape, isPacked = texData.isPacked; if (slice != null) { var program = void 0; if (isPacked) { program = new unaryop_packed_gpu_1.UnaryOpPackedProgram(shape, unary_op.CLONE); } else { program = new unaryop_gpu_1.UnaryOpProgram(shape, unary_op.CLONE); } var res = this.runWebGLProgram(program, [{ dataId: dataId, shape: shape, dtype: dtype }], dtype); var data = this.readSync(res.dataId); this.disposeData(res.dataId); return data; } if (values != null) { return this.convertAndCacheOnCPU(dataId); } if (dtype === 'string') { return values; } var shouldTimeProgram = this.activeTimers != null; var start; if (shouldTimeProgram) { start = util.now(); } var result; if (dtype === 'complex64') { var realValues = complexTensors.real.dataSync(); var imagValues = complexTensors.imag.dataSync(); result = complex_util_1.mergeRealAndImagArrays(realValues, imagValues); } else { result = this.getValuesFromTexture(dataId); } if (shouldTimeProgram) { this.downloadWaitMs += util.now() - start; } return this.convertAndCacheOnCPU(dataId, result); }; MathBackendWebGL.prototype.read = function (dataId) { return __awaiter(this, void 0, void 0, function () { var subscribers_1, texData, values, shape, slice, dtype, complexTensors, isPacked, program, res, data, buffer, tmpDownloadTarget, tmpData, vals, ps, realValues, imagValues, size, dTypeVals, subscribers; var _a; return __generator(this, function (_b) { switch (_b.label) { case 0: if (this.pendingRead.has(dataId)) { subscribers_1 = this.pendingRead.get(dataId); return [2 /*return*/, new Promise(function (resolve) { return subscribers_1.push(resolve); })]; } texData = this.texData.get(dataId); values = texData.values, shape = texData.shape, slice = texData.slice, dtype = texData.dtype, complexTensors = texData.complexTensors, isPacked = texData.isPacked; if (slice != null) { program = void 0; if (isPacked) { program = new unaryop_packed_gpu_1.UnaryOpPackedProgram(shape, unary_op.CLONE); } else { program = new unaryop_gpu_1.UnaryOpProgram(shape, unary_op.CLONE); } res = this.runWebGLProgram(program, [{ dataId: dataId, shape: shape, dtype: dtype }], dtype); data = this.read(res.dataId); this.disposeData(res.dataId); return [2 /*return*/, data]; } if (values != null) { return [2 /*return*/, this.convertAndCacheOnCPU(dataId)]; } if (!environment_1.env().getBool('WEBGL_DOWNLOAD_FLOAT_ENABLED') && environment_1.env().getNumber('WEBGL_VERSION') === 2) { throw new Error("tensor.data() with WEBGL_DOWNLOAD_FLOAT_ENABLED=false and " + "WEBGL_VERSION=2 not yet supported."); } buffer = null; if (dtype !== 'complex64' && environment_1.env().get('WEBGL_BUFFER_SUPPORTED')) { // Possibly copy the texture into a buffer before inserting a fence. tmpDownloadTarget = this.decode(dataId); tmpData = this.texData.get(tmpDownloadTarget.dataId); buffer = (_a = this.gpgpu).createBufferFromTexture.apply(_a, [tmpData.texture].concat(tex_util.getDenseTexShape(shape))); } this.pendingRead.set(dataId, []); if (!(dtype !== 'complex64')) return [3 /*break*/, 2]; // Create a fence and wait for it to resolve. return [4 /*yield*/, this.gpgpu.createAndWaitForFence()]; case 1: // Create a fence and wait for it to resolve. _b.sent(); _b.label = 2; case 2: if (!(dtype === 'complex64')) return [3 /*break*/, 4]; return [4 /*yield*/, Promise.all([complexTensors.real.data(), complexTensors.imag.data()])]; case 3: ps = _b.sent(); realValues = ps[0]; imagValues = ps[1]; vals = complex_util_1.mergeRealAndImagArrays(realValues, imagValues); return [3 /*break*/, 5]; case 4: if (buffer == null) { vals = this.getValuesFromTexture(dataId); } else { size = util.sizeFromShape(shape); vals = this.gpgpu.downloadFloat32MatrixFromBuffer(buffer, size); } _b.label = 5; case 5: if (tmpDownloadTarget != null) { this.disposeData(tmpDownloadTarget.dataId); } dTypeVals = this.convertAndCacheOnCPU(dataId, vals); subscribers = this.pendingRead.get(dataId); this.pendingRead.delete(dataId); // Notify all pending reads. subscribers.forEach(function (resolve) { return resolve(dTypeVals); }); if (this.pendingDisposal.has(dataId)) { this.pendingDisposal.delete(dataId); this.disposeData(dataId); this.pendingDeletes--; } return [2 /*return*/, dTypeVals]; } }); }); }; MathBackendWebGL.prototype.checkNumericalProblems = function (values) { if (values == null) { return; } for (var i = 0; i < values.length; i++) { var num = values[i]; if (!webgl_util.canBeRepresented(num)) { if (environment_1.env().getBool('WEBGL_RENDER_FLOAT32_CAPABLE')) { throw Error("The value " + num + " cannot be represented with your " + "current settings. Consider enabling float32 rendering: " + "'tf.env().set('WEBGL_RENDER_FLOAT32_ENABLED', true);'"); } throw Error("The value " + num + " cannot be represented on this device."); } } }; MathBackendWebGL.prototype.getValuesFromTexture = function (dataId) { var _a; var _b = this.texData.get(dataId), shape = _b.shape, dtype = _b.dtype, isPacked = _b.isPacked; var size = util.sizeFromShape(shape); if (environment_1.env().getBool('WEBGL_DOWNLOAD_FLOAT_ENABLED')) { var tmpTarget = this.decode(dataId); var tmpData_1 = this.texData.get(tmpTarget.dataId); var vals_1 = (_a = this.gpgpu).downloadMatrixFromPackedTexture.apply(_a, [tmpData_1.texture].concat(tex_util.getDenseTexShape(shape))).subarray(0, size); this.disposeData(tmpTarget.dataId); return vals_1; } var shouldUsePackedProgram = environment_1.env().getBool('WEBGL_PACK') && isPacked === true; var outputShape = shouldUsePackedProgram ? webgl_util.getShapeAs3D(shape) : shape; var program = shouldUsePackedProgram ? new encode_float_packed_gpu_1.EncodeFloatPackedProgram(outputShape) : new encode_float_gpu_1.EncodeFloatProgram(outputShape); var output = this.runWebGLProgram(program, [{ shape: outputShape, dtype: dtype, dataId: dataId }], 'float32'); var tmpData = this.texData.get(output.dataId); var vals = this.gpgpu .downloadByteEncodedFloatMatrixFromOutputTexture(tmpData.texture, tmpData.texShape[0], tmpData.texShape[1]) .subarray(0, size); this.disposeData(output.dataId); return vals; }; MathBackendWebGL.prototype.time = function (f) { return __awaiter(this, void 0, void 0, function () { var oldActiveTimers, newActiveTimers, outerMostTime, flattenedActiveTimerQueries, flattenedActiveTimerNames, res, kernelMs_1; return __generator(this, function (_a) { switch (_a.label) { case 0: oldActiveTimers = this.activeTimers; newActiveTimers = []; outerMostTime = false; if (this.programTimersStack == null) { this.programTimersStack = newActiveTimers; outerMostTime = true; } else { this.activeTimers.push(newActiveTimers); } this.activeTimers = newActiveTimers; f(); flattenedActiveTimerQueries = util.flatten(this.activeTimers.map(function (d) { return d.query; })) .filter(function (d) { return d != null; }); flattenedActiveTimerNames = util.flatten(this.activeTimers.map(function (d) { return d.name; })) .filter(function (d) { return d != null; }); this.activeTimers = oldActiveTimers; if (outerMostTime) { this.programTimersStack = null; } res = { uploadWaitMs: this.uploadWaitMs, downloadWaitMs: this.downloadWaitMs, kernelMs: null, wallMs: null // will be filled by the engine }; if (!(environment_1.env().getNumber('WEBGL_DISJOINT_QUERY_TIMER_EXTENSION_RELIABLE') > 0)) return [3 /*break*/, 2]; return [4 /*yield*/, Promise.all(flattenedActiveTimerQueries)]; case 1: kernelMs_1 = _a.sent(); res['kernelMs'] = util.sum(kernelMs_1); res['getExtraProfileInfo'] = function () { return kernelMs_1.map(function (d, i) { return ({ name: flattenedActiveTimerNames[i], ms: d }); }) .map(function (d) { return d.name + ": " + d.ms; }) .join(', '); }; return [3 /*break*/, 3]; case 2: res['kernelMs'] = { error: 'WebGL query timers are not supported in this environment.' }; _a.label = 3; case 3: this.uploadWaitMs = 0; this.downloadWaitMs = 0; return [2 /*return*/, res]; } }); }); }; MathBackendWebGL.prototype.memory = function () { return { unreliable: false, numBytesInGPU: this.numBytesInGPU }; }; MathBackendWebGL.prototype.startTimer = function () { if (environment_1.env().getNumber('WEBGL_DISJOINT_QUERY_TIMER_EXTENSION_RELIABLE') > 0) { return this.gpgpu.beginQuery(); } return { startMs: util.now(), endMs: null }; }; MathBackendWebGL.prototype.endTimer = function (query) { if (environment_1.env().getNumber('WEBGL_DISJOINT_QUERY_TIMER_EXTENSION_RELIABLE') > 0) { this.gpgpu.endQuery(); return query; } query.endMs = util.now(); return query; }; MathBackendWebGL.prototype.getQueryTime = function (query) { return __awaiter(this, void 0, void 0, function () { var timerQuery; return __generator(this, function (_a) { if (environment_1.env().getNumber('WEBGL_DISJOINT_QUERY_TIMER_EXTENSION_RELIABLE') > 0) { return [2 /*return*/, this.gpgpu.waitForQueryAndGetTime(query)]; } timerQuery = query; return [2 /*return*/, timerQuery.endMs - timerQuery.startMs]; }); }); }; MathBackendWebGL.prototype.disposeData = function (dataId) { if (this.pendingDisposal.has(dataId)) { return; } if (this.pendingRead.has(dataId)) { this.pendingDisposal.add(dataId); this.pendingDeletes++; return; } // No-op if already disposed. if (!this.texData.has(dataId)) { return; } this.releaseGPUData(dataId); var complexTensors = this.texData.get(dataId).complexTensors; if (complexTensors != null) { complexTensors.real.dispose(); complexTensors.imag.dispose(); } this.texData.delete(dataId); }; MathBackendWebGL.prototype.releaseGPUData = function (dataId) { var _a = this.texData.get(dataId), texture = _a.texture, dtype = _a.dtype, texShape = _a.texShape, usage = _a.usage, isPacked = _a.isPacked, slice = _a.slice; var key = slice && slice.origDataId || dataId; var refCount = this.dataRefCount.get(key); if (refCount > 1) { this.dataRefCount.set(key, refCount - 1); } else { this.dataRefCount.delete(key); if (texture != null) { this.numBytesInGPU -= this.computeBytes(texShape, dtype); this.textureManager.releaseTexture(texture, texShape, usage, isPacked); } } var texData = this.texData.get(dataId); texData.texture = null; texData.texShape = null; texData.isPacked = false; texData.slice = null; }; MathBackendWebGL.prototype.getTexture = function (dataId) { this.uploadToGPU(dataId); return this.texData.get(dataId).texture; }; /** * Returns internal information for the specific data bucket. Used in unit * tests. */ MathBackendWebGL.prototype.getDataInfo = function (dataId) { return this.texData.get(dataId); }; MathBackendWebGL.prototype.getCPUBackend = function () { if (!environment_1.env().getBool('WEBGL_CPU_FORWARD')) { return null; } if (this.cpuBackend == null) { this.cpuBackend = engine_1.ENGINE.findBackend('cpu'); } return this.cpuBackend; }; /* Tests whether all the inputs to an op are small and on the CPU. This heuristic determines when it would be faster to execute a kernel on the CPU. WebGL kernels opt into running this check and forwarding when appropriate. TODO(https://github.com/tensorflow/tfjs/issues/872): Develop a more sustainable strategy for optimizing backend execution of ops. */ MathBackendWebGL.prototype.shouldExecuteOnCPU = function (inputs, sizeThreshold) { var _this = this; if (sizeThreshold === void 0) { sizeThreshold = CPU_HANDOFF_SIZE_THRESHOLD; } return this.getCPUBackend() != null && inputs.every(function (input) { return _this.texData.get(input.dataId).texture == null && input.size < sizeThreshold; }); }; MathBackendWebGL.prototype.getGPGPUContext = function () { return this.gpgpu; }; MathBackendWebGL.prototype.complex = function (real, imag) { var result = this.makeOutput(real.shape, 'complex64'); var resultData = this.texData.get(result.dataId); // The backend owns the reference to the underlying real and imaginary // clones. These will explicitly get disposed when the complex tensor is // disposed. resultData.complexTensors = { real: engine_1.ENGINE.keep(real.clone()), imag: engine_1.ENGINE.keep(imag.clone()) }; return result; }; MathBackendWebGL.prototype.real = function (input) { var resultData = this.texData.get(input.dataId); return resultData.complexTensors.real.clone(); }; MathBackendWebGL.prototype.imag = function (input) { var resultData = this.texData.get(input.dataId); return resultData.complexTensors.imag.clone(); }; MathBackendWebGL.prototype.slice = function (x, begin, size) { if (this.shouldExecuteOnCPU([x])) { return this.cpuBackend.slice(x, begin, size); } // Short-circuit computation if the slice is zero-sized. if (util.sizeFromShape(size) === 0) { return tensor_ops_1.tensor([], size, x.dtype); } var isPacked = this.texData.get(x.dataId).isPacked; var isContinous = slice_util.isSliceContinous(x.shape, begin, size); if (isPacked || !isContinous) { var program = environment_1.env().getBool('WEBGL_PACK_ARRAY_OPERATIONS') ? new slice_packed_gpu_1.SlicePackedProgram(size) : new slice_gpu_1.SliceProgram(size); var customSetup = program.getCustomSetupFunc(begin); return this.compileAndRun(program, [x], null, customSetup); } this.uploadToGPU(x.dataId); return this.shallowSlice(x, begin, size); }; MathBackendWebGL.prototype.shallowSlice = function (x, begin, size) { var xTexData = this.texData.get(x.dataId); var t = this.makeOutput(size, x.dtype); var newTexData = this.texData.get(t.dataId); // Copy texture data from the original tensor. Object.assign(newTexData, xTexData); newTexData.shape = size; newTexData.dtype = x.dtype; var flatOffset = slice_util.computeFlatOffset(begin, x.strides); if (xTexData.slice) { // We are slicing an already sliced tensor, so we have to accumulate // the offset. flatOffset += xTexData.slice.flatOffset; } newTexData.slice = { flatOffset: flatOffset, // Point to the original dataId, which is used to do ref counting. origDataId: xTexData.slice && xTexData.slice.origDataId || x.dataId }; // Increase the ref count for that data bucket. var refCount = this.dataRefCount.get(newTexData.slice.origDataId) || 1; this.dataRefCount.set(newTexData.slice.origDataId, refCount + 1); return t; }; MathBackendWebGL.prototype.stridedSlice = function (x, begin, end, strides) { if (this.shouldExecuteOnCPU([x])) { return this.cpuBackend.stridedSlice(x, begin, end, strides); } var outShape = slice_util.computeOutShape(begin, end, strides); if (outShape.some(function (axis) { return axis === 0; })) { return tensor_ops_1.tensor([], outShape); } var program = new strided_slice_gpu_1.StridedSliceProgram(begin, strides, outShape); return this.compileAndRun(program, [x]); }; MathBackendWebGL.prototype.reverse = function (x, axis) { var program = environment_1.env().getBool('WEBGL_PACK_ARRAY_OPERATIONS') ? new reverse_packed_gpu_1.ReversePackedProgram(x.shape, axis) : new reverse_gpu_1.ReverseProgram(x.shape, axis); return this.compileAndRun(program, [x]); }; MathBackendWebGL.prototype.concat = function (tensors, axis) { if (tensors[0].dtype === 'complex64') { var reals = tensors.map(function (t) { return complex_ops_1.real(t); }); var imags = tensors.map(function (t) { return complex_ops_1.imag(t); }); return complex_ops_1.complex(this.concat(reals, axis), this.concat(imags, axis)); } if (this.shouldExecuteOnCPU(tensors)) { return this.cpuBackend.concat(tensors, axis); } if (tensors.length === 1) { return tensors[0]; } if (tensors.length > environment_1.env().getNumber('WEBGL_MAX_TEXTURES_IN_SHADER')) { var midIndex = Math.floor(tensors.length / 2); var leftSide = this.concat(tensors.slice(0, midIndex), axis); var rightSide = this.concat(tensors.slice(midIndex), axis); return this.concat([leftSide, rightSide], axis); } if (environment_1.env().getBool('WEBGL_PACK_ARRAY_OPERATIONS') && tensors[0].rank > 1) { var program_1 = new concat_packed_gpu_1.ConcatPackedProgram(tensors.map(function (t) { return t.shape; }), axis); return this.compileAndRun(program_1, tensors); } // Any concat of n-dimensional tensors across any axis can be reduced to // a concatenation of two-dimensional tensors across the axis 1 by first // partitioning the axes of the original tensors into those less than the // axis to be concatenated and the rest. Then reshape the tensors // into a two-dimensional tensor by collapsing these two sets of axes and // concatenate the resulting matrices across the axis 1, finally reshaping // the result to have the proper shape. var outShape = concat_util_1.computeOutShape(tensors.map(function (t) { return t.shape; }), axis); var tensors2D = tensors.map(function (t) { return t.as2D(-1, util_1.sizeFromShape(t.shape.slice(axis))); }); var program = new concat_gpu_1.ConcatProgram(tensors2D.map(function (t) { return t.shape; })); var res = this.compileAndRun(program, tensors2D); return res.reshape(outShape); }; MathBackendWebGL.prototype.neg = function (x) { if (this.shouldExecuteOnCPU([x])) { return this.cpuBackend.neg(x); } if (environment_1.env().getBool('WEBGL_PACK_UNARY_OPERATIONS')) { return this.packedUnaryOp(x, unary_op.NEG, x.dtype); } var program = new unaryop_gpu_1.UnaryOpProgram(x.shape, unary_op.NEG); return this.compileAndRun(program, [x]); }; MathBackendWebGL.prototype.batchMatMul = function (a, b, transposeA, transposeB) { var outerShapeA = transposeA ? a.shape[2] : a.shape[1]; var outerShapeB = transposeB ? b.shape[1] : b.shape[2]; var sharedDim = transposeA ? a.shape[1] : a.shape[2]; var _a = a.shape, batch = _a[0]; // Since the matrices are vectors, it is faster to call mul().sum() // because sum() is O(sqrt(N)) due to divide-and-conquer. if ((outerShapeA === 1 || outerShapeB === 1) && sharedDim > exports.MATMUL_SHARED_DIM_THRESHOLD) { if (transposeA) { a = a.transpose([0, 2, 1]); } if (transposeB) { b = b.transpose([0, 2, 1]); } var a3D = outerShapeB === 1 ? a : a.as3D(batch, sharedDim, 1); var axis = outerShapeB === 1 ? 2 : 1; var b3D = outerShapeB === 1 ? b.as3D(batch, 1, sharedDim) : b; return this.multiply(a3D, b3D).sum(axis, true /* keepDims */); } var dtype = types_1.upcastType(a.dtype, b.dtype); var program = new mulmat_packed_gpu_1.MatMulPackedProgram(a.shape, [batch, outerShapeA, outerShapeB], transposeA, transposeB); return this.compileAndRun(program, [a, b], dtype); }; MathBackendWebGL.prototype.fusedBatchMatMul = function (_a) { var a = _a.a, b = _a.b, transposeA = _a.transposeA, transposeB = _a.transposeB, bias = _a.bias, activation = _a.activation, preluActivationWeights = _a.preluActivationWeights; var outerShapeA = transposeA ? a.shape[2] : a.shape[1]; var outerShapeB = transposeB ? b.shape[1] : b.shape[2]; var _b = a.shape, batch = _b[0]; var dtype = types_1.upcastType(a.dtype, b.dtype); var hasBias = bias != null; var hasPreluActivationWeights = preluActivationWeights != null; var fusedActivation = activation ? mapActivationToShaderProgram(activation, true) : null; var program = new mulmat_packed_gpu_1.MatMulPackedProgram(a.shape, [batch, outerShapeA, outerShapeB], transposeA, transposeB, hasBias, fusedActivation, hasPreluActivationWeights); var inputs = [a, b]; if (bias) { inputs.push(bias); } if (preluActivationWeights) { inputs.push(preluActivationWeights); } return this.compileAndRun(program, inputs, dtype); }; MathBackendWebGL.prototype.multiply = function (a, b) { if (a.dtype === 'complex64') { var aData = this.texData.get(a.dataId); var bData = this.texData.get(b.dataId); var realProgram = new binaryop_complex_gpu_1.BinaryOpComplexProgram(binaryop_complex_gpu.COMPLEX_MULTIPLY.REAL, a.shape, b.shape); var imagProgram = new binaryop_complex_gpu_1.BinaryOpComplexProgram(binaryop_complex_gpu.COMPLEX_MULTIPLY.IMAG, a.shape, b.shape); var inputs = [ this.makeComplexComponentTensorInfo(a, aData.complexTensors.real), this.makeComplexComponentTensorInfo(a, aData.complexTensors.imag), this.makeComplexComponentTensorInfo(b, bData.complexTensors.real), this.makeComplexComponentTensorInfo(b, bData.complexTensors.imag) ]; var real_1 = this.compileAndRun(realProgram, inputs); var imag_1 = this.compileAndRun(imagProgram, inputs); var complex_1 = this.complex(real_1, imag_1); real_1.dispose(); imag_1.dispose(); return complex_1; } if (this.shouldExecuteOnCPU([a, b])) { return this.cpuBackend.multiply(a, b); } if (environment_1.env().getBool('WEBGL_PACK_BINARY_OPERATIONS')) { return this.packedBinaryOp(a, b, binaryop_gpu.MUL, a.dtype); } var program = new binaryop_gpu_1.BinaryOpProgram(binaryop_gpu.MUL, a.shape, b.shape); return this.compileAndRun(program, [a, b], a.dtype); }; MathBackendWebGL.prototype.batchNormalization = function (x, mean, variance, varianceEpsilon, scale, offset) { var inputs = [x, mean, variance]; var offsetShape = null; if (offset != null) { offsetShape = offset.shape; inputs.push(offset); } var scaleShape = null; if (scale != null) { scaleShape = scale.shape; inputs.push(scale); } if (environment_1.env().getBool('WEBGL_PACK_NORMALIZATION')) { var batchNormPackedProgram = new batchnorm_packed_gpu_1.BatchNormPackedProgram(x.shape, mean.shape, variance.shape, offsetShape, scaleShape, varianceEpsilon); return this.compileAndRun(batchNormPackedProgram, inputs); } var batchNormProgram = new batchnorm_gpu_1.BatchNormProgram(x.shape, mean.shape, variance.shape, offsetShape, scaleShape, varianceEpsilon); return this.compileAndRun(batchNormProgram, inputs); }; MathBackendWebGL.prototype.localResponseNormalization4D = function (x, radius, bias, alpha, beta) { var program = environment_1.env().getBool('WEBGL_PACK_NORMALIZATION') ? new lrn_packed_gpu_1.LRNPackedProgram(x.shape, radius, bias, alpha, beta) : new lrn_gpu_1.LRNProgram(x.shape, radius, bias, alpha, beta); return this.compileAndRun(program, [x]); }; MathBackendWebGL.prototype.LRNGrad = function (dy, inputImage, outputImage, depthRadius, bias, alpha, beta) { var program = new lrn_grad_gpu_1.LRNGradProgram(inputImage.shape, depthRadius, bias, alpha, beta); return this.compileAndRun(program, [inputImage, outputImage, dy]); }; MathBackendWebGL.prototype.tile = function (x, reps) { if (x.dtype === 'string') { var data = this.readSync(x.dataId); var decodedData = data.map(function (d) { return util.decodeString(d); }); var buf = array_ops_1.buffer(x.shape, x.dtype, decodedData); return tile_impl_1.tile(buf, reps); } var program = new tile_gpu_1.TileProgram(x.shape, reps); return this.compileAndRun(program, [x]); }; MathBackendWebGL.prototype.pad = function (x, paddings, constantValue) { var program = environment_1.env().getBool('WEBGL_PACK_ARRAY_OPERATIONS') ? new pad_packed_gpu_1.PadPackedProgram(x.shape, paddings, constantValue) : new pad_gpu_1.PadProgram(x.shape, paddings, constantValue); return this.compileAndRun(program, [x]); }; MathBackendWebGL.prototype.transpose = function (x, perm) { if (this.shouldExecuteOnCPU([x])) { return this.cpuBackend.transpose(x, perm); } var program = environment_1.env().getBool('WEBGL_PACK_ARRAY_OPERATIONS') ? new transpose_packed_gpu_1.TransposePackedProgram(x.shape, perm) : new transpose_gpu_1.TransposeProgram(x.shape, perm); return this.compileAndRun(program, [x]); }; MathBackendWebGL.prototype.gather = function (x, indices, axis) { if (this.shouldExecuteOnCPU([x, indices])) { return this.cpuBackend.gather(x, indices, axis); } var program = new gather_gpu_1.GatherProgram(x.shape, indices.size, axis); return this.compileAndRun(program, [x, indices]); }; MathBackendWebGL.prototype.batchToSpaceND = function (x, blockShape, crops) { util.assert(x.rank <= 4, function () { return 'batchToSpaceND for rank > 4 with a WebGL backend not ' + 'implemented yet'; }); var prod = blockShape.reduce(function (a, b) { return a * b; }); var reshaped = array_ops_util.getReshaped(x.shape, blockShape, prod); var permuted = array_ops_util.getPermuted(reshaped.length, blockShape.length); var reshapedPermuted = array_ops_util.getReshapedPermuted(x.shape, blockShape, prod); var sliceBeginCoords = array_ops_util.getSliceBeginCoords(crops, blockShape.length); var sliceSize = array_ops_util.getSliceSize(reshapedPermuted, crops, blockShape.length); return x.reshape(reshaped) .transpose(permuted) .reshape(reshapedPermuted) .slice(sliceBeginCoords, sliceSize); }; MathBackendWebGL.prototype.spaceToBatchND = function (x, blockShape, paddings) { util.assert(x.rank <= 4, function () { return 'spaceToBatchND for rank > 4 with a WebGL backend not ' + 'implemented yet'; }); var prod = blockShape.reduce(function (a, b) { return a * b; }); var completePaddings = [[0, 0]]; completePaddings.push.apply(completePaddings, paddings); for (var i = 1 + blockShape.length; i < x.shape.length; ++i) { completePaddings.push([0, 0]); } var paddedX = x.pad(completePaddings); var reshapedPaddedShape = array_ops_util.getReshaped(paddedX.shape, blockShape, prod, false); var permutedReshapedPaddedPermutation = array_ops_util.getPermuted(reshapedPaddedShape.length, blockShape.length, false); var flattenShape = array_ops_util.getReshapedPermuted(paddedX.shape, blockShape, prod, false); return paddedX.reshape(reshapedPaddedShape) .transpose(permutedReshapedPaddedPermutation) .reshape(flattenShape); }; MathBackendWebGL.prototype.reduce = function (x, reduceType, dtype) { var batchSize = x.shape[0]; var inSize = x.shape[1]; var windowSize = reduce_util.computeOptimalWindowSize(inSize); var reduceInfo = { windowSize: windowSize, inSize: inSize, batchSize: batchSize }; var program = new reduce_gpu_1.ReduceProgram(reduceInfo, reduceType); var output = this.compileAndRun(program, [x], dtype); // No need to run another GPGPU program. if (output.shape[1] === 1) { return output; } return this.reduce(output, reduceType, dtype); }; MathBackendWebGL.prototype.argReduce = function (x, reduceType, bestIndicesA) { if (bestIndicesA === void 0) { bestIndicesA = null; } var batchSize = x.shape[0]; var inSize = x.shape[1]; if (bestIndicesA != null) { batchSize = bestIndicesA.shape[0]; inSize = bestIndicesA.shape[1]; } var windowSize = reduce_util.computeOptimalWindowSize(inSize); var reduceInfo = { windowSize: windowSize, inSize: inSize, batchSize: batchSize }; var program = new argminmax_gpu_1.ArgMinMaxProgram(reduceInfo, reduceType, bestIndicesA == null); var inputs = [x]; if (bestIndicesA != null) { inputs.push(bestIndicesA); } var output = this.compileAndRun(program, inputs, 'int32'); // No need to run another GPGPU program. if (output.shape[1] === 1) { return output; } return this.argReduce(x, reduceType, output); }; MathBackendWebGL.prototype.argReducePacked = function (x, reduceType, bestIndicesA) { if (bestIndicesA === void 0) { bestIndicesA = null; } var inShape = bestIndicesA != null ? bestIndicesA.shape : x.shape; var inSize = inShape[inShape.length - 1]; var windowSize = reduce_util.computeOptimalWindowSize(inSize); var program = new argminmax_packed_gpu_1.ArgMinMaxPackedProgram(inShape, windowSize, reduceType, bestIndicesA == null); var inputs = bestIndicesA == null ? [x] : [x, bestIndicesA]; var output = this.compileAndRun(program, inputs, 'int32'); if (output.rank === x.rank) { return this.argReducePacked(x, reduceType, output); } return output; }; MathBackendWebGL.prototype.sum = function (x, axes) { axis_util.assertAxesAreInnerMostDims('sum', axes, x.rank); var _a = axis_util.computeOutAndReduceShapes(x.shape, axes), outShape = _a[0], reduceShape = _a[1]; var inSize = util.sizeFromShape(reduceShape); var a2D = x.as2D(-1, inSize); var outputDType = types_1.sumOutType(x.dtype); return this.reduce(a2D, 'sum', outputDType).reshape(outShape); }; MathBackendWebGL.prototype.prod = function (x, axes) { if (this.shouldExecuteOnCPU([x])) { return this.cpuBackend.prod(x, axes); } var _a = axis_util.computeOutAndReduceShapes(x.shape, axes), outShape = _a[0], reduceShape = _a[1]; var inSize = util.sizeFromShape(reduceShape); var a2D = x.as2D(-1, inSize); var outputDType = types_1.sumOutType(x.dtype); return this.reduce(a2D, 'prod', outputDType).reshape(outShape); }; MathBackendWebGL.prototype.unsortedSegmentSum = function (x, segmentIds, numSegments) { var axis = 0; var permutation = axis_util.getAxesPermutation([axis], x.rank); var permutedX = x; if (permutation != null) { permutedX = x.transpose(permutation); axis = axis_util.getInnerMostAxes(1, x.rank)[0]; } var outShape = segment_util.computeOutShape(permutedX.shape, axis, numSegments); var inSize = util.sizeFromShape([permutedX.shape[axis]]); var a2D = permutedX.as2D(-1, inSize); var outputDType = types_1.sumOutType(x.dtype); var result = this.segOpCompute(a2D, 'unsortedSegmentSum', segmentIds, outputDType, numSegments) .reshape(outShape); if (permutation != null) { result = result.transpose(axis_util.getUndoAxesPermutation(permutation)); } return result; }; MathBackendWebGL.prototype.segOpCompute = function (x, segOpType, segmentIds, dtype, numSegments) { var batchSize = x.shape[0]; var inSize = x.shape[1]; var windowSize = segment_util.segOpComputeOptimalWindowSize(inSize, numSegments); var segOpInfo = { windowSize: windowSize, inSize: inSize, batchSize: batchSize, numSegments: numSegments }; var program = new segment_gpu_1.SegmentOpProgram(segOpInfo, segOpType); var output = this.compileAndRun(program, [x, segmentIds], dtype); // No need to run another GPGPU program. if (output.shape[1] === numSegments) { return output; } segmentIds = tensor_ops_1.range(0, numSegments).tile([inSize / windowSize]); return this.segOpCompute(output, segOpType, segmentIds, dtype, numSegments); }; MathBackendWebGL.prototype.argMinMaxReduce = function (x, axis, reduceType) { var axes = [axis]; axis_util.assertAxesAreInnerMostDims('arg' + reduceType.charAt(0).toUpperCase() + reduceType.slice(1), axes, x.rank); if (!environment_1.env().getBool('WEBGL_PACK_REDUCE') || x.rank <= 2) { var _a = axis_util.computeOutAndReduceShapes(x.shape, axes), outShape = _a[0], reduceShape = _a[1]; var inSize = util.sizeFromShape(reduceShape); var a2D = x.as2D(-1, inSize); return this.argReduce(a2D, reduceType).reshape(outShape); } return this.argReducePacked(x, reduceType); }; MathBackendWebGL.prototype.argMin = function (x, axis) { return this.argMinMaxReduce(x, axis, 'min'); }; MathBackendWebGL.prototype.argMax = function (x, axis) { return this.argMinMaxReduce(x, axis, 'max'); }; MathBackendWebGL.prototype.cumsum = function (x, axis, exclusive, reverse) { if (axis !== x.rank - 1) { throw new Error("WebGL cumsum shader expects an inner-most axis=" + (x.rank - 1) + " " + ("but got axis=" + axis)); } var program = new cumsum_gpu_1.CumSumProgram(x.shape, exclusive, reverse); return this.compileAndRun(program, [x]); }; MathBackendWebGL.prototype.equal = function (a, b) { if (environment_1.env().getBool('WEBGL_PACK_BINARY_OPERATIONS')) { return this.packedBinaryOp(a, b, binaryop_packed_gpu.EQUAL, 'bool'); } var program = new binaryop_gpu_1.BinaryOpProgram(binaryop_gpu.EQUAL, a.shape, b.shape); return this.compileAndRun(program, [a, b], 'bool'); }; MathBackendWebGL.prototype.notEqual = function (a, b) { if (environment_1.env().getBool('WEBGL_PACK_BINARY_OPERATIONS')) { return this.packedBinaryOp(a, b, binaryop_packed_gpu.NOT_EQUAL, 'bool'); } var program = new binaryop_gpu_1.BinaryOpProgram(binaryop_gpu.NOT_EQUAL, a.shape, b.shape); return this.compileAndRun(program, [a, b], 'bool'); }; MathBackendWebGL.prototype.less = function (a, b) { if (this.shouldExecuteOnCPU([a, b])) { return this.cpuBackend.less(a, b); } if (environment_1.env().getBool('WEBGL_PACK_BINARY_OPERATIONS')) { return this.packedBinaryOp(a, b, binaryop_packed_gpu.LESS, 'bool'); } var program = new binaryop_gpu_1.BinaryOpProgram(binaryop_gpu.LESS, a.shape, b.shape); return this.compileAndRun(program, [a, b], 'bool'); }; MathBackendWebGL.prototype.lessEqual = function (a, b) { if (environment_1.env().getBool('WEBGL_PACK_BINARY_OPERATIONS')) { return this.packedBinaryOp(a, b, binaryop_packed_gpu.LESS_EQUAL, 'bool'); } var program = new binaryop_gpu_1.BinaryOpProgram(binaryop_gpu.LESS_EQUAL, a.shape, b.shape); return this.compileAndRun(program, [a, b], 'bool'); }; MathBackendWebGL.prototype.greater = function (a, b) { if (this.shouldExecuteOnCPU([a, b])) { return this.cpuBackend.greater(a, b); } if (environment_1.env().getBool('WEBGL_PACK_BINARY_OPERATIONS')) { return this.packedBinaryOp(a, b, binaryop_packed_gpu.GREATER, 'bool'); } var program = new binaryop_gpu_1.BinaryOpProgram(binaryop_gpu.GREATER, a.shape, b.shape); return this.compileAndRun(program, [a, b], 'bool'); }; MathBackendWebGL.prototype.greaterEqual = function (a, b) { if (environment_1.env().getBool('WEBGL_PACK_BINARY_OPERATIONS')) { return this.packedBinaryOp(a, b, binaryop_packed_gpu.GREATER_EQUAL, 'bool'); } var program = new binaryop_gpu_1.BinaryOpProgram(binaryop_gpu.GREATER_EQUAL, a.shape, b.shape); return this.compileAndRun(program, [a, b], 'bool'); }; MathBackendWebGL.prototype.logicalNot = function (x) { var program = new unaryop_gpu_1.UnaryOpProgram(x.shape, unary_op.LOGICAL_NOT); return this.compileAndRun(program, [x]); }; MathBackendWebGL.prototype.logicalAnd = function (a, b) { if (environment_1.env().getBool('WEBGL_PACK_BINARY_OPERATIONS')) { return this.packedBinaryOp(a, b, binaryop_packed_gpu.LOGICAL_AND, 'bool'); } var program = new binaryop_gpu_1.BinaryOpProgram(binaryop_gpu.LOGICAL_AND, a.shape, b.shape); return this.compileAndRun(program, [a, b], 'bool'); }; MathBackendWebGL.prototype.logicalOr = function (a, b) { if (environment_1.env().getBool('WEBGL_PACK_BINARY_OPERATIONS')) { return this.packedBinaryOp(a, b, binaryop_packed_gpu.LOGICAL_OR, 'bool'); } var program = new binaryop_gpu_1.BinaryOpProgram(binaryop_gpu.LOGICAL_OR, a.shape, b.shape); return this.compileAndRun(program, [a, b], 'bool'); }; MathBackendWebGL.prototype.select = function (condition, a, b) { var program = new select_gpu_1.SelectProgram(condition.rank, a.shape, a.rank); return this.compileAndRun(program, [condition, a, b], types_1.upcastType(a.dtype, b.dtype)); }; MathBackendWebGL.prototype.where = function (condition) { log_1.warn('tf.where() in webgl locks the UI thread. ' + 'Call tf.whereAsync() instead'); var condVals = condition.dataSync(); return where_impl_1.whereImpl(condition.shape, condVals); }; MathBackendWebGL.prototype.topk = function (x, k, sorted) { var xVals = x.dataSync(); return topk_impl_1.topkImpl(xVals, x.shape, x.dtype, k, sorted); }; MathBackendWebGL.prototype.min = function (x, axes) { axis_util.assertAxesAreInnerMostDims('min', axes, x.rank); var _a = axis_util.computeOutAndReduceShapes(x.shape, axes), outShape = _a[0], reduceShape = _a[1]; var inSize = util.sizeFromShape(reduceShape); var a2D = x.as2D(-1, inSize); return this.reduce(a2D, 'min', a2D.dtype).reshape(outShape); }; MathBackendWebGL.prototype.minimum = function (a, b) { if (this.shouldExecuteOnCPU([a, b])) { return this.cpuBackend.minimum(a, b); } var program = environment_1.env().getBool('WEBGL_PACK_BINARY_OPERATIONS') ? new binaryop_packed_gpu_1.BinaryOpPackedProgram(binaryop_packed_gpu.MIN, a.shape, b.shape) : new binaryop_gpu_1.BinaryOpProgram(binaryop_gpu.MIN, a.shape, b.shape); return this.compileAndRun(program, [a, b]); }; MathBackendWebGL.prototype.mod = function (a, b) { var program = environment_1.env().getBool('WEBGL_PACK_BINARY_OPERATIONS') ? new binaryop_packed_gpu_1.BinaryOpPackedProgram(binaryop_packed_gpu.MOD, a.shape, b.shape) : new binaryop_gpu_1.BinaryOpProgram(binaryop_gpu.MOD, a.shape, b.shape); return this.compileAndRun(program, [a, b]); }; MathBackendWebGL.prototype.max = function (x, axes) { if (this.shouldExecuteOnCPU([x])) { return this.cpuBackend.max(x, axes); } axis_util.assertAxesAreInnerMostDims('max', axes, x.rank); var _a = axis_util.computeOutAndReduceShapes(x.shape, axes), outShape = _a[0], reduceShape = _a[1]; var inSize = util.sizeFromShape(reduceShape); var a2D = x.as2D(-1, inSize); return this.reduce(a2D, 'max', a2D.dtype).reshape(outShape); }; MathBackendWebGL.prototype.maximum = function (a, b) { if (this.shouldExecuteOnCPU([a, b])) { return this.cpuBackend.maximum(a, b); } var program = environment_1.env().getBool('WEBGL_PACK_BINARY_OPERATIONS') ? new binaryop_packed_gpu_1.BinaryOpPackedProgram(binaryop_packed_gpu.MAX, a.shape, b.shape) : new binaryop_gpu_1.BinaryOpProgram(binaryop_gpu.MAX, a.shape, b.shape); return this.compileAndRun(program, [a, b]); }; MathBackendWebGL.prototype.all = function (x, axes) { axis_util.assertAxesAreInnerMostDims('all', axes, x.rank); var _a = axis_util.computeOutAndReduceShapes(x.shape, axes), outShape = _a[0], reduceShape = _a[1]; var inSize = util.sizeFromShape(reduceShape); var a2D = x.as2D(-1, inSize); return this.reduce(a2D, 'all', a2D.dtype).reshape(outShape); }; MathBackendWebGL.prototype.any = function (x, axes) { axis_util.assertAxesAreInnerMostDims('any', axes, x.rank); var _a = axis_util.computeOutAndReduceShapes(x.shape, axes), outShape = _a[0], reduceShape = _a[1]; var inSize = util.sizeFromShape(reduceShape); var a2D = x.as2D(-1, inSize); return this.reduce(a2D, 'any', a2D.dtype).reshape(outShape); }; MathBackendWebGL.prototype.realDivide = function (a, b) { var op = binaryop_gpu.DIV; var outputDtype = 'float32'; if (environment_1.env().getBool('WEBGL_PACK_BINARY_OPERATIONS')) { var checkOutOfBounds = true; return this.packedBinaryOp(a, b, binaryop_packed_gpu.DIV, outputDtype, checkOutOfBounds); } var program = new binaryop_gpu_1.BinaryOpProgram(op, a.shape, b.shape); return this.compileAndRun(program, [a, b], outputDtype); }; MathBackendWebGL.prototype.floorDiv = function (a, b) { var op = binaryop_gpu.INT_DIV; var outputDtype = 'int32'; if (environment_1.env().getBool('WEBGL_PACK_BINARY_OPERATIONS')) { return this.packedBinaryOp(a, b, binaryop_packed_gpu.INT_DIV, outputDtype); } var program = new binaryop_gpu_1.BinaryOpProgram(op, a.shape, b.shape); return this.compileAndRun(program, [a, b], outputDtype); }; MathBackendWebGL.prototype.add = function (a, b) { if (a.dtype === 'complex64' && b.dtype === 'complex64') { return this.complexSeparableBinaryOp(a, b, binaryop_gpu.ADD); } if (this.shouldExecuteOnCPU([a, b])) { return this.cpuBackend.add(a, b); } var dtype = types_1.upcastType(a.dtype, b.dtype); if (environment_1.env().getBool('WEBGL_PACK_BINARY_OPERATIONS')) { return this.packedBinaryOp(a, b, binaryop_gpu.ADD, dtype); } var program = new binaryop_gpu_1.BinaryOpProgram(binaryop_gpu.ADD, a.shape, b.shape); return this.compileAndRun(program, [a, b], dtype); }; MathBackendWebGL.prototype.packedUnaryOp = function (x, op, dtype) { var program = new unaryop_packed_gpu_1.UnaryOpPackedProgram(x.shape, op); return this.compileAndRun(program, [x], dtype); }; MathBackendWebGL.prototype.packedBinaryOp = function (a, b, op, dtype, checkOutOfBounds) { if (checkOutOfBounds === void 0) { checkOutOfBounds = false; } var program = new binaryop_packed_gpu_1.BinaryOpPackedProgram(op, a.shape, b.shape, checkOutOfBounds); return this.compileAndRun(program, [a, b], dtype); }; /** * Computes a complex binary operation that can be decomposed into a simple * binary operation on both the real and imagary parts. */ MathBackendWebGL.prototype.complexSeparableBinaryOp = function (a, b, op) { var _this = this; var aData = this.texData.get(a.dataId); var bData = this.texData.get(b.dataId); var _a = [ [aData.complexTensors.real, bData.complexTensors.real], [aData.complexTensors.imag, bData.complexTensors.imag] ].map(function (complexParts) { var aPart = complexParts[0], bPart = complexParts[1]; var aHandle = _this.makeComplexComponentTensorInfo(a, aPart); var bHandle = _this.makeComplexComponentTensorInfo(b, bPart); var program = new binaryop_gpu_1.BinaryOpProgram(op, a.shape, b.shape); return _this.compileAndRun(program, [aHandle, bHandle], types_1.upcastType(aPart.dtype, bPart.dtype)); }), real = _a[0], imag = _a[1]; var complex = this.complex(real, imag); real.dispose(); imag.dispose(); return complex; }; // Returns a TensorInfo with the complex shape and the dataId of the // underlying part. We need to do this because a reshaped complex tensor is // not reflected in its parts. MathBackendWebGL.prototype.makeComplexComponentTensorInfo = function (complexTensor, complexPart) { return { dataId: complexPart.dataId, dtype: complexPart.dtype, shape: complexTensor.shape }; }; MathBackendWebGL.prototype.addN = function (tensors) { if (tensors.length === 1) { return tensors[0]; } // Limit the number of uploaded textures for optimization. if (tensors.length > environment_1.env().get('WEBGL_MAX_TEXTURES_IN_SHADER')) { var midIndex = Math.floor(tensors.length / 2); var leftSide = this.addN(tensors.slice(0, midIndex)); var rightSide = this.addN(tensors.slice(midIndex)); return this.addN([leftSide, rightSide]); } var dtype = tensors.map(function (t) { return t.dtype; }).reduce(function (d1, d2) { return types_1.upcastType(d1, d2); }); var shapes = tensors.map(function (t) { return t.shape; }); // We can make sure shapes are identical in op level. var usePackedOp = environment_1.env().getBool('WEBGL_PACK'); var program = usePackedOp ? new addn_packed_gpu_1.AddNPackedProgram(tensors[0].shape, shapes) : new addn_gpu_1.AddNProgram(tensors[0].shape, shapes); return this.compileAndRun(program, tensors, dtype); }; MathBackendWebGL.prototype.subtract = function (a, b) { if (a.dtype === 'complex64' && b.dtype === 'complex64') { return this.complexSeparableBinaryOp(a, b, binaryop_gpu.SUB); } if (this.shouldExecuteOnCPU([a, b])) { return this.cpuBackend.subtract(a, b); } var dtype = types_1.upcastType(a.dtype, b.dtype); if (environment_1.env().getBool('WEBGL_PACK_BINARY_OPERATIONS')) { return this.packedBinaryOp(a, b, binaryop_gpu.SUB, a.dtype); } var program = new binaryop_gpu_1.BinaryOpProgram(binaryop_gpu.SUB, a.shape, b.shape); return this.compileAndRun(program, [a, b], dtype); }; MathBackendWebGL.prototype.pow = function (a, b) { var usePackedOp = environment_1.env().getBool('WEBGL_PACK_BINARY_OPERATIONS'); var program = usePackedOp ? new binaryop_packed_gpu_1.BinaryOpPackedProgram(binaryop_packed_gpu.POW, a.shape, b.shape) : new binaryop_gpu_1.BinaryOpProgram(binaryop_gpu.POW, a.shape, b.shape); var dtype = types_1.upcastType(a.dtype, b.dtype); return this.compileAndRun(program, [a, b], dtype); }; MathBackendWebGL.prototype.ceil = function (x) { if (this.shouldExecuteOnCPU([x])) { return this.cpuBackend.ceil(x); } if (environment_1.env().getBool('WEBGL_PACK_UNARY_OPERATIONS')) { return this.packedUnaryOp(x, unary_op.CEIL, x.dtype); } var program = new unaryop_gpu_1.UnaryOpProgram(x.shape, unary_op.CEIL); return this.compileAndRun(program, [x]); }; MathBackendWebGL.prototype.floor = function (x) { if (this.shouldExecuteOnCPU([x])) { return this.cpuBackend.floor(x); } if (environment_1.env().getBool('WEBGL_PACK_UNARY_OPERATIONS')) { return this.packedUnaryOp(x, unary_op.FLOOR, x.dtype); } var program = new unaryop_gpu_1.UnaryOpProgram(x.shape, unary_op.FLOOR); return this.compileAndRun(program, [x]); }; MathBackendWebGL.prototype.sign = function (x) { var program = new unaryop_gpu_1.UnaryOpProgram(x.shape, unary_op.SIGN); return this.compileAndRun(program, [x]); }; MathBackendWebGL.prototype.isNaN = function (x) { var program = new unaryop_gpu_1.UnaryOpProgram(x.shape, unary_op.IS_NAN); return this.compileAndRun(program, [x], 'bool'); }; MathBackendWebGL.prototype.isInf = function (x) { var program = new unaryop_gpu_1.UnaryOpProgram(x.shape, unary_op.IS_INF); return this.compileAndRun(program, [x], 'bool'); }; MathBackendWebGL.prototype.isFinite = function (x) { var program = new unaryop_gpu_1.UnaryOpProgram(x.shape, unary_op.IS_FINITE); return this.compileAndRun(program, [x], 'bool'); }; MathBackendWebGL.prototype.round = function (x) { var program = new unaryop_gpu_1.UnaryOpProgram(x.shape, unary_op.ROUND); return this.compileAndRun(program, [x]); }; MathBackendWebGL.prototype.exp = function (x) { if (this.shouldExecuteOnCPU([x])) { return this.cpuBackend.exp(x); } if (environment_1.env().getBool('WEBGL_PACK_UNARY_OPERATIONS')) { return this.packedUnaryOp(x, unary_op.EXP, x.dtype); } var program = new unaryop_gpu_1.UnaryOpProgram(x.shape, unary_op.EXP); return this.compileAndRun(program, [x]); }; MathBackendWebGL.prototype.expm1 = function (x) { if (this.shouldExecuteOnCPU([x])) { return this.cpuBackend.expm1(x); } if (environment_1.env().getBool('WEBGL_PACK_UNARY_OPERATIONS')) { return this.packedUnaryOp(x, unary_op.EXPM1, x.dtype); } var program = new unaryop_gpu_1.UnaryOpProgram(x.shape, unary_op.EXPM1); return this.compileAndRun(program, [x]); }; MathBackendWebGL.prototype.softmax = function (logits, dim) { var axes = util.parseAxisParam([dim], logits.shape); var maxLogit = this.max(logits, axes); var expandedShape = axis_util.expandShapeToKeepDim(maxLogit.shape, axes); var a = this.subtract(logits, maxLogit.reshape(expandedShape)); var b = this.exp(a); var sumExp = this.sum(b, axes).reshape(expandedShape); return this.realDivide(b, sumExp); }; MathBackendWebGL.prototype.log = function (x) { if (this.shouldExecuteOnCPU([x])) { return this.cpuBackend.log(x); } if (environment_1.env().getBool('WEBGL_PACK_UNARY_OPERATIONS')) { return this.packedUnaryOp(x, unary_packed_op.LOG, x.dtype); } var program = new unaryop_gpu_1.UnaryOpProgram(x.shape, unary_op.LOG); return this.compileAndRun(program, [x]); }; MathBackendWebGL.prototype.log1p = function (x) { var program = new unaryop_gpu_1.UnaryOpProgram(x.shape, unary_op.LOG1P); return this.compileAndRun(program, [x]); }; MathBackendWebGL.prototype.sqrt = function (x) { var program = new unaryop_gpu_1.UnaryOpProgram(x.shape, unary_op.SQRT); return this.compileAndRun(program, [x]); }; MathBackendWebGL.prototype.rsqrt = function (x) { if (this.shouldExecuteOnCPU([x])) { return this.cpuBackend.rsqrt(x); } var program = new unaryop_gpu_1.UnaryOpProgram(x.shape, unary_op.RSQRT); return this.compileAndRun(program, [x]); }; MathBackendWebGL.prototype.reciprocal = function (x) { var program = new unaryop_gpu_1.UnaryOpProgram(x.shape, unary_op.RECIPROCAL); return this.compileAndRun(program, [x]); }; MathBackendWebGL.prototype.relu = function (x) { var program; if (environment_1.env().getBool('WEBGL_PACK')) { program = new unaryop_packed_gpu_1.UnaryOpPackedProgram(x.shape, unary_packed_op.RELU); } else { program = new unaryop_gpu_1.UnaryOpProgram(x.shape, unary_op.RELU); } return this.compileAndRun(program, [x]); }; MathBackendWebGL.prototype.relu6 = function (x) { var program; if (environment_1.env().getBool('WEBGL_PACK')) { program = new unaryop_packed_gpu_1.UnaryOpPackedProgram(x.shape, unary_packed_op.RELU6); } else { program = new unaryop_gpu_1.UnaryOpProgram(x.shape, unary_op.RELU6); } return this.compileAndRun(program, [x]); }; MathBackendWebGL.prototype.prelu = function (x, alpha) { var program = environment_1.env().getBool('WEBGL_PACK_BINARY_OPERATIONS') ? new binaryop_packed_gpu_1.BinaryOpPackedProgram(binaryop_packed_gpu.PRELU, x.shape, alpha.shape) : new binaryop_gpu_1.BinaryOpProgram(binaryop_gpu.PRELU, x.shape, alpha.shape); return this.compileAndRun(program, [x, alpha]); }; MathBackendWebGL.prototype.elu = function (x) { if (environment_1.env().getBool('WEBGL_PACK_UNARY_OPERATIONS')) { return this.packedUnaryOp(x, unary_packed_op.ELU, x.dtype); } var program = new unaryop_gpu_1.UnaryOpProgram(x.shape, unary_op.ELU); return this.compileAndRun(program, [x]); }; MathBackendWebGL.prototype.eluDer = function (dy, y) { var program = environment_1.env().getBool('WEBGL_PACK_BINARY_OPERATIONS') ? new binaryop_packed_gpu_1.BinaryOpPackedProgram(binaryop_packed_gpu.ELU_DER, dy.shape, y.shape) : new binaryop_gpu_1.BinaryOpProgram(binaryop_gpu.ELU_DER, dy.shape, y.shape); return this.compileAndRun(program, [dy, y]); }; MathBackendWebGL.prototype.selu = function (x) { var program = new unaryop_gpu_1.UnaryOpProgram(x.shape, unary_op.SELU); return this.compileAndRun(program, [x]); }; MathBackendWebGL.prototype.int = function (x) { var program = new unaryop_gpu_1.UnaryOpProgram(x.shape, unary_op.TO_INT); return this.compileAndRun(program, [x], 'int32'); }; MathBackendWebGL.prototype.clip = function (x, min, max) { var program; if (environment_1.env().getBool('WEBGL_PACK_CLIP')) { program = new clip_packed_gpu_1.ClipPackedProgram(x.shape); } else { program = new clip_gpu_1.ClipProgram(x.shape); } var customSetup = program.getCustomSetupFunc(min, max); return this.compileAndRun(program, [x], null, customSetup); }; MathBackendWebGL.prototype.abs = function (x) { if (this.shouldExecuteOnCPU([x])) { return this.cpuBackend.abs(x); } if (environment_1.env().getBool('WEBGL_PACK_UNARY_OPERATIONS')) { return this.packedUnaryOp(x, unary_op.ABS, x.dtype); } var program = new unaryop_gpu_1.UnaryOpProgram(x.shape, unary_op.ABS); return this.compileAndRun(program, [x]); }; MathBackendWebGL.prototype.complexAbs = function (x) { var xData = this.texData.get(x.dataId); var program = new complex_abs_gpu_1.ComplexAbsProgram(x.shape); var inputs = [ this.makeComplexComponentTensorInfo(x, xData.complexTensors.real), this.makeComplexComponentTensorInfo(x, xData.complexTensors.imag), ]; return this.compileAndRun(program, inputs); }; MathBackendWebGL.prototype.sigmoid = function (x) { var program = new unaryop_gpu_1.UnaryOpProgram(x.shape, unary_op.SIGMOID); return this.compileAndRun(program, [x]); }; MathBackendWebGL.prototype.softplus = function (x) { var program = new unaryop_gpu_1.UnaryOpProgram(x.shape, unary_op.SOFTPLUS); return this.compileAndRun(program, [x]); }; MathBackendWebGL.prototype.sin = function (x) { var program = new unaryop_gpu_1.UnaryOpProgram(x.shape, unary_op.SIN); return this.compileAndRun(program, [x]); }; MathBackendWebGL.prototype.cos = function (x) { var program = new unaryop_gpu_1.UnaryOpProgram(x.shape, unary_op.COS); return this.compileAndRun(program, [x]); }; MathBackendWebGL.prototype.tan = function (x) { var program = new unaryop_gpu_1.UnaryOpProgram(x.shape, unary_op.TAN); return this.compileAndRun(program, [x]); }; MathBackendWebGL.prototype.asin = function (x) { var program = new unaryop_gpu_1.UnaryOpProgram(x.shape, unary_op.ASIN); return this.compileAndRun(program, [x]); }; MathBackendWebGL.prototype.acos = function (x) { var program = new unaryop_gpu_1.UnaryOpProgram(x.shape, unary_op.ACOS); return this.compileAndRun(program, [x]); }; MathBackendWebGL.prototype.atan = function (x) { var program = new unaryop_gpu_1.UnaryOpProgram(x.shape, unary_op.ATAN); return this.compileAndRun(program, [x]); }; MathBackendWebGL.prototype.atan2 = function (a, b) { var program = environment_1.env().getBool('WEBGL_PACK_BINARY_OPERATIONS') ? new binaryop_packed_gpu_1.BinaryOpPackedProgram(binaryop_packed_gpu.ATAN2, a.shape, b.shape) : new binaryop_gpu_1.BinaryOpProgram(binaryop_gpu.ATAN2, a.shape, b.shape); return this.compileAndRun(program, [a, b]); }; MathBackendWebGL.prototype.sinh = function (x) { var program = new unaryop_gpu_1.UnaryOpProgram(x.shape, unary_op.SINH); return this.compileAndRun(program, [x]); }; MathBackendWebGL.prototype.cosh = function (x) { var program = new unaryop_gpu_1.UnaryOpProgram(x.shape, unary_op.COSH); return this.compileAndRun(program, [x]); }; MathBackendWebGL.prototype.tanh = function (x) { var program = new unaryop_gpu_1.UnaryOpProgram(x.shape, unary_op.TANH); return this.compileAndRun(program, [x]); }; MathBackendWebGL.prototype.asinh = function (x) { var program = new unaryop_gpu_1.UnaryOpProgram(x.shape, unary_op.ASINH); return this.compileAndRun(program, [x]); }; MathBackendWebGL.prototype.acosh = function (x) { var program = new unaryop_gpu_1.UnaryOpProgram(x.shape, unary_op.ACOSH); return this.compileAndRun(program, [x]); }; MathBackendWebGL.prototype.atanh = function (x) { var program = new unaryop_gpu_1.UnaryOpProgram(x.shape, unary_op.ATANH); return this.compileAndRun(program, [x]); }; MathBackendWebGL.prototype.erf = function (x) { var program = new unaryop_gpu_1.UnaryOpProgram(x.shape, unary_op.ERF); return this.compileAndRun(program, [x]); }; MathBackendWebGL.prototype.step = function (x, alpha) { var program = new unaryop_gpu_1.UnaryOpProgram(x.shape, unary_op.STEP(alpha)); return this.compileAndRun(program, [x]); }; MathBackendWebGL.prototype.conv2dByMatMul = function (x, filter, convInfo, bias, activation, preluActivationWeights) { // Reshapes conv2D input to 2D tensors, uses matMul and then reshape the // result from 2D to 4D. var xShape = x.shape; var xTexData = this.texData.get(x.dataId); var sharedMatMulDim = convInfo.inChannels; var outerShapeX = xShape[0] * xShape[1] * xShape[2]; var outerShapeFilter = convInfo.outChannels; var isChannelsLast = convInfo.dataFormat === 'channelsLast'; var transposeA = false; var transposeB = false; // TODO: Once reduction ops are packed, batchMatMul will always be packed // and we can remove this condition. var batchMatMulWillBeUnpacked = (outerShapeX === 1 || outerShapeFilter === 1) && sharedMatMulDim > exports.MATMUL_SHARED_DIM_THRESHOLD; var reshapeWillBeExpensive = xShape[2] % 2 !== 0 && !!xTexData.isPacked; if (batchMatMulWillBeUnpacked || !environment_1.env().getBool('WEBGL_LAZILY_UNPACK') || !environment_1.env().getBool('WEBGL_PACK_BINARY_OPERATIONS') || !reshapeWillBeExpensive) { var targetShape_1 = isChannelsLast ? xShape[0] * xShape[1] * xShape[2] : xShape[0] * xShape[2] * xShape[3]; var xReshaped_1 = this.reshape(x, [1, targetShape_1, convInfo.inChannels]); var filterReshaped_1 = this.reshape(filter, [1, convInfo.inChannels, convInfo.outChannels]); return this.reshape(this.fusedBatchMatMul({ a: xReshaped_1, b: filterReshaped_1, transposeA: transposeA, transposeB: transposeB, bias: bias, activation: activation, preluActivationWeights: preluActivationWeights }), convInfo.outShape); } // Following optimization is specific to packed |x| with odd row count // (For example, in channelLast mode, 'row count' refers to x.shape[2]): // we avoid expensive packed 2x2 reshape by padding row count to next, // even number. When x.shape[2] is odd, the result of packed batchMatMul is // the same (has the same texture layout and and values in the texture) as // it is for even x.shape[2] + 1. We make the odd-rows tensor to look like // even-rows tensor before the operation and, after the batchMatMul, // fix the even-rows result to have odd number of rows. var targetShape = isChannelsLast ? xShape[0] * xShape[1] * (xShape[2] + 1) : xShape[0] * xShape[2] * (xShape[3] + 1); var xReshaped = { dataId: x.dataId, shape: [1, targetShape, convInfo.inChannels], dtype: x.dtype }; // xTexData.shape gets referenced from GPGPUBinary.inShapeInfos. // Decrementing row count, after batchMatMul->...->compileProgram leads to // invalid row count within the reference in GPGPUBinary.inShapeInfos. // Alternative fix would be to provide a copy to GPGPUBinary.inShapeInfos // in compileProgram method, but that would affect compilation of all // programs - instead, provide a copy here, with even row count, before // calling batchMatMul->...->compileProgram and after that, the original // xTexData.shape is restored. var originalXTexDataShape = xTexData.shape; xTexData.shape = xTexData.shape.slice(); xTexData.shape[xTexData.shape.length - 2]++; util.assert(webgl_util.isReshapeFree(xTexData.shape, xReshaped.shape), function () { return "packed reshape " + xTexData.shape + " to " + xReshaped.shape + " isn't free"; }); var filterReshaped = this.reshape(filter, [1, convInfo.inChannels, convInfo.outChannels]); var pointwiseConv = this.fusedBatchMatMul({ a: xReshaped, b: filterReshaped, transposeA: transposeA, transposeB: transposeB, bias: bias, activation: activation, preluActivationWeights: preluActivationWeights }); var pointwiseConvTexData = this.texData.get(pointwiseConv.dataId); util.assert(pointwiseConvTexData.isPacked, function () { return 'batchMatMul result is expected to be packed'; }); // Restore the input shape to original. xTexData.shape = originalXTexDataShape; // Set the output shape - there is no need for expensive reshape as data // layout is already correct. pointwiseConvTexData.shape = convInfo.outShape; return engine_1.ENGINE.makeTensorFromDataId(pointwiseConv.dataId, convInfo.outShape, pointwiseConv.dtype); }; MathBackendWebGL.prototype.conv2dWithIm2Row = function (x, filter, convInfo, bias, activation, preluActivationWeights) { // Rearranges conv2d input so each block to be convolved over forms the // column of a new matrix with shape [filterWidth * filterHeight * // inChannels, outHeight * outWidth]. The filter is also rearranged so each // output channel forms a row of a new matrix with shape [outChannels, // filterWidth * filterHeight * inChannels]. The convolution is then // computed by multiplying these matrices and reshaping the result. var filterWidth = convInfo.filterWidth, filterHeight = convInfo.filterHeight, inChannels = convInfo.inChannels, outWidth = convInfo.outWidth, outHeight = convInfo.outHeight, dataFormat = convInfo.dataFormat; var isChannelsLast = dataFormat === 'channelsLast'; var sharedDim = filterWidth * filterHeight * inChannels; var numCols = outHeight * outWidth; var x2ColShape = [sharedDim, numCols]; var transposeA = true; var transposeB = false; var xSqueezed = x.squeeze([0]); var w2Row = filter.reshape([1, sharedDim, -1]); var im2ColProgram = new im2col_packed_gpu_1.Im2ColPackedProgram(x2ColShape, xSqueezed.shape, convInfo); var im2Col = this.compileAndRun(im2ColProgram, [xSqueezed]).reshape([ 1, x2ColShape[0], x2ColShape[1] ]); var hasBias = bias != null; var hasPreluActivationWeights = preluActivationWeights != null; var fusedActivation = activation ? mapActivationToShaderProgram(activation, true) : null; var matmulProgram = new mulmat_packed_gpu_1.MatMulPackedProgram(im2Col.shape, [1, numCols, convInfo.outChannels], transposeA, transposeB, hasBias, fusedActivation, hasPreluActivationWeights); var inputs = [im2Col, w2Row]; if (bias) { inputs.push(bias); } if (hasPreluActivationWeights) { inputs.push(preluActivationWeights); } var product = this.compileAndRun(matmulProgram, inputs); if (isChannelsLast) { return product.reshape([1, outHeight, outWidth, convInfo.outChannels]); } else { return product.reshape([1, convInfo.outChannels, outHeight, outWidth]); } }; MathBackendWebGL.prototype.fusedConv2d = function (_a) { var input = _a.input, filter = _a.filter, convInfo = _a.convInfo, bias = _a.bias, activation = _a.activation, preluActivationWeights = _a.preluActivationWeights; if (convInfo.filterHeight === 1 && convInfo.filterWidth === 1 && convInfo.dilationHeight === 1 && convInfo.dilationWidth === 1 && convInfo.strideHeight === 1 && convInfo.strideWidth === 1 && (convInfo.padInfo.type === 'SAME' || convInfo.padInfo.type === 'VALID')) { return this.conv2dByMatMul(input, filter, convInfo, bias, activation, preluActivationWeights); } if (environment_1.env().getBool('WEBGL_CONV_IM2COL') && input.shape[0] === 1) { return this.conv2dWithIm2Row(input, filter, convInfo, bias, activation, preluActivationWeights); } var hasBias = bias != null; var hasPreluActivationWeights = preluActivationWeights != null; var fusedActivation = activation ? mapActivationToShaderProgram(activation, false) : null; var program = new conv_gpu_1.Conv2DProgram(convInfo, hasBias, fusedActivation, hasPreluActivationWeights); var inputs = [input, filter]; if (bias) { inputs.push(bias); } if (preluActivationWeights) { inputs.push(preluActivationWeights); } return this.compileAndRun(program, inputs); }; MathBackendWebGL.prototype.conv2d = function (x, filter, convInfo) { if (convInfo.filterHeight === 1 && convInfo.filterWidth === 1 && convInfo.dilationHeight === 1 && convInfo.dilationWidth === 1 && convInfo.strideHeight === 1 && convInfo.strideWidth === 1 && (convInfo.padInfo.type === 'SAME' || convInfo.padInfo.type === 'VALID')) { return this.conv2dByMatMul(x, filter, convInfo); } if (environment_1.env().getBool('WEBGL_CONV_IM2COL') && x.shape[0] === 1) { return this.conv2dWithIm2Row(x, filter, convInfo); } var program = new conv_gpu_1.Conv2DProgram(convInfo); return this.compileAndRun(program, [x, filter]); }; MathBackendWebGL.prototype.conv2dDerInput = function (dy, filter, convInfo) { var program = new conv_backprop_gpu_1.Conv2DDerInputProgram(convInfo); return this.compileAndRun(program, [dy, filter]); }; MathBackendWebGL.prototype.conv2dDerFilter = function (x, dy, convInfo) { var program = new conv_backprop_gpu_1.Conv2DDerFilterProgram(convInfo); return this.compileAndRun(program, [x, dy]); }; MathBackendWebGL.prototype.fusedDepthwiseConv2D = function (_a) { var input = _a.input, filter = _a.filter, convInfo = _a.convInfo, bias = _a.bias, activation = _a.activation, preluActivationWeights = _a.preluActivationWeights; var shouldPackDepthwiseConv = environment_1.env().getBool('WEBGL_PACK_DEPTHWISECONV') && convInfo.strideWidth <= 2 && convInfo.outChannels / convInfo.inChannels === 1; var fusedActivation = activation ? mapActivationToShaderProgram(activation, shouldPackDepthwiseConv) : null; var inputs = [input, filter]; var hasBias = bias != null; var hasPreluActivationWeights = preluActivationWeights != null; if (hasBias) { inputs.push(bias); } if (hasPreluActivationWeights) { inputs.push(preluActivationWeights); } var program; if (shouldPackDepthwiseConv) { program = new conv_packed_gpu_depthwise_1.DepthwiseConvPacked2DProgram(convInfo, hasBias, fusedActivation, hasPreluActivationWeights); return this.compileAndRun(program, inputs); } program = new conv_gpu_depthwise_1.DepthwiseConv2DProgram(convInfo, hasBias, fusedActivation, hasPreluActivationWeights); return this.compileAndRun(program, inputs); }; MathBackendWebGL.prototype.depthwiseConv2D = function (x, filter, convInfo) { var program; if (environment_1.env().getBool('WEBGL_PACK_DEPTHWISECONV') && convInfo.strideWidth <= 2 && convInfo.outChannels / convInfo.inChannels === 1) { program = new conv_packed_gpu_depthwise_1.DepthwiseConvPacked2DProgram(convInfo); return this.compileAndRun(program, [x, filter]); } program = new conv_gpu_depthwise_1.DepthwiseConv2DProgram(convInfo); return this.compileAndRun(program, [x, filter]); }; MathBackendWebGL.prototype.depthwiseConv2DDerInput = function (dy, filter, convInfo) { var program = new conv_backprop_gpu_depthwise_1.DepthwiseConv2DDerInputProgram(convInfo); return this.compileAndRun(program, [dy, filter]); }; MathBackendWebGL.prototype.depthwiseConv2DDerFilter = function (x, dy, convInfo) { var program = new conv_backprop_gpu_depthwise_1.DepthwiseConv2DDerFilterProgram(convInfo); return this.compileAndRun(program, [x, dy]); }; MathBackendWebGL.prototype.conv3d = function (x, filter, convInfo) { var program = new conv_gpu_1.Conv3DProgram(convInfo); return this.compileAndRun(program, [x, filter]); }; MathBackendWebGL.prototype.conv3dDerInput = function (dy, filter, convInfo) { var program = new conv_backprop_gpu_1.Conv3DDerInputProgram(convInfo); return this.compileAndRun(program, [dy, filter]); }; MathBackendWebGL.prototype.conv3dDerFilter = function (x, dy, convInfo) { var program = new conv_backprop_gpu_1.Conv3DDerFilterProgram(convInfo); return this.compileAndRun(program, [x, dy]); }; MathBackendWebGL.prototype.maxPool = function (x, convInfo) { var program = new pool_gpu_1.Pool2DProgram(convInfo, 'max', false); return this.compileAndRun(program, [x]); }; MathBackendWebGL.prototype.avgPool = function (x, convInfo) { var program = new pool_gpu_1.Pool2DProgram(convInfo, 'avg', false); return this.compileAndRun(program, [x], 'float32'); }; MathBackendWebGL.prototype.maxPoolBackprop = function (dy, x, y, convInfo) { var getPositions = true; var maxPoolPositionsProgram = new pool_gpu_1.Pool2DProgram(convInfo, 'max', getPositions); var maxPoolPositions = this.compileAndRun(maxPoolPositionsProgram, [x]); var maxPoolBackPropProgram = new max_pool_backprop_gpu_1.MaxPool2DBackpropProgram(convInfo); var result = this.compileAndRun(maxPoolBackPropProgram, [dy, maxPoolPositions], x.dtype); maxPoolPositions.dispose(); return result; }; MathBackendWebGL.prototype.avgPoolBackprop = function (dy, x, convInfo) { var avgPoolBackpropProgram = new avg_pool_backprop_gpu_1.AvgPool2DBackpropProgram(convInfo); return this.compileAndRun(avgPoolBackpropProgram, [dy], x.dtype); }; MathBackendWebGL.prototype.cast = function (x, dtype) { return backend_util.castTensor(x, dtype, this); }; MathBackendWebGL.prototype.unstack = function (x, axis) { var num = x.shape[axis]; var outShape = new Array(x.rank - 1); var outIndex = 0; for (var i = 0; i < x.rank; i++) { if (i !== axis) { outShape[outIndex++] = x.shape[i]; } } var begin = new Array(x.rank).fill(0); var size = x.shape.slice(); size[axis] = 1; var res = new Array(num); for (var i = 0; i < res.length; i++) { begin[axis] = i; res[i] = this.slice(x, begin, size).reshape(outShape); } return res; }; MathBackendWebGL.prototype.avgPool3d = function (x, convInfo) { var program = new pool_gpu_1.Pool3DProgram(convInfo, 'avg', false); return this.compileAndRun(program, [x], 'float32'); }; MathBackendWebGL.prototype.avgPool3dBackprop = function (dy, x, convInfo) { var avgPool3dBackpropProgram = new avg_pool_backprop_gpu_1.AvgPool3DBackpropProgram(convInfo); return this.compileAndRun(avgPool3dBackpropProgram, [dy], x.dtype); }; MathBackendWebGL.prototype.maxPool3d = function (x, convInfo) { var program = new pool_gpu_1.Pool3DProgram(convInfo, 'max', false); return this.compileAndRun(program, [x], 'float32'); }; MathBackendWebGL.prototype.maxPool3dBackprop = function (dy, x, y, convInfo) { var getPositions = true; var maxPool3dPositionsProgram = new pool_gpu_1.Pool3DProgram(convInfo, 'max', getPositions); var maxPool3dPositions = this.compileAndRun(maxPool3dPositionsProgram, [x]); var maxPool3dBackPropProgram = new max_pool_backprop_gpu_1.MaxPool3DBackpropProgram(convInfo); var result = this.compileAndRun(maxPool3dBackPropProgram, [dy, maxPool3dPositions], x.dtype); maxPool3dPositions.dispose(); return result; }; MathBackendWebGL.prototype.reshape = function (x, shape) { var texData = this.texData.get(x.dataId); if (texData.isPacked && !webgl_util.isReshapeFree(x.shape, shape) && !(texData.texture !== null && webgl_util.isReshapeFree(texData.shape, shape))) { var info = this.packedReshape(x, shape); return engine_1.ENGINE.makeTensorFromDataId(info.dataId, info.shape, info.dtype); } return backend_util.reshapeTensor(x, shape); }; MathBackendWebGL.prototype.resizeBilinear = function (x, newHeight, newWidth, alignCorners) { var program = environment_1.env().getBool('WEBGL_PACK_IMAGE_OPERATIONS') ? new resize_bilinear_packed_gpu_1.ResizeBilinearPackedProgram(x.shape, newHeight, newWidth, alignCorners) : new resize_bilinear_gpu_1.ResizeBilinearProgram(x.shape, newHeight, newWidth, alignCorners); return this.compileAndRun(program, [x], 'float32'); }; MathBackendWebGL.prototype.resizeBilinearBackprop = function (dy, x, alignCorners) { var program = new resize_bilinear_backprop_gpu_1.ResizeBilinearBackpropProgram(dy, x, alignCorners); return this.compileAndRun(program, [dy]); }; MathBackendWebGL.prototype.resizeNearestNeighbor = function (x, newHeight, newWidth, alignCorners) { var program = new resize_nearest_neighbor_gpu_1.ResizeNearestNeighborProgram(x.shape, newHeight, newWidth, alignCorners); return this.compileAndRun(program, [x]); }; MathBackendWebGL.prototype.resizeNearestNeighborBackprop = function (dy, x, alignCorners) { var program = new resize_nearest_neighbor_backprop_gpu_1.ResizeNearestNeigborBackpropProgram(dy, x, alignCorners); return this.compileAndRun(program, [dy]); }; MathBackendWebGL.prototype.multinomial = function (logits, normalized, numSamples, seed) { var probs = normalized ? logits : softmax_1.softmax(logits); var batchSize = probs.shape[0]; var numOutcomes = probs.shape[1]; var program = new multinomial_gpu_1.MultinomialProgram(batchSize, numOutcomes, numSamples); var customSetup = program.getCustomSetupFunc(seed); return this.compileAndRun(program, [probs], 'int32', customSetup); }; MathBackendWebGL.prototype.oneHot = function (indices, depth, onValue, offValue) { var program = new onehot_gpu_1.OneHotProgram(indices.size, depth, onValue, offValue); return this.compileAndRun(program, [indices]); }; MathBackendWebGL.prototype.diag = function (x) { var program = new diag_gpu_1.DiagProgram(x.size); return this.compileAndRun(program, [x]); }; MathBackendWebGL.prototype.nonMaxSuppression = function (boxes, scores, maxOutputSize, iouThreshold, scoreThreshold) { log_1.warn('tf.nonMaxSuppression() in webgl locks the UI thread. ' + 'Call tf.nonMaxSuppressionAsync() instead'); var boxesVals = boxes.dataSync(); var scoresVals = scores.dataSync(); return non_max_suppression_impl_1.nonMaxSuppressionV3(boxesVals, scoresVals, maxOutputSize, iouThreshold, scoreThreshold); }; MathBackendWebGL.prototype.cropAndResize = function (image, boxes, boxIndex, cropSize, method, extrapolationValue) { var program = new crop_and_resize_gpu_1.CropAndResizeProgram(image.shape, boxes.shape, cropSize, method, extrapolationValue); return this.compileAndRun(program, [image, boxes, boxIndex], 'float32'); }; MathBackendWebGL.prototype.depthToSpace = function (x, blockSize, dataFormat) { util.assert(blockSize > 1, function () { return "blockSize should be > 1 for depthToSpace, but was: " + blockSize; }); var batchSize = x.shape[0]; var inputHeight = (dataFormat === 'NHWC') ? x.shape[1] : x.shape[2]; var inputWidth = (dataFormat === 'NHWC') ? x.shape[2] : x.shape[3]; var inputDepth = (dataFormat === 'NHWC') ? x.shape[3] : x.shape[1]; var outputHeight = inputHeight * blockSize; var outputWidth = inputWidth * blockSize; var outputDepth = inputDepth / (blockSize * blockSize); var outputShape = (dataFormat === 'NHWC') ? [batchSize, outputHeight, outputWidth, outputDepth] : [batchSize, outputDepth, outputHeight, outputWidth]; var program = new depth_to_space_gpu_1.DepthToSpaceProgram(outputShape, blockSize, dataFormat); return this.compileAndRun(program, [x]); }; MathBackendWebGL.prototype.split = function (x, sizeSplits, axis) { return split_shared_1.split(x, sizeSplits, axis); }; MathBackendWebGL.prototype.scatterND = function (indices, updates, shape) { var _a = scatter_nd_util.calculateShapes(updates, indices, shape), sliceRank = _a.sliceRank, numUpdates = _a.numUpdates, sliceSize = _a.sliceSize, strides = _a.strides, outputSize = _a.outputSize; var flattenShape = [outputSize / sliceSize, sliceSize]; var flattenIndices = indices.reshape([numUpdates, sliceRank]); var flattenX = updates.reshape([numUpdates, sliceSize]); if (outputSize === 0) { return backend_util.reshapeTensor(tensor_ops_1.tensor([]), shape); } var defaultValue = tensor_ops_1.scalar(0); var program = new scatter_gpu_1.ScatterProgram(numUpdates, sliceRank, flattenIndices.rank, flattenX.rank, strides, flattenShape); var res = this.compileAndRun(program, [flattenX, flattenIndices, defaultValue]); return res.reshape(shape); }; MathBackendWebGL.prototype.sparseToDense = function (sparseIndices, sparseValues, outputShape, defaultValue) { var _a = scatter_nd_util.calculateShapes(sparseValues, sparseIndices, outputShape), sliceRank = _a.sliceRank, numUpdates = _a.numUpdates, strides = _a.strides, outputSize = _a.outputSize; var sumDupeIndices = false; var program = new scatter_gpu_1.ScatterProgram(numUpdates, sliceRank, sparseIndices.rank, sparseValues.rank, strides, [outputSize, 1], sumDupeIndices); var res = this.compileAndRun(program, [sparseValues, sparseIndices, defaultValue]); return res.reshape(outputShape); }; MathBackendWebGL.prototype.fft = function (x) { var inverse = false; return this.fftImpl(x, inverse); }; MathBackendWebGL.prototype.ifft = function (x) { var inverse = true; return this.fftImpl(x, inverse); }; MathBackendWebGL.prototype.fftImpl = function (x, inverse) { var xData = this.texData.get(x.dataId); var realProgram = new fft_gpu_1.FFTProgram(fft_gpu.COMPLEX_FFT.REAL, x.shape, inverse); var imagProgram = new fft_gpu_1.FFTProgram(fft_gpu.COMPLEX_FFT.IMAG, x.shape, inverse); var inputs = [ this.makeComplexComponentTensorInfo(x, xData.complexTensors.real), this.makeComplexComponentTensorInfo(x, xData.complexTensors.imag), ]; var real = this.compileAndRun(realProgram, inputs); var imag = this.compileAndRun(imagProgram, inputs); var complex = this.complex(real, imag).as2D(x.shape[0], x.shape[1]); real.dispose(); imag.dispose(); return complex; }; MathBackendWebGL.prototype.gatherND = function (x, indices) { var indicesShape = indices.shape; var sliceRank = indicesShape[indicesShape.length - 1]; var _a = gather_nd_util.prepareAndValidate(x, indices), resultShape = _a[0], numSlices = _a[1], sliceSize = _a[2], strides = _a[3]; var flattenIndices = indices.reshape([numSlices, sliceRank]); var flattenX = x.reshape([x.size / sliceSize, sliceSize]); var program = new gather_nd_gpu_1.GatherNDProgram(sliceRank, strides, [numSlices, sliceSize]); var res = this.compileAndRun(program, [flattenX, flattenIndices]); return res.reshape(resultShape); }; MathBackendWebGL.prototype.fill = function (shape, value, dtype) { dtype = dtype || util_1.inferDtype(value); if (dtype === 'string') { // String type should be handled in CPU memory. var values = util_1.getArrayFromDType(dtype, util_1.sizeFromShape(shape)); values.fill(value); return engine_1.ENGINE.makeTensor(values, shape, dtype, this); } else { var program = new fill_gpu_1.FillProgram(shape, value); var customSetup = program.getCustomSetupFunc(value); return this.compileAndRun(program, [], dtype, customSetup); } }; MathBackendWebGL.prototype.onesLike = function (x) { if (x.dtype === 'string') { throw new Error('onesLike is not supported under string dtype'); } else { // TODO(cais, smilkov): Add WebGL shader for onesLike: // https://github.com/tensorflow/tfjs/issues/1293 return this.fill(x.shape, 1, x.dtype); } }; MathBackendWebGL.prototype.zerosLike = function (x) { return this.fill(x.shape, x.dtype === 'string' ? '' : 0, x.dtype); }; MathBackendWebGL.prototype.linspace = function (start, stop, num) { // TODO: Use CPU implementation due to the precision problem in Safari. return backend_util.linspaceImpl(start, stop, num); }; MathBackendWebGL.prototype.makeTensorInfo = function (shape, dtype) { var dataId = this.write(null /* values */, shape, dtype); this.texData.get(dataId).usage = null; return { dataId: dataId, shape: shape, dtype: dtype }; }; MathBackendWebGL.prototype.makeOutput = function (shape, dtype) { var dataId = this.makeTensorInfo(shape, dtype).dataId; return engine_1.ENGINE.makeTensorFromDataId(dataId, shape, dtype, this); }; MathBackendWebGL.prototype.unpackTensor = function (input) { var program = new unpack_gpu_1.UnpackProgram(input.shape); return this.runWebGLProgram(program, [input], input.dtype); }; MathBackendWebGL.prototype.packTensor = function (input) { var program = new pack_gpu_1.PackProgram(input.shape); var preventEagerUnpackingOutput = true; return this.runWebGLProgram(program, [input], input.dtype, null /* customSetup */, preventEagerUnpackingOutput); }; MathBackendWebGL.prototype.packedReshape = function (input, afterShape) { var input3DShape = [ webgl_util.getBatchDim(input.shape) ].concat(webgl_util.getRowsCols(input.shape)); var input3D = { dtype: input.dtype, shape: input3DShape, dataId: input.dataId }; var afterShapeAs3D = [ webgl_util.getBatchDim(afterShape) ].concat(webgl_util.getRowsCols(afterShape)); var program = new reshape_packed_gpu_1.ReshapePackedProgram(afterShapeAs3D, input3DShape); var preventEagerUnpackingOfOutput = true; var output = this.runWebGLProgram(program, [input3D], input.dtype, null /* customSetup */, preventEagerUnpackingOfOutput); return { dataId: output.dataId, shape: afterShape, dtype: output.dtype }; }; MathBackendWebGL.prototype.decode = function (dataId) { var texData = this.texData.get(dataId); var isPacked = texData.isPacked, shape = texData.shape, dtype = texData.dtype; var shapeAs3D = webgl_util.getShapeAs3D(shape); var program; if (isPacked) { program = new decode_matrix_packed_gpu_1.DecodeMatrixPackedProgram(shapeAs3D); } else { program = new decode_matrix_gpu_1.DecodeMatrixProgram(shapeAs3D); } var preventEagerUnpackingOfOutput = true; var out = this.runWebGLProgram(program, [{ shape: shapeAs3D, dtype: dtype, dataId: dataId }], dtype, null /* customSetup */, preventEagerUnpackingOfOutput); return { dtype: dtype, shape: shape, dataId: out.dataId }; }; MathBackendWebGL.prototype.runWebGLProgram = function (program, inputs, outputDtype, customSetup, preventEagerUnpackingOfOutput) { var _this = this; if (preventEagerUnpackingOfOutput === void 0) { preventEagerUnpackingOfOutput = false; } var output = this.makeTensorInfo(program.outputShape, outputDtype); var outData = this.texData.get(output.dataId); if (program.packedOutput) { outData.isPacked = true; } if (program.outPackingScheme === tex_util.PackingScheme.DENSE) { var texelShape = tex_util.getDenseTexShape(program.outputShape); // For a densely packed output, we explicitly set texShape // so it doesn't get assigned later according to our typical packing // scheme wherein a single texel can only contain values from adjacent // rows/cols. outData.texShape = texelShape.map(function (d) { return d * 2; }); } if (program.outTexUsage != null) { outData.usage = program.outTexUsage; } if (util_1.sizeFromShape(output.shape) === 0) { // Short-circuit the computation since the result is empty (has 0 in its // shape). outData.values = util_1.getTypedArrayFromDType(output.dtype, 0); return output; } var dataToDispose = []; var inputsData = inputs.map(function (input) { if (input.dtype === 'complex64') { throw new Error("GPGPUProgram does not support complex64 input. For complex64 " + "dtypes, please separate the program into real and imaginary " + "parts."); } var texData = _this.texData.get(input.dataId); if (texData.texture == null) { if (!program.packedInputs && util.sizeFromShape(input.shape) <= environment_1.env().getNumber('WEBGL_SIZE_UPLOAD_UNIFORM')) { // Upload small tensors that live on the CPU as uniforms, not as // textures. Do this only when the environment supports 32bit floats // due to problems when comparing 16bit floats with 32bit floats. // TODO(https://github.com/tensorflow/tfjs/issues/821): Make it // possible for packed shaders to sample from uniforms. return { shape: input.shape, texData: null, isUniform: true, uniformValues: texData.values }; } // This ensures that if a packed program's inputs have not yet been // uploaded to the GPU, they get uploaded as packed right off the bat. if (program.packedInputs) { texData.isPacked = true; texData.shape = input.shape; } } else if (!!texData.isPacked !== !!program.packedInputs) { input = texData.isPacked ? _this.unpackTensor(input) : _this.packTensor(input); dataToDispose.push(input); texData = _this.texData.get(input.dataId); } else if (texData.isPacked && !webgl_util.isReshapeFree(texData.shape, input.shape)) { // This is a special case where a texture exists for a tensor // but the shapes are incompatible (due to packing constraints) because // the tensor did not have a chance to go through the packed reshape // shader. This only happens when we reshape the *same* tensor to form // *distinct* inputs to an op, e.g. dotting a vector with itself. This // case will disappear once packed uploading is the default. var savedInput = input; var targetShape = input.shape; input.shape = texData.shape; input = _this.packedReshape(input, targetShape); dataToDispose.push(input); texData = _this.texData.get(input.dataId); savedInput.shape = targetShape; } _this.uploadToGPU(input.dataId); return { shape: input.shape, texData: texData, isUniform: false }; }); this.uploadToGPU(output.dataId); var outputData = { shape: output.shape, texData: outData, isUniform: false }; var key = gpgpu_math.makeShaderKey(program, inputsData, outputData); var binary = this.getAndSaveBinary(key, function () { return gpgpu_math.compileProgram(_this.gpgpu, program, inputsData, outputData); }); var shouldTimeProgram = this.activeTimers != null; var query; if (shouldTimeProgram) { query = this.startTimer(); } gpgpu_math.runProgram(this.gpgpu, binary, inputsData, outputData, customSetup); dataToDispose.forEach(function (info) { return _this.disposeData(info.dataId); }); if (shouldTimeProgram) { query = this.endTimer(query); this.activeTimers.push({ name: program.constructor.name, query: this.getQueryTime(query) }); } if (!environment_1.env().getBool('WEBGL_LAZILY_UNPACK') && outData.isPacked && preventEagerUnpackingOfOutput === false) { var unpacked = this.unpackTensor(output); this.disposeData(output.dataId); return unpacked; } return output; }; MathBackendWebGL.prototype.compileAndRun = function (program, inputs, outputDtype, customSetup, preventEagerUnpackingOfOutput) { if (preventEagerUnpackingOfOutput === void 0) { preventEagerUnpackingOfOutput = false; } outputDtype = outputDtype || inputs[0].dtype; var outInfo = this.runWebGLProgram(program, inputs, outputDtype, customSetup, preventEagerUnpackingOfOutput); return engine_1.ENGINE.makeTensorFromDataId(outInfo.dataId, outInfo.shape, outInfo.dtype); }; MathBackendWebGL.prototype.getAndSaveBinary = function (key, getBinary) { if (!(key in this.binaryCache)) { this.binaryCache[key] = getBinary(); } return this.binaryCache[key]; }; MathBackendWebGL.prototype.getTextureManager = function () { return this.textureManager; }; MathBackendWebGL.prototype.dispose = function () { var _this = this; if (this.disposed) { return; } // Avoid disposing the compiled webgl programs during unit testing because // it slows down test execution. if (!environment_1.env().getBool('IS_TEST')) { var allKeys = Object.keys(this.binaryCache); allKeys.forEach(function (key) { _this.gpgpu.deleteProgram(_this.binaryCache[key].webGLProgram); delete _this.binaryCache[key]; }); } this.textureManager.dispose(); if (this.canvas != null && (typeof (HTMLCanvasElement) !== 'undefined' && this.canvas instanceof HTMLCanvasElement)) { this.canvas.remove(); } else { this.canvas = null; } if (this.gpgpuCreatedLocally) { this.gpgpu.program = null; this.gpgpu.dispose(); } this.disposed = true; }; MathBackendWebGL.prototype.floatPrecision = function () { var _this = this; if (this.floatPrecisionValue == null) { this.floatPrecisionValue = globals_1.tidy(function () { if (!environment_1.env().get('WEBGL_RENDER_FLOAT32_ENABLED')) { // Momentarily switching DEBUG flag to false so we don't throw an // error trying to upload a small value. var debugFlag = environment_1.env().getBool('DEBUG'); environment_1.env().set('DEBUG', false); var underflowCheckValue = _this.abs(tensor_ops_1.scalar(1e-8)).dataSync()[0]; environment_1.env().set('DEBUG', debugFlag); if (underflowCheckValue > 0) { return 32; } } return 16; }); } return this.floatPrecisionValue; }; /** Returns the smallest representable number. */ MathBackendWebGL.prototype.epsilon = function () { return this.floatPrecision() === 32 ? backend_1.EPSILON_FLOAT32 : backend_1.EPSILON_FLOAT16; }; MathBackendWebGL.prototype.uploadToGPU = function (dataId) { var _a; var texData = this.texData.get(dataId); var shape = texData.shape, dtype = texData.dtype, values = texData.values, texture = texData.texture, usage = texData.usage, isPacked = texData.isPacked; if (texture != null) { // Array is already on GPU. No-op. return; } var shouldTimeProgram = this.activeTimers != null; var start; if (shouldTimeProgram) { start = util.now(); } var texShape = texData.texShape; if (texShape == null) { texShape = webgl_util.getTextureShapeFromLogicalShape(shape, isPacked); texData.texShape = texShape; } if (values != null) { var shapeAs3D = webgl_util.getShapeAs3D(shape); var program = void 0; var width = texShape[1], height = texShape[0]; var isByteArray = values instanceof Uint8Array; if (isPacked) { _a = tex_util.getPackedMatrixTextureShapeWidthHeight(texShape[0], texShape[1]), width = _a[0], height = _a[1]; program = new encode_matrix_packed_gpu_1.EncodeMatrixPackedProgram(shapeAs3D, [height, width], isByteArray); } else { program = new encode_matrix_gpu_1.EncodeMatrixProgram(shapeAs3D, [height, width], isByteArray); } var tempDenseInputHandle = this.makeTensorInfo([height, width], dtype); if (isByteArray) { this.texData.get(tempDenseInputHandle.dataId).usage = tex_util_1.TextureUsage.PIXELS; } else { this.texData.get(tempDenseInputHandle.dataId).usage = tex_util_1.TextureUsage.UPLOAD; } this.gpgpu.uploadDenseMatrixToTexture(this.getTexture(tempDenseInputHandle.dataId), width, height, values); // We want the output to remain packed regardless of the value of // WEBGL_PACK. var preventEagerUnpacking = true; var encodedOutputTarget = this.runWebGLProgram(program, [tempDenseInputHandle], dtype, null, preventEagerUnpacking); // Have the original texture assume the identity of the encoded output. var outputTexData = this.texData.get(encodedOutputTarget.dataId); texData.texture = outputTexData.texture; texData.texShape = outputTexData.texShape; texData.isPacked = outputTexData.isPacked; texData.usage = outputTexData.usage; this.disposeData(tempDenseInputHandle.dataId); this.texData.delete(encodedOutputTarget.dataId); // Once uploaded, don't store the values on cpu. texData.values = null; if (shouldTimeProgram) { this.uploadWaitMs += util.now() - start; } } else { var newTexture = this.acquireTexture(texShape, usage, dtype, isPacked); texData.texture = newTexture; } }; MathBackendWebGL.prototype.convertAndCacheOnCPU = function (dataId, float32Values) { var texData = this.texData.get(dataId); var dtype = texData.dtype; this.releaseGPUData(dataId); if (float32Values != null) { texData.values = float32ToTypedArray(float32Values, dtype); } return texData.values; }; MathBackendWebGL.prototype.acquireTexture = function (texShape, texType, dtype, isPacked) { this.numBytesInGPU += this.computeBytes(texShape, dtype); if (!this.warnedAboutMemory && this.numBytesInGPU > this.numMBBeforeWarning * 1024 * 1024) { var mb = (this.numBytesInGPU / 1024 / 1024).toFixed(2); this.warnedAboutMemory = true; console.warn("High memory usage in GPU: " + mb + " MB, " + "most likely due to a memory leak"); } return this.textureManager.acquireTexture(texShape, texType, isPacked); }; MathBackendWebGL.prototype.computeBytes = function (shape, dtype) { return shape[0] * shape[1] * util.bytesPerElement(dtype); }; return MathBackendWebGL; }(backend_1.KernelBackend)); exports.MathBackendWebGL = MathBackendWebGL; if (device_util.isBrowser()) { engine_1.ENGINE.registerBackend('webgl', function () { return new MathBackendWebGL(); }, 2 /* priority */); } function float32ToTypedArray(a, dtype) { if (dtype === 'float32' || dtype === 'complex64') { return a; } else if (dtype === 'int32' || dtype === 'bool') { var result = (dtype === 'int32') ? new Int32Array(a.length) : new Uint8Array(a.length); for (var i = 0; i < result.length; ++i) { result[i] = Math.round(a[i]); } return result; } else { throw new Error("Unknown dtype " + dtype); } } //# sourceMappingURL=backend_webgl.js.map