/**
|
* @license
|
* Copyright 2021 Google LLC. All Rights Reserved.
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
* you may not use this file except in compliance with the License.
|
* You may obtain a copy of the License at
|
*
|
* http://www.apache.org/licenses/LICENSE-2.0
|
*
|
* Unless required by applicable law or agreed to in writing, software
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
* See the License for the specific language governing permissions and
|
* limitations under the License.
|
* =============================================================================
|
*/
|
import { util } from '@tensorflow/tfjs-core';
|
function split(str, delimiters, skipEmpty, result) {
|
if (!str.length) {
|
return;
|
}
|
// When the delimiter is empty, the input is split into individual characters.
|
if (delimiters.length === 0) {
|
for (let i = 0; i < str.length; ++i) {
|
result.push(str.subarray(i, i + 1));
|
}
|
return;
|
}
|
// When there is one delimiter, the input is split only at that delimiter.
|
if (delimiters.length === 1) {
|
const delimiter = delimiters[0];
|
let f = str.indexOf(delimiter);
|
while (f !== -1) {
|
const token = str.subarray(0, f);
|
if (!skipEmpty || token.length !== 0) {
|
result.push(token);
|
}
|
str = str.subarray(f + 1);
|
f = str.indexOf(delimiter);
|
}
|
if (!skipEmpty || str.length !== 0) {
|
result.push(str);
|
}
|
return;
|
}
|
// When there are multiple delimiters, the input is split at every instance
|
// one of the delimiters appears.
|
let tokenStart = 0;
|
for (let i = 0; i < str.length + 1; i++) {
|
if ((i === str.length) || (delimiters.indexOf(str[i]) !== -1)) {
|
const token = str.subarray(tokenStart, i);
|
if (!skipEmpty || token.length !== 0) {
|
result.push(token);
|
}
|
tokenStart = i + 1;
|
}
|
}
|
}
|
/**
 * Splits every string in a batch and returns the tokens in sparse form.
 *
 * Each element of `input` is tokenised with `split` (an empty `delimiter`
 * means the input is split character by character). Tokens from all rows are
 * laid out contiguously in row order.
 *
 * @param input Batch of byte strings to split.
 * @param delimiter Delimiter bytes, forwarded to `split`.
 * @param skipEmpty Whether empty tokens are dropped, forwarded to `split`.
 * @returns A triple `[indices, values, shape]`:
 *     - `indices`: array from `util.getArrayFromDType('int32', ...)` holding
 *       `[row, column]` pairs flattened, i.e. a 2-D layout of
 *       `[tokenCount, 2]`;
 *     - `values`: the tokens, in the same order as the index pairs;
 *     - `shape`: `[batchSize, maxTokensInAnyRow]`.
 */
export function stringSplitImpl(input, delimiter, skipEmpty) {
  const rowCount = input.length;

  // Pass 1: tokenise each row into one flat list, remembering how many tokens
  // each row produced and the largest such count.
  const flatTokens = [];
  const tokensPerRow = new Array(rowCount);
  let totalTokens = 0;
  let widestRow = 0;
  for (let row = 0; row < rowCount; ++row) {
    const before = flatTokens.length;
    split(input[row], delimiter, skipEmpty, flatTokens);
    const produced = flatTokens.length - before;
    tokensPerRow[row] = produced;
    totalTokens += produced;
    if (produced > widestRow) {
      widestRow = produced;
    }
  }

  const indices = util.getArrayFromDType('int32', totalTokens * 2);
  const values = new Array(totalTokens);
  const shape = [rowCount, widestRow];

  // Pass 2: write a (row, column) pair for every token. `flat` is the token's
  // position in the flattened output.
  let flat = 0;
  tokensPerRow.forEach((count, row) => {
    for (let col = 0; col < count; ++col, ++flat) {
      const base = flat * 2;
      indices[base] = row;
      indices[base + 1] = col;
      values[flat] = flatTokens[flat];
    }
  });

  return [indices, values, shape];
}
|
//# sourceMappingURL=data:application/json;base64,{"version":3,"file":"StringSplit_impl.js","sourceRoot":"","sources":["../../../../../../tfjs-backend-cpu/src/kernels/StringSplit_impl.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;GAeG;AAEH,OAAO,EAAa,IAAI,EAAC,MAAM,uBAAuB,CAAC;AAEvD,SAAS,KAAK,CACV,GAAe,EAAE,UAAsB,EAAE,SAAkB,EAC3D,MAAoB;IACtB,IAAI,CAAC,GAAG,CAAC,MAAM,EAAE;QACf,OAAO;KACR;IACD,8EAA8E;IAC9E,IAAI,UAAU,CAAC,MAAM,KAAK,CAAC,EAAE;QAC3B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,GAAG,CAAC,MAAM,EAAE,EAAE,CAAC,EAAE;YACnC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;SACrC;QACD,OAAO;KACR;IACD,0EAA0E;IAC1E,IAAI,UAAU,CAAC,MAAM,KAAK,CAAC,EAAE;QAC3B,MAAM,SAAS,GAAG,UAAU,CAAC,CAAC,CAAC,CAAC;QAChC,IAAI,CAAC,GAAG,GAAG,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC;QAC/B,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE;YACf,MAAM,KAAK,GAAG,GAAG,CAAC,QAAQ,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;YACjC,IAAI,CAAC,SAAS,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE;gBACpC,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;aACpB;YACD,GAAG,GAAG,GAAG,CAAC,QAAQ,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;YAC1B,CAAC,GAAG,GAAG,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC;SAC5B;QACD,IAAI,CAAC,SAAS,IAAI,GAAG,CAAC,MAAM,KAAK,CAAC,EAAE;YAClC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;SAClB;QACD,OAAO;KACR;IACD,2EAA2E;IAC3E,iCAAiC;IACjC,IAAI,UAAU,GAAG,CAAC,CAAC;IACnB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,GAAG,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC,EAAE,EAAE;QACvC,IAAI,CAAC,CAAC,KAAK,GAAG,CAAC,MAAM,CAAC,IAAI,CAAC,UAAU,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE;YAC7D,MAAM,KAAK,GAAG,GAAG,CAAC,QAAQ,CAAC,UAAU,EAAE,CAAC,CAAC,CAAC;YAC1C,IAAI,CAAC,SAAS,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE;gBACpC,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;aACpB;YACD,UAAU,GAAG,CAAC,GAAG,CAAC,CAAC;SACpB;KACF;AACH,CAAC;AAED,MAAM,UAAU,eAAe,CAC3B,KAAmB,EAAE,SAAqB,EAC1C,SAAkB;IACpB,MAAM,SAAS,GAAG,KAAK,CAAC,MAAM,CAAC;IAE/B,gEAAgE;IAChE,MAAM,MAAM,GAAiB,EAAE,CAAC;IAEhC,IAAI,UAAU,GAAG,CAAC,CAAC;IACnB,IAAI,aAAa,GAAG,CAAC,CAAC;IACtB,MAAM,UAAU,GAAa,IAAI,KAAK,CAAC,SAAS,CAAC,CAAC;IAClD,KAAK,IAAI,CAAC,GA
AG,CAAC,EAAE,CAAC,GAAG,SAAS,EAAE,EAAE,CAAC,EAAE;QAClC,MAAM,gBAAgB,GAAG,MAAM,CAAC,MAAM,CAAC;QACvC,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,SAAS,EAAE,SAAS,EAAE,MAAM,CAAC,CAAC;QAC9C,MAAM,QAAQ,GAAG,MAAM,CAAC,MAAM,GAAG,gBAAgB,CAAC;QAClD,UAAU,CAAC,CAAC,CAAC,GAAG,QAAQ,CAAC;QACzB,UAAU,IAAI,QAAQ,CAAC;QACvB,aAAa,GAAG,IAAI,CAAC,GAAG,CAAC,aAAa,EAAE,QAAQ,CAAC,CAAC;KACnD;IAED,MAAM,OAAO,GAAG,IAAI,CAAC,iBAAiB,CAAC,OAAO,EAAE,UAAU,GAAG,CAAC,CAAe,CAAC;IAC9E,MAAM,MAAM,GAAiB,IAAI,KAAK,CAAC,UAAU,CAAC,CAAC;IACnD,MAAM,KAAK,GAAqB,CAAC,SAAS,EAAE,aAAa,CAAC,CAAC;IAE3D,IAAI,CAAC,GAAG,CAAC,CAAC;IACV,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,SAAS,EAAE,EAAE,CAAC,EAAE;QAClC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,UAAU,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,EAAE;YACtC,uDAAuD;YACvD,OAAO,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC;YACnB,OAAO,CAAC,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC;YACvB,MAAM,CAAC,CAAC,CAAC,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC;YACtB,EAAE,CAAC,CAAC;SACL;KACF;IAED,OAAO,CAAC,OAAO,EAAE,MAAM,EAAE,KAAK,CAAC,CAAC;AAClC,CAAC","sourcesContent":["/**\n * @license\n * Copyright 2021 Google LLC. 
All Rights Reserved.\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use this file except in compliance with the License.\n * You may obtain a copy of the License at\n *\n * http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n * =============================================================================\n */\n\nimport {TypedArray, util} from '@tensorflow/tfjs-core';\n\nfunction split(\n    str: Uint8Array, delimiters: Uint8Array, skipEmpty: boolean,\n    result: Uint8Array[]): void {\n  if (!str.length) {\n    return;\n  }\n  // When the delimiter is empty, the input is split into individual characters.\n  if (delimiters.length === 0) {\n    for (let i = 0; i < str.length; ++i) {\n      result.push(str.subarray(i, i + 1));\n    }\n    return;\n  }\n  // When there is one delimiter, the input is split only at that delimiter.\n  if (delimiters.length === 1) {\n    const delimiter = delimiters[0];\n    let f = str.indexOf(delimiter);\n    while (f !== -1) {\n      const token = str.subarray(0, f);\n      if (!skipEmpty || token.length !== 0) {\n        result.push(token);\n      }\n      str = str.subarray(f + 1);\n      f = str.indexOf(delimiter);\n    }\n    if (!skipEmpty || str.length !== 0) {\n      result.push(str);\n    }\n    return;\n  }\n  // When there are multiple delimiters, the input is split at every instance\n  // one of the delimiters appears.\n  let tokenStart = 0;\n  for (let i = 0; i < str.length + 1; i++) {\n    if ((i === str.length) || (delimiters.indexOf(str[i]) !== -1)) {\n      const token = str.subarray(tokenStart, i);\n      if (!skipEmpty || token.length !== 0) {\n        
result.push(token);\n      }\n      tokenStart = i + 1;\n    }\n  }\n}\n\nexport function stringSplitImpl(\n    input: Uint8Array[], delimiter: Uint8Array,\n    skipEmpty: boolean): [TypedArray, Uint8Array[], [number, number]] {\n  const batchSize = input.length;\n\n  // Empty delimiter means split the input character by character.\n  const tokens: Uint8Array[] = [];\n\n  let outputSize = 0;\n  let maxNumEntries = 0;\n  const numIndices: number[] = new Array(batchSize);\n  for (let i = 0; i < batchSize; ++i) {\n    const prevTokensLength = tokens.length;\n    split(input[i], delimiter, skipEmpty, tokens);\n    const nEntries = tokens.length - prevTokensLength;\n    numIndices[i] = nEntries;\n    outputSize += nEntries;\n    maxNumEntries = Math.max(maxNumEntries, nEntries);\n  }\n\n  const indices = util.getArrayFromDType('int32', outputSize * 2) as TypedArray;\n  const values: Uint8Array[] = new Array(outputSize);\n  const shape: [number, number] = [batchSize, maxNumEntries];\n\n  let c = 0;\n  for (let i = 0; i < batchSize; ++i) {\n    for (let j = 0; j < numIndices[i]; ++j) {\n      // indices is a 2d tensor with shape of [outputSize, 2]\n      indices[c * 2] = i;\n      indices[c * 2 + 1] = j;\n      values[c] = tokens[c];\n      ++c;\n    }\n  }\n\n  return [indices, values, shape];\n}\n"]}
|