/**
 * @license
 * Copyright 2021 Google LLC. All Rights Reserved.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * =============================================================================
 */
|
import { util } from '@tensorflow/tfjs-core';
|
/**
 * Splits one byte string into tokens and appends them to `result`.
 *
 * Every token is a `subarray` view of `str`, so no bytes are copied and the
 * tokens share the input's underlying buffer.
 *
 * @param {Uint8Array} str Bytes to split. An empty input appends nothing,
 *     regardless of `skipEmpty`.
 * @param {Uint8Array} delimiters Delimiter bytes. Empty means "split into
 *     single-byte tokens"; otherwise the input is split at every byte that
 *     equals any of the delimiters.
 * @param {boolean} skipEmpty When true, zero-length tokens are dropped.
 * @param {Uint8Array[]} result Output array; tokens are pushed in order.
 * @returns {void}
 */
function split(str, delimiters, skipEmpty, result) {
  if (!str.length) {
    return;
  }

  // Single place for the "drop empty tokens" rule shared by both delimiter
  // branches below.
  const pushToken = (token) => {
    if (!skipEmpty || token.length !== 0) {
      result.push(token);
    }
  };

  // When the delimiter is empty, the input is split into individual
  // characters. These tokens are never empty, so skipEmpty is irrelevant.
  if (delimiters.length === 0) {
    for (let i = 0; i < str.length; ++i) {
      result.push(str.subarray(i, i + 1));
    }
    return;
  }

  // When there is one delimiter, the input is split only at that delimiter.
  // Scanning with a start offset avoids reassigning the `str` parameter.
  if (delimiters.length === 1) {
    const delimiter = delimiters[0];
    let tokenStart = 0;
    let found = str.indexOf(delimiter, tokenStart);
    while (found !== -1) {
      pushToken(str.subarray(tokenStart, found));
      tokenStart = found + 1;
      found = str.indexOf(delimiter, tokenStart);
    }
    // Remainder after the last delimiter (possibly empty).
    pushToken(str.subarray(tokenStart));
    return;
  }

  // When there are multiple delimiters, the input is split at every instance
  // one of the delimiters appears. The loop runs one step past the end so the
  // trailing token is flushed by the same code path.
  let tokenStart = 0;
  for (let i = 0; i <= str.length; i++) {
    if (i === str.length || delimiters.indexOf(str[i]) !== -1) {
      pushToken(str.subarray(tokenStart, i));
      tokenStart = i + 1;
    }
  }
}
|
/**
 * Splits every string of a batch and returns the pieces in a sparse
 * (indices / values / dense shape) layout.
 *
 * @param {Uint8Array[]} input Batch of byte strings to split.
 * @param {Uint8Array} delimiter Delimiter bytes, forwarded unchanged to
 *     `split`. An empty delimiter means each input is split byte by byte.
 * @param {boolean} skipEmpty When true, zero-length tokens are omitted.
 * @returns {Array} A triple `[indices, values, shape]`:
 *     - `indices`: flat array from `util.getArrayFromDType('int32', ...)` of
 *       length `2 * values.length`, holding one `(batchIndex, tokenIndex)`
 *       pair per token;
 *     - `values`: the tokens of all inputs, concatenated in batch order;
 *     - `shape`: `[input.length, maxTokensInAnySingleInput]`.
 */
export function stringSplitImpl(input, delimiter, skipEmpty) {
  const batchSize = input.length;

  // All tokens of all batch entries, in batch order.
  const tokens = [];
  // numIndices[i] is the number of tokens produced by input[i].
  const numIndices = new Array(batchSize);
  let outputSize = 0;
  let maxNumEntries = 0;
  for (let i = 0; i < batchSize; ++i) {
    // `split` appends to the shared array, so the per-entry token count is
    // the growth in length across the call.
    const prevTokensLength = tokens.length;
    split(input[i], delimiter, skipEmpty, tokens);
    const nEntries = tokens.length - prevTokensLength;
    numIndices[i] = nEntries;
    outputSize += nEntries;
    maxNumEntries = Math.max(maxNumEntries, nEntries);
  }

  const indices = util.getArrayFromDType('int32', outputSize * 2);
  const values = new Array(outputSize);
  const shape = [batchSize, maxNumEntries];

  // `c` walks the flat token list while (i, j) track the 2-D position.
  let c = 0;
  for (let i = 0; i < batchSize; ++i) {
    for (let j = 0; j < numIndices[i]; ++j) {
      // indices is a 2d tensor with shape of [outputSize, 2]
      indices[c * 2] = i;
      indices[c * 2 + 1] = j;
      values[c] = tokens[c];
      ++c;
    }
  }

  return [indices, values, shape];
}
|
//# sourceMappingURL=data:application/json;base64,eyJ2ZXJzaW9uIjozLCJmaWxlIjoiU3RyaW5nU3BsaXRfaW1wbC5qcyIsInNvdXJjZVJvb3QiOiIiLCJzb3VyY2VzIjpbIi4uLy4uLy4uLy4uLy4uLy4uL3RmanMtYmFja2VuZC1jcHUvc3JjL2tlcm5lbHMvU3RyaW5nU3BsaXRfaW1wbC50cyJdLCJuYW1lcyI6W10sIm1hcHBpbmdzIjoiQUFBQTs7Ozs7Ozs7Ozs7Ozs7O0dBZUc7QUFFSCxPQUFPLEVBQWEsSUFBSSxFQUFDLE1BQU0sdUJBQXVCLENBQUM7QUFFdkQsU0FBUyxLQUFLLENBQ1YsR0FBZSxFQUFFLFVBQXNCLEVBQUUsU0FBa0IsRUFDM0QsTUFBb0I7SUFDdEIsSUFBSSxDQUFDLEdBQUcsQ0FBQyxNQUFNLEVBQUU7UUFDZixPQUFPO0tBQ1I7SUFDRCw4RUFBOEU7SUFDOUUsSUFBSSxVQUFVLENBQUMsTUFBTSxLQUFLLENBQUMsRUFBRTtRQUMzQixLQUFLLElBQUksQ0FBQyxHQUFHLENBQUMsRUFBRSxDQUFDLEdBQUcsR0FBRyxDQUFDLE1BQU0sRUFBRSxFQUFFLENBQUMsRUFBRTtZQUNuQyxNQUFNLENBQUMsSUFBSSxDQUFDLEdBQUcsQ0FBQyxRQUFRLENBQUMsQ0FBQyxFQUFFLENBQUMsR0FBRyxDQUFDLENBQUMsQ0FBQyxDQUFDO1NBQ3JDO1FBQ0QsT0FBTztLQUNSO0lBQ0QsMEVBQTBFO0lBQzFFLElBQUksVUFBVSxDQUFDLE1BQU0sS0FBSyxDQUFDLEVBQUU7UUFDM0IsTUFBTSxTQUFTLEdBQUcsVUFBVSxDQUFDLENBQUMsQ0FBQyxDQUFDO1FBQ2hDLElBQUksQ0FBQyxHQUFHLEdBQUcsQ0FBQyxPQUFPLENBQUMsU0FBUyxDQUFDLENBQUM7UUFDL0IsT0FBTyxDQUFDLEtBQUssQ0FBQyxDQUFDLEVBQUU7WUFDZixNQUFNLEtBQUssR0FBRyxHQUFHLENBQUMsUUFBUSxDQUFDLENBQUMsRUFBRSxDQUFDLENBQUMsQ0FBQztZQUNqQyxJQUFJLENBQUMsU0FBUyxJQUFJLEtBQUssQ0FBQyxNQUFNLEtBQUssQ0FBQyxFQUFFO2dCQUNwQyxNQUFNLENBQUMsSUFBSSxDQUFDLEtBQUssQ0FBQyxDQUFDO2FBQ3BCO1lBQ0QsR0FBRyxHQUFHLEdBQUcsQ0FBQyxRQUFRLENBQUMsQ0FBQyxHQUFHLENBQUMsQ0FBQyxDQUFDO1lBQzFCLENBQUMsR0FBRyxHQUFHLENBQUMsT0FBTyxDQUFDLFNBQVMsQ0FBQyxDQUFDO1NBQzVCO1FBQ0QsSUFBSSxDQUFDLFNBQVMsSUFBSSxHQUFHLENBQUMsTUFBTSxLQUFLLENBQUMsRUFBRTtZQUNsQyxNQUFNLENBQUMsSUFBSSxDQUFDLEdBQUcsQ0FBQyxDQUFDO1NBQ2xCO1FBQ0QsT0FBTztLQUNSO0lBQ0QsMkVBQTJFO0lBQzNFLGlDQUFpQztJQUNqQyxJQUFJLFVBQVUsR0FBRyxDQUFDLENBQUM7SUFDbkIsS0FBSyxJQUFJLENBQUMsR0FBRyxDQUFDLEVBQUUsQ0FBQyxHQUFHLEdBQUcsQ0FBQyxNQUFNLEdBQUcsQ0FBQyxFQUFFLENBQUMsRUFBRSxFQUFFO1FBQ3ZDLElBQUksQ0FBQyxDQUFDLEtBQUssR0FBRyxDQUFDLE1BQU0sQ0FBQyxJQUFJLENBQUMsVUFBVSxDQUFDLE9BQU8sQ0FBQyxHQUFHLENBQUMsQ0FBQyxDQUFDLENBQUMsS0FBSyxDQUFDLENBQUMsQ0FBQyxFQUFFO1lBQzdELE1BQU0sS0FBSy
xHQUFHLEdBQUcsQ0FBQyxRQUFRLENBQUMsVUFBVSxFQUFFLENBQUMsQ0FBQyxDQUFDO1lBQzFDLElBQUksQ0FBQyxTQUFTLElBQUksS0FBSyxDQUFDLE1BQU0sS0FBSyxDQUFDLEVBQUU7Z0JBQ3BDLE1BQU0sQ0FBQyxJQUFJLENBQUMsS0FBSyxDQUFDLENBQUM7YUFDcEI7WUFDRCxVQUFVLEdBQUcsQ0FBQyxHQUFHLENBQUMsQ0FBQztTQUNwQjtLQUNGO0FBQ0gsQ0FBQztBQUVELE1BQU0sVUFBVSxlQUFlLENBQzNCLEtBQW1CLEVBQUUsU0FBcUIsRUFDMUMsU0FBa0I7SUFDcEIsTUFBTSxTQUFTLEdBQUcsS0FBSyxDQUFDLE1BQU0sQ0FBQztJQUUvQixnRUFBZ0U7SUFDaEUsTUFBTSxNQUFNLEdBQWlCLEVBQUUsQ0FBQztJQUVoQyxJQUFJLFVBQVUsR0FBRyxDQUFDLENBQUM7SUFDbkIsSUFBSSxhQUFhLEdBQUcsQ0FBQyxDQUFDO0lBQ3RCLE1BQU0sVUFBVSxHQUFhLElBQUksS0FBSyxDQUFDLFNBQVMsQ0FBQyxDQUFDO0lBQ2xELEtBQUssSUFBSSxDQUFDLEdBQUcsQ0FBQyxFQUFFLENBQUMsR0FBRyxTQUFTLEVBQUUsRUFBRSxDQUFDLEVBQUU7UUFDbEMsTUFBTSxnQkFBZ0IsR0FBRyxNQUFNLENBQUMsTUFBTSxDQUFDO1FBQ3ZDLEtBQUssQ0FBQyxLQUFLLENBQUMsQ0FBQyxDQUFDLEVBQUUsU0FBUyxFQUFFLFNBQVMsRUFBRSxNQUFNLENBQUMsQ0FBQztRQUM5QyxNQUFNLFFBQVEsR0FBRyxNQUFNLENBQUMsTUFBTSxHQUFHLGdCQUFnQixDQUFDO1FBQ2xELFVBQVUsQ0FBQyxDQUFDLENBQUMsR0FBRyxRQUFRLENBQUM7UUFDekIsVUFBVSxJQUFJLFFBQVEsQ0FBQztRQUN2QixhQUFhLEdBQUcsSUFBSSxDQUFDLEdBQUcsQ0FBQyxhQUFhLEVBQUUsUUFBUSxDQUFDLENBQUM7S0FDbkQ7SUFFRCxNQUFNLE9BQU8sR0FBRyxJQUFJLENBQUMsaUJBQWlCLENBQUMsT0FBTyxFQUFFLFVBQVUsR0FBRyxDQUFDLENBQWUsQ0FBQztJQUM5RSxNQUFNLE1BQU0sR0FBaUIsSUFBSSxLQUFLLENBQUMsVUFBVSxDQUFDLENBQUM7SUFDbkQsTUFBTSxLQUFLLEdBQXFCLENBQUMsU0FBUyxFQUFFLGFBQWEsQ0FBQyxDQUFDO0lBRTNELElBQUksQ0FBQyxHQUFHLENBQUMsQ0FBQztJQUNWLEtBQUssSUFBSSxDQUFDLEdBQUcsQ0FBQyxFQUFFLENBQUMsR0FBRyxTQUFTLEVBQUUsRUFBRSxDQUFDLEVBQUU7UUFDbEMsS0FBSyxJQUFJLENBQUMsR0FBRyxDQUFDLEVBQUUsQ0FBQyxHQUFHLFVBQVUsQ0FBQyxDQUFDLENBQUMsRUFBRSxFQUFFLENBQUMsRUFBRTtZQUN0Qyx1REFBdUQ7WUFDdkQsT0FBTyxDQUFDLENBQUMsR0FBRyxDQUFDLENBQUMsR0FBRyxDQUFDLENBQUM7WUFDbkIsT0FBTyxDQUFDLENBQUMsR0FBRyxDQUFDLEdBQUcsQ0FBQyxDQUFDLEdBQUcsQ0FBQyxDQUFDO1lBQ3ZCLE1BQU0sQ0FBQyxDQUFDLENBQUMsR0FBRyxNQUFNLENBQUMsQ0FBQyxDQUFDLENBQUM7WUFDdEIsRUFBRSxDQUFDLENBQUM7U0FDTDtLQUNGO0lBRUQsT0FBTyxDQUFDLE9BQU8sRUFBRSxNQUFNLEVBQUUsS0FBSyxDQUFDLENBQUM7QUFDbEMsQ0FBQyIsInNvdXJjZXNDb250ZW50Ij
pbIi8qKlxuICogQGxpY2Vuc2VcbiAqIENvcHlyaWdodCAyMDIxIEdvb2dsZSBMTEMuIEFsbCBSaWdodHMgUmVzZXJ2ZWQuXG4gKiBMaWNlbnNlZCB1bmRlciB0aGUgQXBhY2hlIExpY2Vuc2UsIFZlcnNpb24gMi4wICh0aGUgXCJMaWNlbnNlXCIpO1xuICogeW91IG1heSBub3QgdXNlIHRoaXMgZmlsZSBleGNlcHQgaW4gY29tcGxpYW5jZSB3aXRoIHRoZSBMaWNlbnNlLlxuICogWW91IG1heSBvYnRhaW4gYSBjb3B5IG9mIHRoZSBMaWNlbnNlIGF0XG4gKlxuICogaHR0cDovL3d3dy5hcGFjaGUub3JnL2xpY2Vuc2VzL0xJQ0VOU0UtMi4wXG4gKlxuICogVW5sZXNzIHJlcXVpcmVkIGJ5IGFwcGxpY2FibGUgbGF3IG9yIGFncmVlZCB0byBpbiB3cml0aW5nLCBzb2Z0d2FyZVxuICogZGlzdHJpYnV0ZWQgdW5kZXIgdGhlIExpY2Vuc2UgaXMgZGlzdHJpYnV0ZWQgb24gYW4gXCJBUyBJU1wiIEJBU0lTLFxuICogV0lUSE9VVCBXQVJSQU5USUVTIE9SIENPTkRJVElPTlMgT0YgQU5ZIEtJTkQsIGVpdGhlciBleHByZXNzIG9yIGltcGxpZWQuXG4gKiBTZWUgdGhlIExpY2Vuc2UgZm9yIHRoZSBzcGVjaWZpYyBsYW5ndWFnZSBnb3Zlcm5pbmcgcGVybWlzc2lvbnMgYW5kXG4gKiBsaW1pdGF0aW9ucyB1bmRlciB0aGUgTGljZW5zZS5cbiAqID09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09XG4gKi9cblxuaW1wb3J0IHtUeXBlZEFycmF5LCB1dGlsfSBmcm9tICdAdGVuc29yZmxvdy90ZmpzLWNvcmUnO1xuXG5mdW5jdGlvbiBzcGxpdChcbiAgICBzdHI6IFVpbnQ4QXJyYXksIGRlbGltaXRlcnM6IFVpbnQ4QXJyYXksIHNraXBFbXB0eTogYm9vbGVhbixcbiAgICByZXN1bHQ6IFVpbnQ4QXJyYXlbXSk6IHZvaWQge1xuICBpZiAoIXN0ci5sZW5ndGgpIHtcbiAgICByZXR1cm47XG4gIH1cbiAgLy8gV2hlbiB0aGUgZGVsaW1pdGVyIGlzIGVtcHR5LCB0aGUgaW5wdXQgaXMgc3BsaXQgaW50byBpbmRpdmlkdWFsIGNoYXJhY3RlcnMuXG4gIGlmIChkZWxpbWl0ZXJzLmxlbmd0aCA9PT0gMCkge1xuICAgIGZvciAobGV0IGkgPSAwOyBpIDwgc3RyLmxlbmd0aDsgKytpKSB7XG4gICAgICByZXN1bHQucHVzaChzdHIuc3ViYXJyYXkoaSwgaSArIDEpKTtcbiAgICB9XG4gICAgcmV0dXJuO1xuICB9XG4gIC8vIFdoZW4gdGhlcmUgaXMgb25lIGRlbGltaXRlciwgdGhlIGlucHV0IGlzIHNwbGl0IG9ubHkgYXQgdGhhdCBkZWxpbWl0ZXIuXG4gIGlmIChkZWxpbWl0ZXJzLmxlbmd0aCA9PT0gMSkge1xuICAgIGNvbnN0IGRlbGltaXRlciA9IGRlbGltaXRlcnNbMF07XG4gICAgbGV0IGYgPSBzdHIuaW5kZXhPZihkZWxpbWl0ZXIpO1xuICAgIHdoaWxlIChmICE9PSAtMSkge1xuICAgICAgY29uc3QgdG9rZW4gPSBzdHIuc3ViYXJyYXkoMCwgZik7XG4gICAgICBpZiAoIXNraXBFbXB0eSB8fCB0b2tlbi5sZW5ndGggIT09IDApIHtcbiAgICAgICAgcmVzdWx0LnB1c2godG9rZW4pO1
xuICAgICAgfVxuICAgICAgc3RyID0gc3RyLnN1YmFycmF5KGYgKyAxKTtcbiAgICAgIGYgPSBzdHIuaW5kZXhPZihkZWxpbWl0ZXIpO1xuICAgIH1cbiAgICBpZiAoIXNraXBFbXB0eSB8fCBzdHIubGVuZ3RoICE9PSAwKSB7XG4gICAgICByZXN1bHQucHVzaChzdHIpO1xuICAgIH1cbiAgICByZXR1cm47XG4gIH1cbiAgLy8gV2hlbiB0aGVyZSBhcmUgbXVsdGlwbGUgZGVsaW1pdGVycywgdGhlIGlucHV0IGlzIHNwbGl0IGF0IGV2ZXJ5IGluc3RhbmNlXG4gIC8vIG9uZSBvZiB0aGUgZGVsaW1pdGVycyBhcHBlYXJzLlxuICBsZXQgdG9rZW5TdGFydCA9IDA7XG4gIGZvciAobGV0IGkgPSAwOyBpIDwgc3RyLmxlbmd0aCArIDE7IGkrKykge1xuICAgIGlmICgoaSA9PT0gc3RyLmxlbmd0aCkgfHwgKGRlbGltaXRlcnMuaW5kZXhPZihzdHJbaV0pICE9PSAtMSkpIHtcbiAgICAgIGNvbnN0IHRva2VuID0gc3RyLnN1YmFycmF5KHRva2VuU3RhcnQsIGkpO1xuICAgICAgaWYgKCFza2lwRW1wdHkgfHwgdG9rZW4ubGVuZ3RoICE9PSAwKSB7XG4gICAgICAgIHJlc3VsdC5wdXNoKHRva2VuKTtcbiAgICAgIH1cbiAgICAgIHRva2VuU3RhcnQgPSBpICsgMTtcbiAgICB9XG4gIH1cbn1cblxuZXhwb3J0IGZ1bmN0aW9uIHN0cmluZ1NwbGl0SW1wbChcbiAgICBpbnB1dDogVWludDhBcnJheVtdLCBkZWxpbWl0ZXI6IFVpbnQ4QXJyYXksXG4gICAgc2tpcEVtcHR5OiBib29sZWFuKTogW1R5cGVkQXJyYXksIFVpbnQ4QXJyYXlbXSwgW251bWJlciwgbnVtYmVyXV0ge1xuICBjb25zdCBiYXRjaFNpemUgPSBpbnB1dC5sZW5ndGg7XG5cbiAgLy8gRW1wdHkgZGVsaW1pdGVyIG1lYW5zIHNwbGl0IHRoZSBpbnB1dCBjaGFyYWN0ZXIgYnkgY2hhcmFjdGVyLlxuICBjb25zdCB0b2tlbnM6IFVpbnQ4QXJyYXlbXSA9IFtdO1xuXG4gIGxldCBvdXRwdXRTaXplID0gMDtcbiAgbGV0IG1heE51bUVudHJpZXMgPSAwO1xuICBjb25zdCBudW1JbmRpY2VzOiBudW1iZXJbXSA9IG5ldyBBcnJheShiYXRjaFNpemUpO1xuICBmb3IgKGxldCBpID0gMDsgaSA8IGJhdGNoU2l6ZTsgKytpKSB7XG4gICAgY29uc3QgcHJldlRva2Vuc0xlbmd0aCA9IHRva2Vucy5sZW5ndGg7XG4gICAgc3BsaXQoaW5wdXRbaV0sIGRlbGltaXRlciwgc2tpcEVtcHR5LCB0b2tlbnMpO1xuICAgIGNvbnN0IG5FbnRyaWVzID0gdG9rZW5zLmxlbmd0aCAtIHByZXZUb2tlbnNMZW5ndGg7XG4gICAgbnVtSW5kaWNlc1tpXSA9IG5FbnRyaWVzO1xuICAgIG91dHB1dFNpemUgKz0gbkVudHJpZXM7XG4gICAgbWF4TnVtRW50cmllcyA9IE1hdGgubWF4KG1heE51bUVudHJpZXMsIG5FbnRyaWVzKTtcbiAgfVxuXG4gIGNvbnN0IGluZGljZXMgPSB1dGlsLmdldEFycmF5RnJvbURUeXBlKCdpbnQzMicsIG91dHB1dFNpemUgKiAyKSBhcyBUeXBlZEFycmF5O1xuICBjb25zdCB2YWx1ZXM6IFVpbnQ4QXJyYXlbXSA9IG5ldyBBcnJheShvdXRwdXRTaXplKTtcbiAgY29uc3Qgc2hhcGU6IFtudW1iZXIsIG51bWJlcl
0gPSBbYmF0Y2hTaXplLCBtYXhOdW1FbnRyaWVzXTtcblxuICBsZXQgYyA9IDA7XG4gIGZvciAobGV0IGkgPSAwOyBpIDwgYmF0Y2hTaXplOyArK2kpIHtcbiAgICBmb3IgKGxldCBqID0gMDsgaiA8IG51bUluZGljZXNbaV07ICsraikge1xuICAgICAgLy8gaW5kaWNlcyBpcyBhIDJkIHRlbnNvciB3aXRoIHNoYXBlIG9mIFtvdXRwdXRTaXplLCAyXVxuICAgICAgaW5kaWNlc1tjICogMl0gPSBpO1xuICAgICAgaW5kaWNlc1tjICogMiArIDFdID0gajtcbiAgICAgIHZhbHVlc1tjXSA9IHRva2Vuc1tjXTtcbiAgICAgICsrYztcbiAgICB9XG4gIH1cblxuICByZXR1cm4gW2luZGljZXMsIHZhbHVlcywgc2hhcGVdO1xufVxuIl19
|