gx
chenyc
2025-06-12 7b72ac13a83764a662159d4a49b7fffb90476ecb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
/**
 * @license
 * Copyright 2021 Google LLC. All Rights Reserved.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * =============================================================================
 */
import { util } from '@tensorflow/tfjs-core';
/**
 * Tokenizes a single input and appends the resulting tokens to `result`.
 *
 * Tokens are produced with `subarray`, so they are views onto the input's
 * underlying buffer rather than copies.
 *
 * @param str Input to tokenize; must support `subarray` and `indexOf`
 *     (i.e. a typed array). An empty input yields no tokens.
 * @param delimiters Delimiter elements. Zero delimiters splits the input into
 *     one-element tokens; one or more delimiters split at every occurrence of
 *     any of them.
 * @param skipEmpty When truthy, zero-length tokens are not appended.
 * @param result Array that receives the tokens, in order; mutated in place.
 */
function split(str, delimiters, skipEmpty, result) {
    if (!str.length) {
        // Empty input contributes nothing, not even a single empty token.
        return;
    }
    // Appends a token unless it is empty and empty tokens are being dropped.
    const emit = (token) => {
        if (skipEmpty && token.length === 0) {
            return;
        }
        result.push(token);
    };
    const delimiterCount = delimiters.length;
    if (delimiterCount === 0) {
        // No delimiter: each element becomes its own token. These tokens are
        // never empty, so skipEmpty plays no role here.
        let pos = 0;
        while (pos < str.length) {
            result.push(str.subarray(pos, pos + 1));
            pos += 1;
        }
        return;
    }
    if (delimiterCount === 1) {
        // Single delimiter: repeatedly cut the head off the remaining view.
        const separator = delimiters[0];
        let remaining = str;
        for (let hit = remaining.indexOf(separator); hit !== -1; hit = remaining.indexOf(separator)) {
            emit(remaining.subarray(0, hit));
            remaining = remaining.subarray(hit + 1);
        }
        // Whatever follows the last delimiter (possibly the untouched input
        // itself, possibly an empty view) is the final token.
        emit(remaining);
        return;
    }
    // Several delimiters: cut wherever any of them occurs.
    let begin = 0;
    for (let pos = 0; pos < str.length; pos++) {
        if (delimiters.indexOf(str[pos]) === -1) {
            continue;
        }
        emit(str.subarray(begin, pos));
        begin = pos + 1;
    }
    // The end of the input closes the last token.
    emit(str.subarray(begin, str.length));
}
/**
 * Splits every element of `input` and returns the tokens in a sparse
 * (indices, values, shape) layout.
 *
 * @param input Array of inputs; each element is tokenized with `split`.
 * @param delimiter Delimiter elements forwarded to `split`; an empty
 *     delimiter means the input is split element by element.
 * @param skipEmpty Forwarded to `split`; when truthy, empty tokens are
 *     dropped.
 * @returns A three-element array `[indices, values, shape]`:
 *     - `indices`: flat buffer from `util.getArrayFromDType('int32', ...)`
 *       holding a (row, column) pair for every token, i.e. logically
 *       `[numTokens, 2]`;
 *     - `values`: the tokens, in row-major order;
 *     - `shape`: `[input.length, largest token count of any row]`.
 */
export function stringSplitImpl(input, delimiter, skipEmpty) {
    const rowCount = input.length;
    // All rows' tokens, concatenated in row order.
    const flatTokens = [];
    // tokensPerRow[r] is how many tokens row r contributed.
    const tokensPerRow = new Array(rowCount);
    let widestRow = 0;
    for (let row = 0; row < rowCount; ++row) {
        const before = flatTokens.length;
        split(input[row], delimiter, skipEmpty, flatTokens);
        const added = flatTokens.length - before;
        tokensPerRow[row] = added;
        widestRow = Math.max(widestRow, added);
    }
    // The per-row counts sum to the length of the flat token list.
    const totalTokens = flatTokens.length;
    const indices = util.getArrayFromDType('int32', totalTokens * 2);
    const values = new Array(totalTokens);
    // `cursor` walks the flat token list while (row, col) walk the 2-D layout.
    let cursor = 0;
    for (let row = 0; row < rowCount; ++row) {
        const count = tokensPerRow[row];
        for (let col = 0; col < count; ++col, ++cursor) {
            const base = cursor * 2;
            indices[base] = row;
            indices[base + 1] = col;
            values[cursor] = flatTokens[cursor];
        }
    }
    return [indices, values, [rowCount, widestRow]];
}
//# sourceMappingURL=data:application/json;base64,eyJ2ZXJzaW9uIjozLCJmaWxlIjoiU3RyaW5nU3BsaXRfaW1wbC5qcyIsInNvdXJjZVJvb3QiOiIiLCJzb3VyY2VzIjpbIi4uLy4uLy4uLy4uLy4uLy4uL3RmanMtYmFja2VuZC1jcHUvc3JjL2tlcm5lbHMvU3RyaW5nU3BsaXRfaW1wbC50cyJdLCJuYW1lcyI6W10sIm1hcHBpbmdzIjoiQUFBQTs7Ozs7Ozs7Ozs7Ozs7O0dBZUc7QUFFSCxPQUFPLEVBQWEsSUFBSSxFQUFDLE1BQU0sdUJBQXVCLENBQUM7QUFFdkQsU0FBUyxLQUFLLENBQ1YsR0FBZSxFQUFFLFVBQXNCLEVBQUUsU0FBa0IsRUFDM0QsTUFBb0I7SUFDdEIsSUFBSSxDQUFDLEdBQUcsQ0FBQyxNQUFNLEVBQUU7UUFDZixPQUFPO0tBQ1I7SUFDRCw4RUFBOEU7SUFDOUUsSUFBSSxVQUFVLENBQUMsTUFBTSxLQUFLLENBQUMsRUFBRTtRQUMzQixLQUFLLElBQUksQ0FBQyxHQUFHLENBQUMsRUFBRSxDQUFDLEdBQUcsR0FBRyxDQUFDLE1BQU0sRUFBRSxFQUFFLENBQUMsRUFBRTtZQUNuQyxNQUFNLENBQUMsSUFBSSxDQUFDLEdBQUcsQ0FBQyxRQUFRLENBQUMsQ0FBQyxFQUFFLENBQUMsR0FBRyxDQUFDLENBQUMsQ0FBQyxDQUFDO1NBQ3JDO1FBQ0QsT0FBTztLQUNSO0lBQ0QsMEVBQTBFO0lBQzFFLElBQUksVUFBVSxDQUFDLE1BQU0sS0FBSyxDQUFDLEVBQUU7UUFDM0IsTUFBTSxTQUFTLEdBQUcsVUFBVSxDQUFDLENBQUMsQ0FBQyxDQUFDO1FBQ2hDLElBQUksQ0FBQyxHQUFHLEdBQUcsQ0FBQyxPQUFPLENBQUMsU0FBUyxDQUFDLENBQUM7UUFDL0IsT0FBTyxDQUFDLEtBQUssQ0FBQyxDQUFDLEVBQUU7WUFDZixNQUFNLEtBQUssR0FBRyxHQUFHLENBQUMsUUFBUSxDQUFDLENBQUMsRUFBRSxDQUFDLENBQUMsQ0FBQztZQUNqQyxJQUFJLENBQUMsU0FBUyxJQUFJLEtBQUssQ0FBQyxNQUFNLEtBQUssQ0FBQyxFQUFFO2dCQUNwQyxNQUFNLENBQUMsSUFBSSxDQUFDLEtBQUssQ0FBQyxDQUFDO2FBQ3BCO1lBQ0QsR0FBRyxHQUFHLEdBQUcsQ0FBQyxRQUFRLENBQUMsQ0FBQyxHQUFHLENBQUMsQ0FBQyxDQUFDO1lBQzFCLENBQUMsR0FBRyxHQUFHLENBQUMsT0FBTyxDQUFDLFNBQVMsQ0FBQyxDQUFDO1NBQzVCO1FBQ0QsSUFBSSxDQUFDLFNBQVMsSUFBSSxHQUFHLENBQUMsTUFBTSxLQUFLLENBQUMsRUFBRTtZQUNsQyxNQUFNLENBQUMsSUFBSSxDQUFDLEdBQUcsQ0FBQyxDQUFDO1NBQ2xCO1FBQ0QsT0FBTztLQUNSO0lBQ0QsMkVBQTJFO0lBQzNFLGlDQUFpQztJQUNqQyxJQUFJLFVBQVUsR0FBRyxDQUFDLENBQUM7SUFDbkIsS0FBSyxJQUFJLENBQUMsR0FBRyxDQUFDLEVBQUUsQ0FBQyxHQUFHLEdBQUcsQ0FBQyxNQUFNLEdBQUcsQ0FBQyxFQUFFLENBQUMsRUFBRSxFQUFFO1FBQ3ZDLElBQUksQ0FBQyxDQUFDLEtBQUssR0FBRyxDQUFDLE1BQU0sQ0FBQyxJQUFJLENBQUMsVUFBVSxDQUFDLE9BQU8sQ0FBQyxHQUFHLENBQUMsQ0FBQyxDQUFDLENBQUMsS0FBSyxDQUFDLENBQUMsQ0FBQyxFQUFFO1lBQzdELE1BQU0sS0FBSy
xHQUFHLEdBQUcsQ0FBQyxRQUFRLENBQUMsVUFBVSxFQUFFLENBQUMsQ0FBQyxDQUFDO1lBQzFDLElBQUksQ0FBQyxTQUFTLElBQUksS0FBSyxDQUFDLE1BQU0sS0FBSyxDQUFDLEVBQUU7Z0JBQ3BDLE1BQU0sQ0FBQyxJQUFJLENBQUMsS0FBSyxDQUFDLENBQUM7YUFDcEI7WUFDRCxVQUFVLEdBQUcsQ0FBQyxHQUFHLENBQUMsQ0FBQztTQUNwQjtLQUNGO0FBQ0gsQ0FBQztBQUVELE1BQU0sVUFBVSxlQUFlLENBQzNCLEtBQW1CLEVBQUUsU0FBcUIsRUFDMUMsU0FBa0I7SUFDcEIsTUFBTSxTQUFTLEdBQUcsS0FBSyxDQUFDLE1BQU0sQ0FBQztJQUUvQixnRUFBZ0U7SUFDaEUsTUFBTSxNQUFNLEdBQWlCLEVBQUUsQ0FBQztJQUVoQyxJQUFJLFVBQVUsR0FBRyxDQUFDLENBQUM7SUFDbkIsSUFBSSxhQUFhLEdBQUcsQ0FBQyxDQUFDO0lBQ3RCLE1BQU0sVUFBVSxHQUFhLElBQUksS0FBSyxDQUFDLFNBQVMsQ0FBQyxDQUFDO0lBQ2xELEtBQUssSUFBSSxDQUFDLEdBQUcsQ0FBQyxFQUFFLENBQUMsR0FBRyxTQUFTLEVBQUUsRUFBRSxDQUFDLEVBQUU7UUFDbEMsTUFBTSxnQkFBZ0IsR0FBRyxNQUFNLENBQUMsTUFBTSxDQUFDO1FBQ3ZDLEtBQUssQ0FBQyxLQUFLLENBQUMsQ0FBQyxDQUFDLEVBQUUsU0FBUyxFQUFFLFNBQVMsRUFBRSxNQUFNLENBQUMsQ0FBQztRQUM5QyxNQUFNLFFBQVEsR0FBRyxNQUFNLENBQUMsTUFBTSxHQUFHLGdCQUFnQixDQUFDO1FBQ2xELFVBQVUsQ0FBQyxDQUFDLENBQUMsR0FBRyxRQUFRLENBQUM7UUFDekIsVUFBVSxJQUFJLFFBQVEsQ0FBQztRQUN2QixhQUFhLEdBQUcsSUFBSSxDQUFDLEdBQUcsQ0FBQyxhQUFhLEVBQUUsUUFBUSxDQUFDLENBQUM7S0FDbkQ7SUFFRCxNQUFNLE9BQU8sR0FBRyxJQUFJLENBQUMsaUJBQWlCLENBQUMsT0FBTyxFQUFFLFVBQVUsR0FBRyxDQUFDLENBQWUsQ0FBQztJQUM5RSxNQUFNLE1BQU0sR0FBaUIsSUFBSSxLQUFLLENBQUMsVUFBVSxDQUFDLENBQUM7SUFDbkQsTUFBTSxLQUFLLEdBQXFCLENBQUMsU0FBUyxFQUFFLGFBQWEsQ0FBQyxDQUFDO0lBRTNELElBQUksQ0FBQyxHQUFHLENBQUMsQ0FBQztJQUNWLEtBQUssSUFBSSxDQUFDLEdBQUcsQ0FBQyxFQUFFLENBQUMsR0FBRyxTQUFTLEVBQUUsRUFBRSxDQUFDLEVBQUU7UUFDbEMsS0FBSyxJQUFJLENBQUMsR0FBRyxDQUFDLEVBQUUsQ0FBQyxHQUFHLFVBQVUsQ0FBQyxDQUFDLENBQUMsRUFBRSxFQUFFLENBQUMsRUFBRTtZQUN0Qyx1REFBdUQ7WUFDdkQsT0FBTyxDQUFDLENBQUMsR0FBRyxDQUFDLENBQUMsR0FBRyxDQUFDLENBQUM7WUFDbkIsT0FBTyxDQUFDLENBQUMsR0FBRyxDQUFDLEdBQUcsQ0FBQyxDQUFDLEdBQUcsQ0FBQyxDQUFDO1lBQ3ZCLE1BQU0sQ0FBQyxDQUFDLENBQUMsR0FBRyxNQUFNLENBQUMsQ0FBQyxDQUFDLENBQUM7WUFDdEIsRUFBRSxDQUFDLENBQUM7U0FDTDtLQUNGO0lBRUQsT0FBTyxDQUFDLE9BQU8sRUFBRSxNQUFNLEVBQUUsS0FBSyxDQUFDLENBQUM7QUFDbEMsQ0FBQyIsInNvdXJjZXNDb250ZW50Ij
pbIi8qKlxuICogQGxpY2Vuc2VcbiAqIENvcHlyaWdodCAyMDIxIEdvb2dsZSBMTEMuIEFsbCBSaWdodHMgUmVzZXJ2ZWQuXG4gKiBMaWNlbnNlZCB1bmRlciB0aGUgQXBhY2hlIExpY2Vuc2UsIFZlcnNpb24gMi4wICh0aGUgXCJMaWNlbnNlXCIpO1xuICogeW91IG1heSBub3QgdXNlIHRoaXMgZmlsZSBleGNlcHQgaW4gY29tcGxpYW5jZSB3aXRoIHRoZSBMaWNlbnNlLlxuICogWW91IG1heSBvYnRhaW4gYSBjb3B5IG9mIHRoZSBMaWNlbnNlIGF0XG4gKlxuICogaHR0cDovL3d3dy5hcGFjaGUub3JnL2xpY2Vuc2VzL0xJQ0VOU0UtMi4wXG4gKlxuICogVW5sZXNzIHJlcXVpcmVkIGJ5IGFwcGxpY2FibGUgbGF3IG9yIGFncmVlZCB0byBpbiB3cml0aW5nLCBzb2Z0d2FyZVxuICogZGlzdHJpYnV0ZWQgdW5kZXIgdGhlIExpY2Vuc2UgaXMgZGlzdHJpYnV0ZWQgb24gYW4gXCJBUyBJU1wiIEJBU0lTLFxuICogV0lUSE9VVCBXQVJSQU5USUVTIE9SIENPTkRJVElPTlMgT0YgQU5ZIEtJTkQsIGVpdGhlciBleHByZXNzIG9yIGltcGxpZWQuXG4gKiBTZWUgdGhlIExpY2Vuc2UgZm9yIHRoZSBzcGVjaWZpYyBsYW5ndWFnZSBnb3Zlcm5pbmcgcGVybWlzc2lvbnMgYW5kXG4gKiBsaW1pdGF0aW9ucyB1bmRlciB0aGUgTGljZW5zZS5cbiAqID09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09XG4gKi9cblxuaW1wb3J0IHtUeXBlZEFycmF5LCB1dGlsfSBmcm9tICdAdGVuc29yZmxvdy90ZmpzLWNvcmUnO1xuXG5mdW5jdGlvbiBzcGxpdChcbiAgICBzdHI6IFVpbnQ4QXJyYXksIGRlbGltaXRlcnM6IFVpbnQ4QXJyYXksIHNraXBFbXB0eTogYm9vbGVhbixcbiAgICByZXN1bHQ6IFVpbnQ4QXJyYXlbXSk6IHZvaWQge1xuICBpZiAoIXN0ci5sZW5ndGgpIHtcbiAgICByZXR1cm47XG4gIH1cbiAgLy8gV2hlbiB0aGUgZGVsaW1pdGVyIGlzIGVtcHR5LCB0aGUgaW5wdXQgaXMgc3BsaXQgaW50byBpbmRpdmlkdWFsIGNoYXJhY3RlcnMuXG4gIGlmIChkZWxpbWl0ZXJzLmxlbmd0aCA9PT0gMCkge1xuICAgIGZvciAobGV0IGkgPSAwOyBpIDwgc3RyLmxlbmd0aDsgKytpKSB7XG4gICAgICByZXN1bHQucHVzaChzdHIuc3ViYXJyYXkoaSwgaSArIDEpKTtcbiAgICB9XG4gICAgcmV0dXJuO1xuICB9XG4gIC8vIFdoZW4gdGhlcmUgaXMgb25lIGRlbGltaXRlciwgdGhlIGlucHV0IGlzIHNwbGl0IG9ubHkgYXQgdGhhdCBkZWxpbWl0ZXIuXG4gIGlmIChkZWxpbWl0ZXJzLmxlbmd0aCA9PT0gMSkge1xuICAgIGNvbnN0IGRlbGltaXRlciA9IGRlbGltaXRlcnNbMF07XG4gICAgbGV0IGYgPSBzdHIuaW5kZXhPZihkZWxpbWl0ZXIpO1xuICAgIHdoaWxlIChmICE9PSAtMSkge1xuICAgICAgY29uc3QgdG9rZW4gPSBzdHIuc3ViYXJyYXkoMCwgZik7XG4gICAgICBpZiAoIXNraXBFbXB0eSB8fCB0b2tlbi5sZW5ndGggIT09IDApIHtcbiAgICAgICAgcmVzdWx0LnB1c2godG9rZW4pO1
xuICAgICAgfVxuICAgICAgc3RyID0gc3RyLnN1YmFycmF5KGYgKyAxKTtcbiAgICAgIGYgPSBzdHIuaW5kZXhPZihkZWxpbWl0ZXIpO1xuICAgIH1cbiAgICBpZiAoIXNraXBFbXB0eSB8fCBzdHIubGVuZ3RoICE9PSAwKSB7XG4gICAgICByZXN1bHQucHVzaChzdHIpO1xuICAgIH1cbiAgICByZXR1cm47XG4gIH1cbiAgLy8gV2hlbiB0aGVyZSBhcmUgbXVsdGlwbGUgZGVsaW1pdGVycywgdGhlIGlucHV0IGlzIHNwbGl0IGF0IGV2ZXJ5IGluc3RhbmNlXG4gIC8vIG9uZSBvZiB0aGUgZGVsaW1pdGVycyBhcHBlYXJzLlxuICBsZXQgdG9rZW5TdGFydCA9IDA7XG4gIGZvciAobGV0IGkgPSAwOyBpIDwgc3RyLmxlbmd0aCArIDE7IGkrKykge1xuICAgIGlmICgoaSA9PT0gc3RyLmxlbmd0aCkgfHwgKGRlbGltaXRlcnMuaW5kZXhPZihzdHJbaV0pICE9PSAtMSkpIHtcbiAgICAgIGNvbnN0IHRva2VuID0gc3RyLnN1YmFycmF5KHRva2VuU3RhcnQsIGkpO1xuICAgICAgaWYgKCFza2lwRW1wdHkgfHwgdG9rZW4ubGVuZ3RoICE9PSAwKSB7XG4gICAgICAgIHJlc3VsdC5wdXNoKHRva2VuKTtcbiAgICAgIH1cbiAgICAgIHRva2VuU3RhcnQgPSBpICsgMTtcbiAgICB9XG4gIH1cbn1cblxuZXhwb3J0IGZ1bmN0aW9uIHN0cmluZ1NwbGl0SW1wbChcbiAgICBpbnB1dDogVWludDhBcnJheVtdLCBkZWxpbWl0ZXI6IFVpbnQ4QXJyYXksXG4gICAgc2tpcEVtcHR5OiBib29sZWFuKTogW1R5cGVkQXJyYXksIFVpbnQ4QXJyYXlbXSwgW251bWJlciwgbnVtYmVyXV0ge1xuICBjb25zdCBiYXRjaFNpemUgPSBpbnB1dC5sZW5ndGg7XG5cbiAgLy8gRW1wdHkgZGVsaW1pdGVyIG1lYW5zIHNwbGl0IHRoZSBpbnB1dCBjaGFyYWN0ZXIgYnkgY2hhcmFjdGVyLlxuICBjb25zdCB0b2tlbnM6IFVpbnQ4QXJyYXlbXSA9IFtdO1xuXG4gIGxldCBvdXRwdXRTaXplID0gMDtcbiAgbGV0IG1heE51bUVudHJpZXMgPSAwO1xuICBjb25zdCBudW1JbmRpY2VzOiBudW1iZXJbXSA9IG5ldyBBcnJheShiYXRjaFNpemUpO1xuICBmb3IgKGxldCBpID0gMDsgaSA8IGJhdGNoU2l6ZTsgKytpKSB7XG4gICAgY29uc3QgcHJldlRva2Vuc0xlbmd0aCA9IHRva2Vucy5sZW5ndGg7XG4gICAgc3BsaXQoaW5wdXRbaV0sIGRlbGltaXRlciwgc2tpcEVtcHR5LCB0b2tlbnMpO1xuICAgIGNvbnN0IG5FbnRyaWVzID0gdG9rZW5zLmxlbmd0aCAtIHByZXZUb2tlbnNMZW5ndGg7XG4gICAgbnVtSW5kaWNlc1tpXSA9IG5FbnRyaWVzO1xuICAgIG91dHB1dFNpemUgKz0gbkVudHJpZXM7XG4gICAgbWF4TnVtRW50cmllcyA9IE1hdGgubWF4KG1heE51bUVudHJpZXMsIG5FbnRyaWVzKTtcbiAgfVxuXG4gIGNvbnN0IGluZGljZXMgPSB1dGlsLmdldEFycmF5RnJvbURUeXBlKCdpbnQzMicsIG91dHB1dFNpemUgKiAyKSBhcyBUeXBlZEFycmF5O1xuICBjb25zdCB2YWx1ZXM6IFVpbnQ4QXJyYXlbXSA9IG5ldyBBcnJheShvdXRwdXRTaXplKTtcbiAgY29uc3Qgc2hhcGU6IFtudW1iZXIsIG51bWJlcl
0gPSBbYmF0Y2hTaXplLCBtYXhOdW1FbnRyaWVzXTtcblxuICBsZXQgYyA9IDA7XG4gIGZvciAobGV0IGkgPSAwOyBpIDwgYmF0Y2hTaXplOyArK2kpIHtcbiAgICBmb3IgKGxldCBqID0gMDsgaiA8IG51bUluZGljZXNbaV07ICsraikge1xuICAgICAgLy8gaW5kaWNlcyBpcyBhIDJkIHRlbnNvciB3aXRoIHNoYXBlIG9mIFtvdXRwdXRTaXplLCAyXVxuICAgICAgaW5kaWNlc1tjICogMl0gPSBpO1xuICAgICAgaW5kaWNlc1tjICogMiArIDFdID0gajtcbiAgICAgIHZhbHVlc1tjXSA9IHRva2Vuc1tjXTtcbiAgICAgICsrYztcbiAgICB9XG4gIH1cblxuICByZXR1cm4gW2luZGljZXMsIHZhbHVlcywgc2hhcGVdO1xufVxuIl19