gx
chenyc
2025-06-12 7b72ac13a83764a662159d4a49b7fffb90476ecb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
/**
 * @license
 * Copyright 2021 Google LLC. All Rights Reserved.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * =============================================================================
 */
import { util } from '@tensorflow/tfjs-core';
function split(str, delimiters, skipEmpty, result) {
    if (!str.length) {
        return;
    }
    // When the delimiter is empty, the input is split into individual characters.
    if (delimiters.length === 0) {
        for (let i = 0; i < str.length; ++i) {
            result.push(str.subarray(i, i + 1));
        }
        return;
    }
    // When there is one delimiter, the input is split only at that delimiter.
    if (delimiters.length === 1) {
        const delimiter = delimiters[0];
        let f = str.indexOf(delimiter);
        while (f !== -1) {
            const token = str.subarray(0, f);
            if (!skipEmpty || token.length !== 0) {
                result.push(token);
            }
            str = str.subarray(f + 1);
            f = str.indexOf(delimiter);
        }
        if (!skipEmpty || str.length !== 0) {
            result.push(str);
        }
        return;
    }
    // When there are multiple delimiters, the input is split at every instance
    // one of the delimiters appears.
    let tokenStart = 0;
    for (let i = 0; i < str.length + 1; i++) {
        if ((i === str.length) || (delimiters.indexOf(str[i]) !== -1)) {
            const token = str.subarray(tokenStart, i);
            if (!skipEmpty || token.length !== 0) {
                result.push(token);
            }
            tokenStart = i + 1;
        }
    }
}
export function stringSplitImpl(input, delimiter, skipEmpty) {
    const batchSize = input.length;
    // Empty delimiter means split the input character by character.
    const tokens = [];
    let outputSize = 0;
    let maxNumEntries = 0;
    const numIndices = new Array(batchSize);
    for (let i = 0; i < batchSize; ++i) {
        const prevTokensLength = tokens.length;
        split(input[i], delimiter, skipEmpty, tokens);
        const nEntries = tokens.length - prevTokensLength;
        numIndices[i] = nEntries;
        outputSize += nEntries;
        maxNumEntries = Math.max(maxNumEntries, nEntries);
    }
    const indices = util.getArrayFromDType('int32', outputSize * 2);
    const values = new Array(outputSize);
    const shape = [batchSize, maxNumEntries];
    let c = 0;
    for (let i = 0; i < batchSize; ++i) {
        for (let j = 0; j < numIndices[i]; ++j) {
            // indices is a 2d tensor with shape of [outputSize, 2]
            indices[c * 2] = i;
            indices[c * 2 + 1] = j;
            values[c] = tokens[c];
            ++c;
        }
    }
    return [indices, values, shape];
}
//# sourceMappingURL=data:application/json;base64,{"version":3,"file":"StringSplit_impl.js","sourceRoot":"","sources":["../../../../../../tfjs-backend-cpu/src/kernels/StringSplit_impl.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;GAeG;AAEH,OAAO,EAAa,IAAI,EAAC,MAAM,uBAAuB,CAAC;AAEvD,SAAS,KAAK,CACV,GAAe,EAAE,UAAsB,EAAE,SAAkB,EAC3D,MAAoB;IACtB,IAAI,CAAC,GAAG,CAAC,MAAM,EAAE;QACf,OAAO;KACR;IACD,8EAA8E;IAC9E,IAAI,UAAU,CAAC,MAAM,KAAK,CAAC,EAAE;QAC3B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,GAAG,CAAC,MAAM,EAAE,EAAE,CAAC,EAAE;YACnC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;SACrC;QACD,OAAO;KACR;IACD,0EAA0E;IAC1E,IAAI,UAAU,CAAC,MAAM,KAAK,CAAC,EAAE;QAC3B,MAAM,SAAS,GAAG,UAAU,CAAC,CAAC,CAAC,CAAC;QAChC,IAAI,CAAC,GAAG,GAAG,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC;QAC/B,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE;YACf,MAAM,KAAK,GAAG,GAAG,CAAC,QAAQ,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;YACjC,IAAI,CAAC,SAAS,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE;gBACpC,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;aACpB;YACD,GAAG,GAAG,GAAG,CAAC,QAAQ,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;YAC1B,CAAC,GAAG,GAAG,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC;SAC5B;QACD,IAAI,CAAC,SAAS,IAAI,GAAG,CAAC,MAAM,KAAK,CAAC,EAAE;YAClC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;SAClB;QACD,OAAO;KACR;IACD,2EAA2E;IAC3E,iCAAiC;IACjC,IAAI,UAAU,GAAG,CAAC,CAAC;IACnB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,GAAG,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC,EAAE,EAAE;QACvC,IAAI,CAAC,CAAC,KAAK,GAAG,CAAC,MAAM,CAAC,IAAI,CAAC,UAAU,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE;YAC7D,MAAM,KAAK,GAAG,GAAG,CAAC,QAAQ,CAAC,UAAU,EAAE,CAAC,CAAC,CAAC;YAC1C,IAAI,CAAC,SAAS,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE;gBACpC,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;aACpB;YACD,UAAU,GAAG,CAAC,GAAG,CAAC,CAAC;SACpB;KACF;AACH,CAAC;AAED,MAAM,UAAU,eAAe,CAC3B,KAAmB,EAAE,SAAqB,EAC1C,SAAkB;IACpB,MAAM,SAAS,GAAG,KAAK,CAAC,MAAM,CAAC;IAE/B,gEAAgE;IAChE,MAAM,MAAM,GAAiB,EAAE,CAAC;IAEhC,IAAI,UAAU,GAAG,CAAC,CAAC;IACnB,IAAI,aAAa,GAAG,CAAC,CAAC;IACtB,MAAM,UAAU,GAAa,IAAI,KAAK,CAAC,SAAS,CAAC,CAAC;IAClD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,SAAS,EAAE,EAAE,CAAC,EAAE;QAClC,MAAM,gBAAgB,GAAG,MAAM,CAAC,MAAM,CAAC;QACvC,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,SAAS,EAAE,SAAS,EAAE,MAAM,CAAC,CAAC;QAC9C,MAAM,QAAQ,GAAG,MAAM,CAAC,MAAM,GAAG,gBAAgB,CAAC;QAClD,UAAU,CAAC,CAAC,CAAC,GAAG,QAAQ,CAAC;QACzB,UAAU,IAAI,QAAQ,CAAC;QACvB,aAAa,GAAG,IAAI,CAAC,GAAG,CAAC,aAAa,EAAE,QAAQ,CAAC,CAAC;KACnD;IAED,MAAM,OAAO,GAAG,IAAI,CAAC,iBAAiB,CAAC,OAAO,EAAE,UAAU,GAAG,CAAC,CAAe,CAAC;IAC9E,MAAM,MAAM,GAAiB,IAAI,KAAK,CAAC,UAAU,CAAC,CAAC;IACnD,MAAM,KAAK,GAAqB,CAAC,SAAS,EAAE,aAAa,CAAC,CAAC;IAE3D,IAAI,CAAC,GAAG,CAAC,CAAC;IACV,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,SAAS,EAAE,EAAE,CAAC,EAAE;QAClC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,UAAU,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,EAAE;YACtC,uDAAuD;YACvD,OAAO,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC;YACnB,OAAO,CAAC,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC;YACvB,MAAM,CAAC,CAAC,CAAC,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC;YACtB,EAAE,CAAC,CAAC;SACL;KACF;IAED,OAAO,CAAC,OAAO,EAAE,MAAM,EAAE,KAAK,CAAC,CAAC;AAClC,CAAC","sourcesContent":["/**\n * @license\n * Copyright 2021 Google LLC. All Rights Reserved.\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use this file except in compliance with the License.\n * You may obtain a copy of the License at\n *\n * http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n * =============================================================================\n */\n\nimport {TypedArray, util} from '@tensorflow/tfjs-core';\n\nfunction split(\n    str: Uint8Array, delimiters: Uint8Array, skipEmpty: boolean,\n    result: Uint8Array[]): void {\n  if (!str.length) {\n    return;\n  }\n  // When the delimiter is empty, the input is split into individual characters.\n  if (delimiters.length === 0) {\n    for (let i = 0; i < str.length; ++i) {\n      result.push(str.subarray(i, i + 1));\n    }\n    return;\n  }\n  // When there is one delimiter, the input is split only at that delimiter.\n  if (delimiters.length === 1) {\n    const delimiter = delimiters[0];\n    let f = str.indexOf(delimiter);\n    while (f !== -1) {\n      const token = str.subarray(0, f);\n      if (!skipEmpty || token.length !== 0) {\n        result.push(token);\n      }\n      str = str.subarray(f + 1);\n      f = str.indexOf(delimiter);\n    }\n    if (!skipEmpty || str.length !== 0) {\n      result.push(str);\n    }\n    return;\n  }\n  // When there are multiple delimiters, the input is split at every instance\n  // one of the delimiters appears.\n  let tokenStart = 0;\n  for (let i = 0; i < str.length + 1; i++) {\n    if ((i === str.length) || (delimiters.indexOf(str[i]) !== -1)) {\n      const token = str.subarray(tokenStart, i);\n      if (!skipEmpty || token.length !== 0) {\n        result.push(token);\n      }\n      tokenStart = i + 1;\n    }\n  }\n}\n\nexport function stringSplitImpl(\n    input: Uint8Array[], delimiter: Uint8Array,\n    skipEmpty: boolean): [TypedArray, Uint8Array[], [number, number]] {\n  const batchSize = input.length;\n\n  // Empty delimiter means split the input character by character.\n  const tokens: Uint8Array[] = [];\n\n  let outputSize = 0;\n  let maxNumEntries = 0;\n  const numIndices: number[] = new Array(batchSize);\n  for (let i = 0; i < batchSize; ++i) {\n    const prevTokensLength = tokens.length;\n    split(input[i], delimiter, skipEmpty, tokens);\n    const nEntries = tokens.length - prevTokensLength;\n    numIndices[i] = nEntries;\n    outputSize += nEntries;\n    maxNumEntries = Math.max(maxNumEntries, nEntries);\n  }\n\n  const indices = util.getArrayFromDType('int32', outputSize * 2) as TypedArray;\n  const values: Uint8Array[] = new Array(outputSize);\n  const shape: [number, number] = [batchSize, maxNumEntries];\n\n  let c = 0;\n  for (let i = 0; i < batchSize; ++i) {\n    for (let j = 0; j < numIndices[i]; ++j) {\n      // indices is a 2d tensor with shape of [outputSize, 2]\n      indices[c * 2] = i;\n      indices[c * 2 + 1] = j;\n      values[c] = tokens[c];\n      ++c;\n    }\n  }\n\n  return [indices, values, shape];\n}\n"]}