gx
chenyc
2025-02-12 ea42ff3ebee1eeb3fb29423aa848a249441db81c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
/**
 * @license
 * Copyright 2021 Google LLC. All Rights Reserved.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * =============================================================================
 */
import { ENGINE } from '../../engine';
import { StringSplit } from '../../kernel_names';
import { convertToTensor } from '../../tensor_util_env';
import { op } from '../operation';
/**
 * Split elements of `input` based on `delimiter` into a SparseTensor .
 *
 * Let N be the size of source (typically N will be the batch size). Split each
 * element of `input` based on `delimiter` and return a SparseTensor containing
 * the splitted tokens. Empty tokens are ignored if `skipEmpty` is set to True.
 *
 * `delimiter` can be empty, or a string of split characters. If `delimiter` is
 * an empty string, each element of `input` is split into individual
 * character strings. Otherwise every character of `delimiter` is a potential
 * split point.
 *
 * ```js
 * const result = tf.string.stringSplit(['hello world',  'a b c'], ' ');
 * result['indices'].print(); // [[0, 0], [0, 1], [1, 0], [1, 1], [1, 2]]
 * result['values'].print(); // ['hello', 'world', 'a', 'b', 'c']
 * result['shape'].print(); // [2, 3]
 * ```
 * @param input: 1-D. Strings to split.
 * @param delimiter: 0-D. Delimiter characters, or empty string.
 * @param skipEmpty: Optional. If true, skip the empty strings from the result.
 *     Defaults to true.
 * @return A map with the following properties:
 *     - indices: A dense matrix of int32 representing the indices of the sparse
 *       tensor.
 *     - values: A vector of strings corresponding to the splited values.
 *     - shape: a length-2 vector of int32 representing the shape of the sparse
 * tensor, where the first value is N and the second value is the maximum number
 * of tokens in a single input entry.
 *
 * @doc {heading: 'Operations', subheading: 'String'}
 */
function stringSplit_(input, delimiter, skipEmpty = true) {
    const $input = convertToTensor(input, 'input', 'stringSplit', 'string');
    const $delimiter = convertToTensor(delimiter, 'delimiter', 'stringSplit', 'string');
    if ($input.rank !== 1) {
        throw new Error(`Input should be Tensor1D but received shape ${$input.shape}`);
    }
    if ($delimiter.rank !== 0) {
        throw new Error(`Delimiter should be a scalar but received shape ${$delimiter.shape}`);
    }
    const attrs = { skipEmpty };
    const inputs = { input: $input, delimiter: $delimiter };
    const result = ENGINE.runKernel(StringSplit, inputs, attrs);
    return { indices: result[0], values: result[1], shape: result[2] };
}
export const stringSplit = /* @__PURE__ */ op({ stringSplit_ });
//# sourceMappingURL=data:application/json;base64,eyJ2ZXJzaW9uIjozLCJmaWxlIjoic3RyaW5nX3NwbGl0LmpzIiwic291cmNlUm9vdCI6IiIsInNvdXJjZXMiOlsiLi4vLi4vLi4vLi4vLi4vLi4vLi4vdGZqcy1jb3JlL3NyYy9vcHMvc3RyaW5nL3N0cmluZ19zcGxpdC50cyJdLCJuYW1lcyI6W10sIm1hcHBpbmdzIjoiQUFBQTs7Ozs7Ozs7Ozs7Ozs7O0dBZUc7QUFFSCxPQUFPLEVBQUMsTUFBTSxFQUFDLE1BQU0sY0FBYyxDQUFDO0FBQ3BDLE9BQU8sRUFBQyxXQUFXLEVBQXNDLE1BQU0sb0JBQW9CLENBQUM7QUFHcEYsT0FBTyxFQUFDLGVBQWUsRUFBQyxNQUFNLHVCQUF1QixDQUFDO0FBRXRELE9BQU8sRUFBQyxFQUFFLEVBQUMsTUFBTSxjQUFjLENBQUM7QUFFaEM7Ozs7Ozs7Ozs7Ozs7Ozs7Ozs7Ozs7Ozs7Ozs7Ozs7R0ErQkc7QUFDSCxTQUFTLFlBQVksQ0FDakIsS0FBMEIsRUFBRSxTQUE0QixFQUN4RCxTQUFTLEdBQUcsSUFBSTtJQUNsQixNQUFNLE1BQU0sR0FBRyxlQUFlLENBQUMsS0FBSyxFQUFFLE9BQU8sRUFBRSxhQUFhLEVBQUUsUUFBUSxDQUFDLENBQUM7SUFDeEUsTUFBTSxVQUFVLEdBQ1osZUFBZSxDQUFDLFNBQVMsRUFBRSxXQUFXLEVBQUUsYUFBYSxFQUFFLFFBQVEsQ0FBQyxDQUFDO0lBRXJFLElBQUksTUFBTSxDQUFDLElBQUksS0FBSyxDQUFDLEVBQUU7UUFDckIsTUFBTSxJQUFJLEtBQUssQ0FDWCwrQ0FBK0MsTUFBTSxDQUFDLEtBQUssRUFBRSxDQUFDLENBQUM7S0FDcEU7SUFDRCxJQUFJLFVBQVUsQ0FBQyxJQUFJLEtBQUssQ0FBQyxFQUFFO1FBQ3pCLE1BQU0sSUFBSSxLQUFLLENBQ1gsbURBQW1ELFVBQVUsQ0FBQyxLQUFLLEVBQUUsQ0FBQyxDQUFDO0tBQzVFO0lBRUQsTUFBTSxLQUFLLEdBQXFCLEVBQUMsU0FBUyxFQUFDLENBQUM7SUFDNUMsTUFBTSxNQUFNLEdBQXNCLEVBQUMsS0FBSyxFQUFFLE1BQU0sRUFBRSxTQUFTLEVBQUUsVUFBVSxFQUFDLENBQUM7SUFDekUsTUFBTSxNQUFNLEdBQ1IsTUFBTSxDQUFDLFNBQVMsQ0FBQyxXQUFXLEVBQUUsTUFBWSxFQUFFLEtBQVcsQ0FBQyxDQUFDO0lBQzdELE9BQU8sRUFBQyxPQUFPLEVBQUUsTUFBTSxDQUFDLENBQUMsQ0FBQyxFQUFFLE1BQU0sRUFBRSxNQUFNLENBQUMsQ0FBQyxDQUFDLEVBQUUsS0FBSyxFQUFFLE1BQU0sQ0FBQyxDQUFDLENBQUMsRUFBQyxDQUFDO0FBQ25FLENBQUM7QUFFRCxNQUFNLENBQUMsTUFBTSxXQUFXLEdBQUcsZUFBZSxDQUFDLEVBQUUsQ0FBQyxFQUFDLFlBQVksRUFBQyxDQUFDLENBQUMiLCJzb3VyY2VzQ29udGVudCI6WyIvKipcbiAqIEBsaWNlbnNlXG4gKiBDb3B5cmlnaHQgMjAyMSBHb29nbGUgTExDLiBBbGwgUmlnaHRzIFJlc2VydmVkLlxuICogTGljZW5zZWQgdW5kZXIgdGhlIEFwYWNoZSBMaWNlbnNlLCBWZXJzaW9uIDIuMCAodGhlIFwiTGljZW5zZVwiKTtcbiAqIHlvdSBtYXkgbm90IHVzZSB0aGlzIGZpbGUgZXhjZXB0IGluIGNvbXBsaWFuY2Ugd2l0aCB0aGUgTGljZW5zZS5cbiAqIFlvdSBtYXkgb2J0YWluIGEgY29weSBvZiB0aGUgTGljZW5zZSBhdFxuICpcbiAqIGh0dHA6Ly93d3cuYXBhY2hlLm9yZy9saWNlbnNlcy9MSUNFTlNFLTIuMFxuICpcbiAqIFVubGVzcyByZXF1aXJlZCBieSBhcHBsaWNhYmxlIGxhdyBvciBhZ3JlZWQgdG8gaW4gd3JpdGluZywgc29mdHdhcmVcbiAqIGRpc3RyaWJ1dGVkIHVuZGVyIHRoZSBMaWNlbnNlIGlzIGRpc3RyaWJ1dGVkIG9uIGFuIFwiQVMgSVNcIiBCQVNJUyxcbiAqIFdJVEhPVVQgV0FSUkFOVElFUyBPUiBDT05ESVRJT05TIE9GIEFOWSBLSU5ELCBlaXRoZXIgZXhwcmVzcyBvciBpbXBsaWVkLlxuICogU2VlIHRoZSBMaWNlbnNlIGZvciB0aGUgc3BlY2lmaWMgbGFuZ3VhZ2UgZ292ZXJuaW5nIHBlcm1pc3Npb25zIGFuZFxuICogbGltaXRhdGlvbnMgdW5kZXIgdGhlIExpY2Vuc2UuXG4gKiA9PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PVxuICovXG5cbmltcG9ydCB7RU5HSU5FfSBmcm9tICcuLi8uLi9lbmdpbmUnO1xuaW1wb3J0IHtTdHJpbmdTcGxpdCwgU3RyaW5nU3BsaXRBdHRycywgU3RyaW5nU3BsaXRJbnB1dHN9IGZyb20gJy4uLy4uL2tlcm5lbF9uYW1lcyc7XG5pbXBvcnQge1NjYWxhciwgVGVuc29yLCBUZW5zb3IxRH0gZnJvbSAnLi4vLi4vdGVuc29yJztcbmltcG9ydCB7TmFtZWRUZW5zb3JNYXB9IGZyb20gJy4uLy4uL3RlbnNvcl90eXBlcyc7XG5pbXBvcnQge2NvbnZlcnRUb1RlbnNvcn0gZnJvbSAnLi4vLi4vdGVuc29yX3V0aWxfZW52JztcbmltcG9ydCB7U2NhbGFyTGlrZSwgVGVuc29yTGlrZX0gZnJvbSAnLi4vLi4vdHlwZXMnO1xuaW1wb3J0IHtvcH0gZnJvbSAnLi4vb3BlcmF0aW9uJztcblxuLyoqXG4gKiBTcGxpdCBlbGVtZW50cyBvZiBgaW5wdXRgIGJhc2VkIG9uIGBkZWxpbWl0ZXJgIGludG8gYSBTcGFyc2VUZW5zb3IgLlxuICpcbiAqIExldCBOIGJlIHRoZSBzaXplIG9mIHNvdXJjZSAodHlwaWNhbGx5IE4gd2lsbCBiZSB0aGUgYmF0Y2ggc2l6ZSkuIFNwbGl0IGVhY2hcbiAqIGVsZW1lbnQgb2YgYGlucHV0YCBiYXNlZCBvbiBgZGVsaW1pdGVyYCBhbmQgcmV0dXJuIGEgU3BhcnNlVGVuc29yIGNvbnRhaW5pbmdcbiAqIHRoZSBzcGxpdHRlZCB0b2tlbnMuIEVtcHR5IHRva2VucyBhcmUgaWdub3JlZCBpZiBgc2tpcEVtcHR5YCBpcyBzZXQgdG8gVHJ1ZS5cbiAqXG4gKiBgZGVsaW1pdGVyYCBjYW4gYmUgZW1wdHksIG9yIGEgc3RyaW5nIG9mIHNwbGl0IGNoYXJhY3RlcnMuIElmIGBkZWxpbWl0ZXJgIGlzXG4gKiBhbiBlbXB0eSBzdHJpbmcsIGVhY2ggZWxlbWVudCBvZiBgaW5wdXRgIGlzIHNwbGl0IGludG8gaW5kaXZpZHVhbFxuICogY2hhcmFjdGVyIHN0cmluZ3MuIE90aGVyd2lzZSBldmVyeSBjaGFyYWN0ZXIgb2YgYGRlbGltaXRlcmAgaXMgYSBwb3RlbnRpYWxcbiAqIHNwbGl0IHBvaW50LlxuICpcbiAqIGBgYGpzXG4gKiBjb25zdCByZXN1bHQgPSB0Zi5zdHJpbmcuc3RyaW5nU3BsaXQoWydoZWxsbyB3b3JsZCcsICAnYSBiIGMnXSwgJyAnKTtcbiAqIHJlc3VsdFsnaW5kaWNlcyddLnByaW50KCk7IC8vIFtbMCwgMF0sIFswLCAxXSwgWzEsIDBdLCBbMSwgMV0sIFsxLCAyXV1cbiAqIHJlc3VsdFsndmFsdWVzJ10ucHJpbnQoKTsgLy8gWydoZWxsbycsICd3b3JsZCcsICdhJywgJ2InLCAnYyddXG4gKiByZXN1bHRbJ3NoYXBlJ10ucHJpbnQoKTsgLy8gWzIsIDNdXG4gKiBgYGBcbiAqIEBwYXJhbSBpbnB1dDogMS1ELiBTdHJpbmdzIHRvIHNwbGl0LlxuICogQHBhcmFtIGRlbGltaXRlcjogMC1ELiBEZWxpbWl0ZXIgY2hhcmFjdGVycywgb3IgZW1wdHkgc3RyaW5nLlxuICogQHBhcmFtIHNraXBFbXB0eTogT3B0aW9uYWwuIElmIHRydWUsIHNraXAgdGhlIGVtcHR5IHN0cmluZ3MgZnJvbSB0aGUgcmVzdWx0LlxuICogICAgIERlZmF1bHRzIHRvIHRydWUuXG4gKiBAcmV0dXJuIEEgbWFwIHdpdGggdGhlIGZvbGxvd2luZyBwcm9wZXJ0aWVzOlxuICogICAgIC0gaW5kaWNlczogQSBkZW5zZSBtYXRyaXggb2YgaW50MzIgcmVwcmVzZW50aW5nIHRoZSBpbmRpY2VzIG9mIHRoZSBzcGFyc2VcbiAqICAgICAgIHRlbnNvci5cbiAqICAgICAtIHZhbHVlczogQSB2ZWN0b3Igb2Ygc3RyaW5ncyBjb3JyZXNwb25kaW5nIHRvIHRoZSBzcGxpdGVkIHZhbHVlcy5cbiAqICAgICAtIHNoYXBlOiBhIGxlbmd0aC0yIHZlY3RvciBvZiBpbnQzMiByZXByZXNlbnRpbmcgdGhlIHNoYXBlIG9mIHRoZSBzcGFyc2VcbiAqIHRlbnNvciwgd2hlcmUgdGhlIGZpcnN0IHZhbHVlIGlzIE4gYW5kIHRoZSBzZWNvbmQgdmFsdWUgaXMgdGhlIG1heGltdW0gbnVtYmVyXG4gKiBvZiB0b2tlbnMgaW4gYSBzaW5nbGUgaW5wdXQgZW50cnkuXG4gKlxuICogQGRvYyB7aGVhZGluZzogJ09wZXJhdGlvbnMnLCBzdWJoZWFkaW5nOiAnU3RyaW5nJ31cbiAqL1xuZnVuY3Rpb24gc3RyaW5nU3BsaXRfKFxuICAgIGlucHV0OiBUZW5zb3IxRHxUZW5zb3JMaWtlLCBkZWxpbWl0ZXI6IFNjYWxhcnxTY2FsYXJMaWtlLFxuICAgIHNraXBFbXB0eSA9IHRydWUpOiBOYW1lZFRlbnNvck1hcCB7XG4gIGNvbnN0ICRpbnB1dCA9IGNvbnZlcnRUb1RlbnNvcihpbnB1dCwgJ2lucHV0JywgJ3N0cmluZ1NwbGl0JywgJ3N0cmluZycpO1xuICBjb25zdCAkZGVsaW1pdGVyID1cbiAgICAgIGNvbnZlcnRUb1RlbnNvcihkZWxpbWl0ZXIsICdkZWxpbWl0ZXInLCAnc3RyaW5nU3BsaXQnLCAnc3RyaW5nJyk7XG5cbiAgaWYgKCRpbnB1dC5yYW5rICE9PSAxKSB7XG4gICAgdGhyb3cgbmV3IEVycm9yKFxuICAgICAgICBgSW5wdXQgc2hvdWxkIGJlIFRlbnNvcjFEIGJ1dCByZWNlaXZlZCBzaGFwZSAkeyRpbnB1dC5zaGFwZX1gKTtcbiAgfVxuICBpZiAoJGRlbGltaXRlci5yYW5rICE9PSAwKSB7XG4gICAgdGhyb3cgbmV3IEVycm9yKFxuICAgICAgICBgRGVsaW1pdGVyIHNob3VsZCBiZSBhIHNjYWxhciBidXQgcmVjZWl2ZWQgc2hhcGUgJHskZGVsaW1pdGVyLnNoYXBlfWApO1xuICB9XG5cbiAgY29uc3QgYXR0cnM6IFN0cmluZ1NwbGl0QXR0cnMgPSB7c2tpcEVtcHR5fTtcbiAgY29uc3QgaW5wdXRzOiBTdHJpbmdTcGxpdElucHV0cyA9IHtpbnB1dDogJGlucHV0LCBkZWxpbWl0ZXI6ICRkZWxpbWl0ZXJ9O1xuICBjb25zdCByZXN1bHQ6IFRlbnNvcltdID1cbiAgICAgIEVOR0lORS5ydW5LZXJuZWwoU3RyaW5nU3BsaXQsIGlucHV0cyBhcyB7fSwgYXR0cnMgYXMge30pO1xuICByZXR1cm4ge2luZGljZXM6IHJlc3VsdFswXSwgdmFsdWVzOiByZXN1bHRbMV0sIHNoYXBlOiByZXN1bHRbMl19O1xufVxuXG5leHBvcnQgY29uc3Qgc3RyaW5nU3BsaXQgPSAvKiBAX19QVVJFX18gKi8gb3Aoe3N0cmluZ1NwbGl0X30pO1xuIl19