/**
 * @license
 * Copyright 2021 Google LLC. All Rights Reserved.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * =============================================================================
 */
import { ENGINE } from '../../engine';
import { StringSplit } from '../../kernel_names';
import { convertToTensor } from '../../tensor_util_env';
import { op } from '../operation';
/**
 * Splits every element of `input` on `delimiter` and returns the pieces as
 * the components of a SparseTensor.
 *
 * Let N be the number of elements in `input` (typically the batch size).
 * Each element is split on `delimiter`, and the resulting tokens are returned
 * in sparse form. Empty tokens are dropped when `skipEmpty` is true.
 *
 * `delimiter` may be an empty string or a string of split characters. When it
 * is empty, each element of `input` is split into its individual characters.
 * Otherwise, every character of `delimiter` is a potential split point.
 *
 * ```js
 * const result = tf.string.stringSplit(['hello world', 'a b c'], ' ');
 * result['indices'].print(); // [[0, 0], [0, 1], [1, 0], [1, 1], [1, 2]]
 * result['values'].print(); // ['hello', 'world', 'a', 'b', 'c']
 * result['shape'].print(); // [2, 3]
 * ```
 * @param input 1-D. Strings to split.
 * @param delimiter 0-D. Delimiter characters, or an empty string.
 * @param skipEmpty Optional. If true, empty strings are omitted from the
 *     result. Defaults to true.
 * @return A map with the following properties:
 *     - indices: A dense int32 matrix holding the indices of the sparse
 *       tensor.
 *     - values: A vector of strings holding the split tokens.
 *     - shape: A length-2 int32 vector holding the shape of the sparse
 *       tensor; the first value is N and the second is the maximum number of
 *       tokens found in a single input entry.
 * @throws Error if `input` is not rank 1 or `delimiter` is not rank 0.
 *
 * @doc {heading: 'Operations', subheading: 'String'}
 */
function stringSplit_(input, delimiter, skipEmpty = true) {
  // Both arguments are converted up front, before either rank is validated.
  const inputTensor = convertToTensor(input, 'input', 'stringSplit', 'string');
  const delimiterTensor = convertToTensor(
    delimiter,
    'delimiter',
    'stringSplit',
    'string',
  );

  if ($rankOf(inputTensor) !== 1) {
    throw new Error(
      `Input should be Tensor1D but received shape ${inputTensor.shape}`,
    );
  }
  if ($rankOf(delimiterTensor) !== 0) {
    throw new Error(
      `Delimiter should be a scalar but received shape ${delimiterTensor.shape}`,
    );
  }

  // The kernel's outputs are read positionally: indices, values, shape.
  const outputs = ENGINE.runKernel(
    StringSplit,
    { input: inputTensor, delimiter: delimiterTensor },
    { skipEmpty },
  );
  const indices = outputs[0];
  const values = outputs[1];
  const shape = outputs[2];
  return { indices, values, shape };

  // Hoisted local helper: reads the rank of a converted tensor.
  function $rankOf(tensor) {
    return tensor.rank;
  }
}
export const stringSplit = /* @__PURE__ */ op({ stringSplit_ });
//# 
sourceMappingURL=data:application/json;base64,eyJ2ZXJzaW9uIjozLCJmaWxlIjoic3RyaW5nX3NwbGl0LmpzIiwic291cmNlUm9vdCI6IiIsInNvdXJjZXMiOlsiLi4vLi4vLi4vLi4vLi4vLi4vLi4vdGZqcy1jb3JlL3NyYy9vcHMvc3RyaW5nL3N0cmluZ19zcGxpdC50cyJdLCJuYW1lcyI6W10sIm1hcHBpbmdzIjoiQUFBQTs7Ozs7Ozs7Ozs7Ozs7O0dBZUc7QUFFSCxPQUFPLEVBQUMsTUFBTSxFQUFDLE1BQU0sY0FBYyxDQUFDO0FBQ3BDLE9BQU8sRUFBQyxXQUFXLEVBQXNDLE1BQU0sb0JBQW9CLENBQUM7QUFHcEYsT0FBTyxFQUFDLGVBQWUsRUFBQyxNQUFNLHVCQUF1QixDQUFDO0FBRXRELE9BQU8sRUFBQyxFQUFFLEVBQUMsTUFBTSxjQUFjLENBQUM7QUFFaEM7Ozs7Ozs7Ozs7Ozs7Ozs7Ozs7Ozs7Ozs7Ozs7Ozs7R0ErQkc7QUFDSCxTQUFTLFlBQVksQ0FDakIsS0FBMEIsRUFBRSxTQUE0QixFQUN4RCxTQUFTLEdBQUcsSUFBSTtJQUNsQixNQUFNLE1BQU0sR0FBRyxlQUFlLENBQUMsS0FBSyxFQUFFLE9BQU8sRUFBRSxhQUFhLEVBQUUsUUFBUSxDQUFDLENBQUM7SUFDeEUsTUFBTSxVQUFVLEdBQ1osZUFBZSxDQUFDLFNBQVMsRUFBRSxXQUFXLEVBQUUsYUFBYSxFQUFFLFFBQVEsQ0FBQyxDQUFDO0lBRXJFLElBQUksTUFBTSxDQUFDLElBQUksS0FBSyxDQUFDLEVBQUU7UUFDckIsTUFBTSxJQUFJLEtBQUssQ0FDWCwrQ0FBK0MsTUFBTSxDQUFDLEtBQUssRUFBRSxDQUFDLENBQUM7S0FDcEU7SUFDRCxJQUFJLFVBQVUsQ0FBQyxJQUFJLEtBQUssQ0FBQyxFQUFFO1FBQ3pCLE1BQU0sSUFBSSxLQUFLLENBQ1gsbURBQW1ELFVBQVUsQ0FBQyxLQUFLLEVBQUUsQ0FBQyxDQUFDO0tBQzVFO0lBRUQsTUFBTSxLQUFLLEdBQXFCLEVBQUMsU0FBUyxFQUFDLENBQUM7SUFDNUMsTUFBTSxNQUFNLEdBQXNCLEVBQUMsS0FBSyxFQUFFLE1BQU0sRUFBRSxTQUFTLEVBQUUsVUFBVSxFQUFDLENBQUM7SUFDekUsTUFBTSxNQUFNLEdBQ1IsTUFBTSxDQUFDLFNBQVMsQ0FBQyxXQUFXLEVBQUUsTUFBWSxFQUFFLEtBQVcsQ0FBQyxDQUFDO0lBQzdELE9BQU8sRUFBQyxPQUFPLEVBQUUsTUFBTSxDQUFDLENBQUMsQ0FBQyxFQUFFLE1BQU0sRUFBRSxNQUFNLENBQUMsQ0FBQyxDQUFDLEVBQUUsS0FBSyxFQUFFLE1BQU0sQ0FBQyxDQUFDLENBQUMsRUFBQyxDQUFDO0FBQ25FLENBQUM7QUFFRCxNQUFNLENBQUMsTUFBTSxXQUFXLEdBQUcsZUFBZSxDQUFDLEVBQUUsQ0FBQyxFQUFDLFlBQVksRUFBQyxDQUFDLENBQUMiLCJzb3VyY2VzQ29udGVudCI6WyIvKipcbiAqIEBsaWNlbnNlXG4gKiBDb3B5cmlnaHQgMjAyMSBHb29nbGUgTExDLiBBbGwgUmlnaHRzIFJlc2VydmVkLlxuICogTGljZW5zZWQgdW5kZXIgdGhlIEFwYWNoZSBMaWNlbnNlLCBWZXJzaW9uIDIuMCAodGhlIFwiTGljZW5zZVwiKTtcbiAqIHlvdSBtYXkgbm90IHVzZSB0aGlzIGZpbGUgZXhjZXB0IGluIGNvbXBsaWFuY2Ugd2l0aCB0aGUgTGljZW5zZS5cbiAqIFlvdSBtYXkgb2J0YWluIG
EgY29weSBvZiB0aGUgTGljZW5zZSBhdFxuICpcbiAqIGh0dHA6Ly93d3cuYXBhY2hlLm9yZy9saWNlbnNlcy9MSUNFTlNFLTIuMFxuICpcbiAqIFVubGVzcyByZXF1aXJlZCBieSBhcHBsaWNhYmxlIGxhdyBvciBhZ3JlZWQgdG8gaW4gd3JpdGluZywgc29mdHdhcmVcbiAqIGRpc3RyaWJ1dGVkIHVuZGVyIHRoZSBMaWNlbnNlIGlzIGRpc3RyaWJ1dGVkIG9uIGFuIFwiQVMgSVNcIiBCQVNJUyxcbiAqIFdJVEhPVVQgV0FSUkFOVElFUyBPUiBDT05ESVRJT05TIE9GIEFOWSBLSU5ELCBlaXRoZXIgZXhwcmVzcyBvciBpbXBsaWVkLlxuICogU2VlIHRoZSBMaWNlbnNlIGZvciB0aGUgc3BlY2lmaWMgbGFuZ3VhZ2UgZ292ZXJuaW5nIHBlcm1pc3Npb25zIGFuZFxuICogbGltaXRhdGlvbnMgdW5kZXIgdGhlIExpY2Vuc2UuXG4gKiA9PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PVxuICovXG5cbmltcG9ydCB7RU5HSU5FfSBmcm9tICcuLi8uLi9lbmdpbmUnO1xuaW1wb3J0IHtTdHJpbmdTcGxpdCwgU3RyaW5nU3BsaXRBdHRycywgU3RyaW5nU3BsaXRJbnB1dHN9IGZyb20gJy4uLy4uL2tlcm5lbF9uYW1lcyc7XG5pbXBvcnQge1NjYWxhciwgVGVuc29yLCBUZW5zb3IxRH0gZnJvbSAnLi4vLi4vdGVuc29yJztcbmltcG9ydCB7TmFtZWRUZW5zb3JNYXB9IGZyb20gJy4uLy4uL3RlbnNvcl90eXBlcyc7XG5pbXBvcnQge2NvbnZlcnRUb1RlbnNvcn0gZnJvbSAnLi4vLi4vdGVuc29yX3V0aWxfZW52JztcbmltcG9ydCB7U2NhbGFyTGlrZSwgVGVuc29yTGlrZX0gZnJvbSAnLi4vLi4vdHlwZXMnO1xuaW1wb3J0IHtvcH0gZnJvbSAnLi4vb3BlcmF0aW9uJztcblxuLyoqXG4gKiBTcGxpdCBlbGVtZW50cyBvZiBgaW5wdXRgIGJhc2VkIG9uIGBkZWxpbWl0ZXJgIGludG8gYSBTcGFyc2VUZW5zb3IgLlxuICpcbiAqIExldCBOIGJlIHRoZSBzaXplIG9mIHNvdXJjZSAodHlwaWNhbGx5IE4gd2lsbCBiZSB0aGUgYmF0Y2ggc2l6ZSkuIFNwbGl0IGVhY2hcbiAqIGVsZW1lbnQgb2YgYGlucHV0YCBiYXNlZCBvbiBgZGVsaW1pdGVyYCBhbmQgcmV0dXJuIGEgU3BhcnNlVGVuc29yIGNvbnRhaW5pbmdcbiAqIHRoZSBzcGxpdHRlZCB0b2tlbnMuIEVtcHR5IHRva2VucyBhcmUgaWdub3JlZCBpZiBgc2tpcEVtcHR5YCBpcyBzZXQgdG8gVHJ1ZS5cbiAqXG4gKiBgZGVsaW1pdGVyYCBjYW4gYmUgZW1wdHksIG9yIGEgc3RyaW5nIG9mIHNwbGl0IGNoYXJhY3RlcnMuIElmIGBkZWxpbWl0ZXJgIGlzXG4gKiBhbiBlbXB0eSBzdHJpbmcsIGVhY2ggZWxlbWVudCBvZiBgaW5wdXRgIGlzIHNwbGl0IGludG8gaW5kaXZpZHVhbFxuICogY2hhcmFjdGVyIHN0cmluZ3MuIE90aGVyd2lzZSBldmVyeSBjaGFyYWN0ZXIgb2YgYGRlbGltaXRlcmAgaXMgYSBwb3RlbnRpYWxcbiAqIHNwbGl0IHBvaW50LlxuICpcbiAqIGBgYGpzXG4gKiBjb25zdCByZXN1bHQgPSB0Zi5zdHJpbmcuc3RyaW5nU3
BsaXQoWydoZWxsbyB3b3JsZCcsICAnYSBiIGMnXSwgJyAnKTtcbiAqIHJlc3VsdFsnaW5kaWNlcyddLnByaW50KCk7IC8vIFtbMCwgMF0sIFswLCAxXSwgWzEsIDBdLCBbMSwgMV0sIFsxLCAyXV1cbiAqIHJlc3VsdFsndmFsdWVzJ10ucHJpbnQoKTsgLy8gWydoZWxsbycsICd3b3JsZCcsICdhJywgJ2InLCAnYyddXG4gKiByZXN1bHRbJ3NoYXBlJ10ucHJpbnQoKTsgLy8gWzIsIDNdXG4gKiBgYGBcbiAqIEBwYXJhbSBpbnB1dDogMS1ELiBTdHJpbmdzIHRvIHNwbGl0LlxuICogQHBhcmFtIGRlbGltaXRlcjogMC1ELiBEZWxpbWl0ZXIgY2hhcmFjdGVycywgb3IgZW1wdHkgc3RyaW5nLlxuICogQHBhcmFtIHNraXBFbXB0eTogT3B0aW9uYWwuIElmIHRydWUsIHNraXAgdGhlIGVtcHR5IHN0cmluZ3MgZnJvbSB0aGUgcmVzdWx0LlxuICogICAgIERlZmF1bHRzIHRvIHRydWUuXG4gKiBAcmV0dXJuIEEgbWFwIHdpdGggdGhlIGZvbGxvd2luZyBwcm9wZXJ0aWVzOlxuICogICAgIC0gaW5kaWNlczogQSBkZW5zZSBtYXRyaXggb2YgaW50MzIgcmVwcmVzZW50aW5nIHRoZSBpbmRpY2VzIG9mIHRoZSBzcGFyc2VcbiAqICAgICAgIHRlbnNvci5cbiAqICAgICAtIHZhbHVlczogQSB2ZWN0b3Igb2Ygc3RyaW5ncyBjb3JyZXNwb25kaW5nIHRvIHRoZSBzcGxpdGVkIHZhbHVlcy5cbiAqICAgICAtIHNoYXBlOiBhIGxlbmd0aC0yIHZlY3RvciBvZiBpbnQzMiByZXByZXNlbnRpbmcgdGhlIHNoYXBlIG9mIHRoZSBzcGFyc2VcbiAqIHRlbnNvciwgd2hlcmUgdGhlIGZpcnN0IHZhbHVlIGlzIE4gYW5kIHRoZSBzZWNvbmQgdmFsdWUgaXMgdGhlIG1heGltdW0gbnVtYmVyXG4gKiBvZiB0b2tlbnMgaW4gYSBzaW5nbGUgaW5wdXQgZW50cnkuXG4gKlxuICogQGRvYyB7aGVhZGluZzogJ09wZXJhdGlvbnMnLCBzdWJoZWFkaW5nOiAnU3RyaW5nJ31cbiAqL1xuZnVuY3Rpb24gc3RyaW5nU3BsaXRfKFxuICAgIGlucHV0OiBUZW5zb3IxRHxUZW5zb3JMaWtlLCBkZWxpbWl0ZXI6IFNjYWxhcnxTY2FsYXJMaWtlLFxuICAgIHNraXBFbXB0eSA9IHRydWUpOiBOYW1lZFRlbnNvck1hcCB7XG4gIGNvbnN0ICRpbnB1dCA9IGNvbnZlcnRUb1RlbnNvcihpbnB1dCwgJ2lucHV0JywgJ3N0cmluZ1NwbGl0JywgJ3N0cmluZycpO1xuICBjb25zdCAkZGVsaW1pdGVyID1cbiAgICAgIGNvbnZlcnRUb1RlbnNvcihkZWxpbWl0ZXIsICdkZWxpbWl0ZXInLCAnc3RyaW5nU3BsaXQnLCAnc3RyaW5nJyk7XG5cbiAgaWYgKCRpbnB1dC5yYW5rICE9PSAxKSB7XG4gICAgdGhyb3cgbmV3IEVycm9yKFxuICAgICAgICBgSW5wdXQgc2hvdWxkIGJlIFRlbnNvcjFEIGJ1dCByZWNlaXZlZCBzaGFwZSAkeyRpbnB1dC5zaGFwZX1gKTtcbiAgfVxuICBpZiAoJGRlbGltaXRlci5yYW5rICE9PSAwKSB7XG4gICAgdGhyb3cgbmV3IEVycm9yKFxuICAgICAgICBgRGVsaW1pdGVyIHNob3VsZCBiZSBhIHNjYWxhciBidXQgcmVjZWl2ZWQgc2hhcGUgJHskZGVsaW1pdGVyLnNoYXBlfWApO1xuICB9XG
5cbiAgY29uc3QgYXR0cnM6IFN0cmluZ1NwbGl0QXR0cnMgPSB7c2tpcEVtcHR5fTtcbiAgY29uc3QgaW5wdXRzOiBTdHJpbmdTcGxpdElucHV0cyA9IHtpbnB1dDogJGlucHV0LCBkZWxpbWl0ZXI6ICRkZWxpbWl0ZXJ9O1xuICBjb25zdCByZXN1bHQ6IFRlbnNvcltdID1cbiAgICAgIEVOR0lORS5ydW5LZXJuZWwoU3RyaW5nU3BsaXQsIGlucHV0cyBhcyB7fSwgYXR0cnMgYXMge30pO1xuICByZXR1cm4ge2luZGljZXM6IHJlc3VsdFswXSwgdmFsdWVzOiByZXN1bHRbMV0sIHNoYXBlOiByZXN1bHRbMl19O1xufVxuXG5leHBvcnQgY29uc3Qgc3RyaW5nU3BsaXQgPSAvKiBAX19QVVJFX18gKi8gb3Aoe3N0cmluZ1NwbGl0X30pO1xuIl19