/**
|
* @license
|
* Copyright 2018 Google LLC. All Rights Reserved.
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
* you may not use this file except in compliance with the License.
|
* You may obtain a copy of the License at
|
*
|
* http://www.apache.org/licenses/LICENSE-2.0
|
*
|
* Unless required by applicable law or agreed to in writing, software
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
* See the License for the specific language governing permissions and
|
* limitations under the License.
|
*
|
* =============================================================================
|
*/
|
import * as tf from '@tensorflow/tfjs-core';
|
import * as seedrandom from 'seedrandom';
|
import { iteratorFromConcatenated, iteratorFromFunction, iteratorFromItems, iteratorFromZipped, ZipMismatchMode } from './iterators/lazy_iterator';
|
import { canTensorify, deepMapAndAwaitAll, isIterable } from './util/deep_map';
|
// TODO(soergel): consider vectorized operations within the pipeline.
|
/**
|
* Represents a potentially large list of independent data elements (typically
|
* 'samples' or 'examples').
|
*
|
* A 'data example' may be a primitive, an array, a map from string keys to
|
* values, or any nested structure of these.
|
*
|
* A `Dataset` represents an ordered collection of elements, together with a
|
* chain of transformations to be performed on those elements. Each
|
* transformation is a method of `Dataset` that returns another `Dataset`, so
|
* these may be chained, e.g.
|
* `const processedDataset = rawDataset.filter(...).map(...).batch(...)`.
|
*
|
* Data loading and transformation is done in a lazy, streaming fashion. The
|
* dataset may be iterated over multiple times; each iteration starts the data
|
* loading anew and recapitulates the transformations.
|
*
|
* A `Dataset` is typically processed as a stream of unbatched examples -- i.e.,
|
* its transformations are applied one example at a time. Batching produces a
|
* new `Dataset` where each element is a batch. Batching should usually come
|
* last in a pipeline, because data transformations are easier to express on a
|
* per-example basis than on a per-batch basis.
|
*
|
* The following code examples are calling `await dataset.forEachAsync(...)` to
|
* iterate once over the entire dataset in order to print out the data.
|
*
|
* @doc {heading: 'Data', subheading: 'Classes', namespace: 'data'}
|
*/
|
class Dataset {
|
constructor() {
|
this.size = null;
|
}
|
// TODO(soergel): Make Datasets report whether repeated iterator() calls
|
// produce the same result (e.g., reading from a file) or different results
|
// (e.g., from the webcam). Currently we don't make this distinction but it
|
// could be important for the user to know.
|
// abstract isDeterministic(): boolean;
|
/**
|
* Groups elements into batches.
|
*
|
* It is assumed that each of the incoming dataset elements has the same
|
* structure -- i.e. the same set of keys at each location in an object
|
* hierarchy. For each key, the resulting `Dataset` provides a batched
|
* element collecting all of the incoming values for that key.
|
*
|
* * Incoming primitives are grouped into a 1-D Tensor.
|
* * Incoming Tensors are grouped into a new Tensor where the 0th axis is
|
* the batch dimension.
|
* * Incoming arrays are converted to Tensor and then batched.
|
* * A nested array is interpreted as an n-D Tensor, so the batched result
|
* has n+1 dimensions.
|
* * An array that cannot be converted to Tensor produces an error.
|
*
|
* If an array should not be batched as a unit, it should first be converted
|
* to an object with integer keys.
|
*
|
* Here are a few examples:
|
*
|
* Batch a dataset of numbers:
|
* ```js
|
* const a = tf.data.array([1, 2, 3, 4, 5, 6, 7, 8]).batch(4);
|
* await a.forEachAsync(e => e.print());
|
* ```
|
*
|
* Batch a dataset of arrays:
|
* ```js
|
* const b = tf.data.array([[1], [2], [3], [4], [5], [6], [7], [8]]).batch(4);
|
* await b.forEachAsync(e => e.print());
|
* ```
|
*
|
* Batch a dataset of objects:
|
* ```js
|
* const c = tf.data.array([{a: 1, b: 11}, {a: 2, b: 12}, {a: 3, b: 13},
|
* {a: 4, b: 14}, {a: 5, b: 15}, {a: 6, b: 16}, {a: 7, b: 17},
|
* {a: 8, b: 18}]).batch(4);
|
* await c.forEachAsync(e => {
|
* console.log('{');
|
* for(var key in e) {
|
* console.log(key+':');
|
* e[key].print();
|
* }
|
* console.log('}');
|
* })
|
* ```
|
*
|
* @param batchSize The number of elements desired per batch.
|
* @param smallLastBatch Whether to emit the final batch when it has fewer
|
* than batchSize elements. Default true.
|
* @returns A `Dataset`, from which a stream of batches can be obtained.
|
*
|
* @doc {heading: 'Data', subheading: 'Classes'}
|
*/
|
batch(batchSize, smallLastBatch = true) {
|
const base = this;
|
tf.util.assert(batchSize > 0, () => `batchSize needs to be positive, but it is
|
${batchSize}`);
|
let size;
|
if (this.size === Infinity || this.size == null) {
|
// If the size of this dataset is infinity or null, the new size keeps the
|
// same.
|
size = this.size;
|
}
|
else if (smallLastBatch) {
|
// If the size of this dataset is known and include small last batch, the
|
// new size is full batch count plus last batch.
|
size = Math.ceil(this.size / batchSize);
|
}
|
else {
|
// If the size of this dataset is known and not include small last batch,
|
// the new size is full batch count.
|
size = Math.floor(this.size / batchSize);
|
}
|
return datasetFromIteratorFn(async () => {
|
return (await base.iterator())
|
.columnMajorBatch(batchSize, smallLastBatch, deepBatchConcat);
|
}, size);
|
}
|
/**
|
* Concatenates this `Dataset` with another.
|
*
|
* ```js
|
* const a = tf.data.array([1, 2, 3]);
|
* const b = tf.data.array([4, 5, 6]);
|
* const c = a.concatenate(b);
|
* await c.forEachAsync(e => console.log(e));
|
* ```
|
*
|
* @param dataset A `Dataset` to be concatenated onto this one.
|
* @returns A `Dataset`.
|
*
|
* @doc {heading: 'Data', subheading: 'Classes'}
|
*/
|
concatenate(dataset) {
|
const base = this;
|
let size;
|
if (this.size === Infinity || dataset.size === Infinity) {
|
// If the size of any of these two dataset is infinity, new size is
|
// infinity.
|
size = Infinity;
|
}
|
else if (this.size != null && dataset.size != null) {
|
// If the size of both datasets are known and not infinity, new size is
|
// sum the size of these two datasets.
|
size = this.size + dataset.size;
|
}
|
else {
|
// If neither of these two datasets has infinite size and any of these two
|
// datasets' size is null, the new size is null.
|
size = null;
|
}
|
return datasetFromIteratorFn(async () => (await base.iterator()).concatenate(await dataset.iterator()), size);
|
}
|
/**
|
* Filters this dataset according to `predicate`.
|
*
|
* ```js
|
* const a = tf.data.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
|
* .filter(x => x%2 === 0);
|
* await a.forEachAsync(e => console.log(e));
|
* ```
|
*
|
* @param predicate A function mapping a dataset element to a boolean or a
|
* `Promise` for one.
|
*
|
* @returns A `Dataset` of elements for which the predicate was true.
|
*
|
* @doc {heading: 'Data', subheading: 'Classes'}
|
*/
|
filter(predicate) {
|
const base = this;
|
let size;
|
if (this.size === Infinity) {
|
// If the size of this dataset is infinity, new size is infinity
|
size = Infinity;
|
}
|
else {
|
// If this dataset has limited elements, new size is null because it might
|
// exhausted randomly.
|
size = null;
|
}
|
return datasetFromIteratorFn(async () => {
|
return (await base.iterator()).filter(x => tf.tidy(() => predicate(x)));
|
}, size);
|
}
|
/**
|
* Apply a function to every element of the dataset.
|
*
|
* After the function is applied to a dataset element, any Tensors contained
|
* within that element are disposed.
|
*
|
* ```js
|
* const a = tf.data.array([1, 2, 3]);
|
* await a.forEachAsync(e => console.log(e));
|
* ```
|
*
|
* @param f A function to apply to each dataset element.
|
* @returns A `Promise` that resolves after all elements have been processed.
|
*
|
* @doc {heading: 'Data', subheading: 'Classes'}
|
*/
|
async forEachAsync(f) {
|
return (await this.iterator()).forEachAsync(f);
|
}
|
/**
|
* Maps this dataset through a 1-to-1 transform.
|
*
|
* ```js
|
* const a = tf.data.array([1, 2, 3]).map(x => x*x);
|
* await a.forEachAsync(e => console.log(e));
|
* ```
|
*
|
* @param transform A function mapping a dataset element to a transformed
|
* dataset element.
|
*
|
* @returns A `Dataset` of transformed elements.
|
*
|
* @doc {heading: 'Data', subheading: 'Classes'}
|
*/
|
map(transform) {
|
const base = this;
|
return datasetFromIteratorFn(async () => {
|
return (await base.iterator()).map(x => tf.tidy(() => transform(x)));
|
}, this.size);
|
}
|
/**
|
* Maps this dataset through an async 1-to-1 transform.
|
*
|
* ```js
|
* const a =
|
* tf.data.array([1, 2, 3]).mapAsync(x => new Promise(function(resolve){
|
* setTimeout(() => {
|
* resolve(x * x);
|
* }, Math.random()*1000 + 500);
|
* }));
|
* console.log(await a.toArray());
|
* ```
|
*
|
* @param transform A function mapping a dataset element to a `Promise` for a
|
* transformed dataset element. This transform is responsible for disposing
|
* any intermediate `Tensor`s, i.e. by wrapping its computation in
|
* `tf.tidy()`; that cannot be automated here (as it is in the synchronous
|
* `map()` case).
|
*
|
* @returns A `Dataset` of transformed elements.
|
*
|
* @doc {heading: 'Data', subheading: 'Classes'}
|
*/
|
mapAsync(transform) {
|
const base = this;
|
return datasetFromIteratorFn(async () => {
|
return (await base.iterator()).mapAsync(transform);
|
}, this.size);
|
}
|
/**
|
* Creates a `Dataset` that prefetches elements from this dataset.
|
*
|
* @param bufferSize: An integer specifying the number of elements to be
|
* prefetched.
|
* @returns A `Dataset`.
|
*
|
* @doc {heading: 'Data', subheading: 'Classes'}
|
*/
|
prefetch(bufferSize) {
|
if (bufferSize == null) {
|
throw new RangeError('`Dataset.prefetch()` requires bufferSize to be specified.');
|
}
|
const base = this;
|
return datasetFromIteratorFn(async () => (await base.iterator()).prefetch(bufferSize), this.size);
|
}
|
/**
|
* Repeats this dataset `count` times.
|
*
|
* NOTE: If this dataset is a function of global state (e.g. a random number
|
* generator), then different repetitions may produce different elements.
|
*
|
* ```js
|
* const a = tf.data.array([1, 2, 3]).repeat(3);
|
* await a.forEachAsync(e => console.log(e));
|
* ```
|
*
|
* @param count: (Optional) An integer, representing the number of times
|
* the dataset should be repeated. The default behavior (if `count` is
|
* `undefined` or negative) is for the dataset be repeated indefinitely.
|
* @returns A `Dataset`.
|
*
|
* @doc {heading: 'Data', subheading: 'Classes'}
|
*/
|
repeat(count) {
|
const base = this;
|
let size;
|
if (this.size != null && count > 0) {
|
// If this dataset has size and count is positive, new size is current
|
// size multiply count. This also covers the case that current size is
|
// infinity.
|
size = this.size * count;
|
}
|
else if (count === 0) {
|
// If count is 0, new size is 0.
|
size = 0;
|
}
|
else if (this.size != null && (count === undefined || count < 0)) {
|
// If this dataset has size and count is undefined or negative, the
|
// dataset will be repeated indefinitely and new size is infinity.
|
size = Infinity;
|
}
|
else {
|
// If the size of this dataset is null, the new dataset's size is null.
|
size = null;
|
}
|
return datasetFromIteratorFn(async () => {
|
const iteratorIterator = iteratorFromFunction(async () => ({ value: await base.iterator(), done: false }));
|
return iteratorFromConcatenated(iteratorIterator.take(count));
|
}, size);
|
}
|
/**
|
* Creates a `Dataset` that skips `count` initial elements from this dataset.
|
*
|
* ```js
|
* const a = tf.data.array([1, 2, 3, 4, 5, 6]).skip(3);
|
* await a.forEachAsync(e => console.log(e));
|
* ```
|
*
|
* @param count: The number of elements of this dataset that should be skipped
|
* to form the new dataset. If `count` is greater than the size of this
|
* dataset, the new dataset will contain no elements. If `count`
|
* is `undefined` or negative, skips the entire dataset.
|
*
|
* @returns A `Dataset`.
|
*
|
* @doc {heading: 'Data', subheading: 'Classes'}
|
*/
|
skip(count) {
|
const base = this;
|
let size;
|
if (this.size != null && count >= 0 && this.size >= count) {
|
// If the size of this dataset is greater than count, the new dataset's
|
// size is current size minus skipped size.This also covers the case that
|
// current size is infinity.
|
size = this.size - count;
|
}
|
else if (this.size != null &&
|
(this.size < count || count === undefined || count < 0)) {
|
// If the size of this dataset is smaller than count, or count is
|
// undefined or negative, skips the entire dataset and the new size is 0.
|
size = 0;
|
}
|
else {
|
// If the size of this dataset is null, the new dataset's size is null.
|
size = null;
|
}
|
return datasetFromIteratorFn(async () => (await base.iterator()).skip(count), size);
|
}
|
/**
|
* Pseudorandomly shuffles the elements of this dataset. This is done in a
|
* streaming manner, by sampling from a given number of prefetched elements.
|
*
|
* ```js
|
* const a = tf.data.array([1, 2, 3, 4, 5, 6]).shuffle(3);
|
* await a.forEachAsync(e => console.log(e));
|
* ```
|
*
|
* @param bufferSize: An integer specifying the number of elements from this
|
* dataset from which the new dataset will sample.
|
* @param seed: (Optional) An integer specifying the random seed that will
|
* be used to create the distribution.
|
* @param reshuffleEachIteration: (Optional) A boolean, which if true
|
* indicates that the dataset should be pseudorandomly reshuffled each time
|
* it is iterated over. If false, elements will be returned in the same
|
* shuffled order on each iteration. (Defaults to `true`.)
|
* @returns A `Dataset`.
|
*
|
* @doc {heading: 'Data', subheading: 'Classes'}
|
*/
|
shuffle(bufferSize, seed, reshuffleEachIteration = true) {
|
if (bufferSize == null || bufferSize < 0) {
|
if (this.size == null) {
|
throw new RangeError('`Dataset.shuffle()` requires bufferSize to be specified.');
|
}
|
else {
|
throw new RangeError('`Dataset.shuffle()` requires bufferSize to be specified. ' +
|
'If your data fits in main memory (for regular JS objects), ' +
|
'and/or GPU memory (for `tf.Tensor`s), consider setting ' +
|
`bufferSize to the dataset size (${this.size} elements)`);
|
}
|
}
|
const base = this;
|
const random = seedrandom.alea(seed || tf.util.now().toString());
|
return datasetFromIteratorFn(async () => {
|
let seed2 = random.int32();
|
if (reshuffleEachIteration) {
|
seed2 += random.int32();
|
}
|
return (await base.iterator()).shuffle(bufferSize, seed2.toString());
|
}, this.size);
|
}
|
/**
|
* Creates a `Dataset` with at most `count` initial elements from this
|
* dataset.
|
*
|
* ```js
|
* const a = tf.data.array([1, 2, 3, 4, 5, 6]).take(3);
|
* await a.forEachAsync(e => console.log(e));
|
* ```
|
*
|
* @param count: The number of elements of this dataset that should be taken
|
* to form the new dataset. If `count` is `undefined` or negative, or if
|
* `count` is greater than the size of this dataset, the new dataset will
|
* contain all elements of this dataset.
|
* @returns A `Dataset`.
|
*
|
* @doc {heading: 'Data', subheading: 'Classes'}
|
*/
|
take(count) {
|
const base = this;
|
let size;
|
if (this.size != null && this.size > count) {
|
// If the size of this dataset is greater than count, the new dataset's
|
// size is count.
|
size = count;
|
}
|
else if (this.size != null && this.size <= count) {
|
// If the size of this dataset is equal or smaller than count, the new
|
// dataset's size is the size of this dataset.
|
size = this.size;
|
}
|
else {
|
// If the size of this dataset is null, the new dataset's size is null.
|
size = null;
|
}
|
return datasetFromIteratorFn(async () => (await base.iterator()).take(count), size);
|
}
|
/**
|
* Collect all elements of this dataset into an array.
|
*
|
* Obviously this will succeed only for small datasets that fit in memory.
|
* Useful for testing and generally should be avoided if possible.
|
*
|
* ```js
|
* const a = tf.data.array([1, 2, 3, 4, 5, 6]);
|
* console.log(await a.toArray());
|
* ```
|
*
|
* @returns A Promise for an array of elements, which will resolve
|
* when a new stream has been obtained and fully consumed.
|
*
|
* @doc {heading: 'Data', subheading: 'Classes'}
|
*/
|
async toArray() {
|
if (this.size === Infinity) {
|
throw new Error('Can not convert infinite data stream to array.');
|
}
|
return (await this.iterator()).toArray();
|
}
|
/**
|
* Collect all elements of this dataset into an array with prefetching 100
|
* elements. This is useful for testing, because the prefetch changes the
|
* order in which the Promises are resolved along the processing pipeline.
|
* This may help expose bugs where results are dependent on the order of
|
* Promise resolution rather than on the logical order of the stream (i.e.,
|
* due to hidden mutable state).
|
*
|
* @returns A Promise for an array of elements, which will resolve
|
* when a new stream has been obtained and fully consumed.
|
*/
|
async toArrayForTest() {
|
if (this.size === Infinity) {
|
throw new Error('Can not convert infinite data stream to array.');
|
}
|
return (await this.iterator()).toArrayForTest();
|
}
|
}
|
// TODO(soergel): deep sharded shuffle, where supported
|
Dataset.MAX_BUFFER_SIZE = 10000;
|
export { Dataset };
|
/**
|
* Create a `Dataset` defined by a provided iterator() function.
|
*
|
* ```js
|
* let i = -1;
|
* const func = () =>
|
* ++i < 5 ? {value: i, done: false} : {value: null, done: true};
|
* const iter = tf.data.iteratorFromFunction(func);
|
* const ds = tf.data.datasetFromIteratorFn(iter);
|
* await ds.forEachAsync(e => console.log(e));
|
* ```
|
*/
|
export function datasetFromIteratorFn(iteratorFn, size = null) {
|
return new class extends Dataset {
|
constructor() {
|
super(...arguments);
|
this.size = size;
|
}
|
/*
|
* Provide a new stream of elements. Note this will also start new streams
|
* from any underlying `Dataset`s.
|
*/
|
async iterator() {
|
return iteratorFn();
|
}
|
}();
|
}
|
/**
|
* Create a `Dataset` from an array of elements.
|
*
|
* Create a Dataset from an array of objects:
|
* ```js
|
* const a = tf.data.array([{'item': 1}, {'item': 2}, {'item': 3}]);
|
* await a.forEachAsync(e => console.log(e));
|
* ```
|
*
|
* Create a Dataset from an array of numbers:
|
* ```js
|
* const a = tf.data.array([4, 5, 6]);
|
* await a.forEachAsync(e => console.log(e));
|
* ```
|
* @param items An array of elements that will be parsed as items in a dataset.
|
*
|
* @doc {heading: 'Data', subheading: 'Creation', namespace: 'data'}
|
*/
|
export function array(items) {
|
return datasetFromIteratorFn(async () => iteratorFromItems(items), items.length);
|
}
|
/**
|
* Create a `Dataset` by zipping together an array, dict, or nested
|
* structure of `Dataset`s (and perhaps additional constants).
|
* The underlying datasets must provide elements in a consistent order such that
|
* they correspond.
|
*
|
* The number of elements in the resulting dataset is the same as the size of
|
* the smallest dataset in datasets.
|
*
|
* The nested structure of the `datasets` argument determines the
|
* structure of elements in the resulting iterator.
|
*
|
* Note this means that, given an array of two datasets that produce dict
|
* elements, the result is a dataset that produces elements that are arrays
|
* of two dicts:
|
*
|
* Zip an array of datasets:
|
* ```js
|
* console.log('Zip two datasets of objects:');
|
* const ds1 = tf.data.array([{a: 1}, {a: 2}, {a: 3}]);
|
* const ds2 = tf.data.array([{b: 4}, {b: 5}, {b: 6}]);
|
* const ds3 = tf.data.zip([ds1, ds2]);
|
* await ds3.forEachAsync(e => console.log(JSON.stringify(e)));
|
*
|
* // If the goal is to merge the dicts in order to produce elements like
|
* // {a: ..., b: ...}, this requires a second step such as:
|
* console.log('Merge the objects:');
|
* const ds4 = ds3.map(x => {return {a: x[0].a, b: x[1].b}});
|
* await ds4.forEachAsync(e => console.log(e));
|
* ```
|
*
|
* Zip a dict of datasets:
|
* ```js
|
* const a = tf.data.array([{a: 1}, {a: 2}, {a: 3}]);
|
* const b = tf.data.array([{b: 4}, {b: 5}, {b: 6}]);
|
* const c = tf.data.zip({c: a, d: b});
|
* await c.forEachAsync(e => console.log(JSON.stringify(e)));
|
* ```
|
*
|
* @doc {heading: 'Data', subheading: 'Operations', namespace: 'data'}
|
*/
|
export function zip(datasets) {
|
// manually type-check the argument for JS users
|
if (!isIterable(datasets)) {
|
throw new Error('The argument to zip() must be an object or array.');
|
}
|
let size;
|
if (Array.isArray(datasets)) {
|
for (let i = 0; i < datasets.length; i++) {
|
size = size == null ? datasets[i].size :
|
Math.min(size, datasets[i].size);
|
}
|
}
|
else if (datasets instanceof Object) {
|
for (const ds in datasets) {
|
size = size == null ? datasets[ds].size :
|
Math.min(size, datasets[ds].size);
|
}
|
}
|
return datasetFromIteratorFn(async () => {
|
const streams = await deepMapAndAwaitAll(datasets, d => {
|
if (d instanceof Dataset) {
|
return { value: d.iterator(), recurse: false };
|
}
|
else if (isIterable(d)) {
|
return { value: null, recurse: true };
|
}
|
else {
|
throw new Error('Leaves of the structure passed to zip() must be Datasets, ' +
|
'not primitives.');
|
}
|
});
|
return iteratorFromZipped(streams, ZipMismatchMode.SHORTEST);
|
}, size);
|
}
|
/**
|
* A zip function for use with deepZip, passed via the columnMajorBatch call.
|
*
|
* Accepts an array of identically-structured nested elements and either batches
|
* them (if they are primitives, numeric arrays, or Tensors) or requests
|
* recursion (if not).
|
*/
|
// tslint:disable-next-line:no-any
|
function deepBatchConcat(rows) {
|
if (rows === null) {
|
return null;
|
}
|
// use the first item to decide whether to recurse or batch here.
|
const exampleRow = rows[0];
|
if (canTensorify(exampleRow)) {
|
// rows is an array of primitives, Tensors, or arrays. Batch them.
|
const value = batchConcat(rows);
|
return { value, recurse: false };
|
}
|
// the example row is an object, so recurse into it.
|
return { value: null, recurse: true };
|
}
|
/**
|
* Assembles a list of same-shaped numbers, number arrays, or Tensors
|
* into a single new Tensor where axis 0 is the batch dimension.
|
*/
|
function batchConcat(arrays) {
|
if (arrays.length === 0) {
|
// We can't return an empty Tensor because we don't know the element shape.
|
throw new Error('Can\'t make a batch of zero elements.');
|
}
|
if (arrays[0] instanceof tf.Tensor) {
|
// Input is an array of Tensors
|
return tf.stack(arrays);
|
}
|
else {
|
// Input is a possibly-nested array of numbers.
|
return tf.tensor(arrays);
|
}
|
}
|
//# sourceMappingURL=data:application/json;base64,{"version":3,"file":"dataset.js","sourceRoot":"","sources":["../../../../../tfjs-data/src/dataset.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;GAgBG;AAEH,OAAO,KAAK,EAAE,MAAM,uBAAuB,CAAC;AAE5C,OAAO,KAAK,UAAU,MAAM,YAAY,CAAC;AAEzC,OAAO,EAAC,wBAAwB,EAAE,oBAAoB,EAAE,iBAAiB,EAAE,kBAAkB,EAAgB,eAAe,EAAC,MAAM,2BAA2B,CAAC;AAE/J,OAAO,EAAC,YAAY,EAAE,kBAAkB,EAAiB,UAAU,EAAC,MAAM,iBAAiB,CAAC;AAO5F,qEAAqE;AAErE;;;;;;;;;;;;;;;;;;;;;;;;;;;GA2BG;AACH,MAAsB,OAAO;IAA7B;QAWW,SAAI,GAAW,IAAI,CAAC;IA2c/B,CAAC;IAzcC,wEAAwE;IACxE,2EAA2E;IAC3E,4EAA4E;IAC5E,2CAA2C;IAC3C,uCAAuC;IAEvC;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;OAsDG;IACH,KAAK,CAAC,SAAiB,EAAE,cAAc,GAAG,IAAI;QAC5C,MAAM,IAAI,GAAG,IAAI,CAAC;QAClB,EAAE,CAAC,IAAI,CAAC,MAAM,CACV,SAAS,GAAG,CAAC,EAAE,GAAG,EAAE,CAAC;QACrB,SAAS,EAAE,CAAC,CAAC;QACjB,IAAI,IAAI,CAAC;QACT,IAAI,IAAI,CAAC,IAAI,KAAK,QAAQ,IAAI,IAAI,CAAC,IAAI,IAAI,IAAI,EAAE;YAC/C,0EAA0E;YAC1E,QAAQ;YACR,IAAI,GAAG,IAAI,CAAC,IAAI,CAAC;SAClB;aAAM,IAAI,cAAc,EAAE;YACzB,yEAAyE;YACzE,gDAAgD;YAChD,IAAI,GAAG,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,GAAG,SAAS,CAAC,CAAC;SACzC;aAAM;YACL,yEAAyE;YACzE,oCAAoC;YACpC,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,IAAI,GAAG,SAAS,CAAC,CAAC;SAC1C;QACD,OAAO,qBAAqB,CAAC,KAAK,IAAI,EAAE;YACtC,OAAO,CAAC,MAAM,IAAI,CAAC,QAAQ,EAAE,CAAC;iBACzB,gBAAgB,CAAC,SAAS,EAAE,cAAc,EAAE,eAAe,CAAC,CAAC;QACpE,CAAC,EAAE,IAAI,CAAC,CAAC;IACX,CAAC;IAED;;;;;;;;;;;;;;OAcG;IACH,WAAW,CAAC,OAAmB;QAC7B,MAAM,IAAI,GAAG,IAAI,CAAC;QAClB,IAAI,IAAI,CAAC;QACT,IAAI,IAAI,CAAC,IAAI,KAAK,QAAQ,IAAI,OAAO,CAAC,IAAI,KAAK,QAAQ,EAAE;YACvD,mEAAmE;YACnE,YAAY;YACZ,IAAI,GAAG,QAAQ,CAAC;SACjB;aAAM,IAAI,IAAI,CAAC,IAAI,IAAI,IAAI,IAAI,OAAO,CAAC,IAAI,IAAI,IAAI,EAAE;YACpD,uEAAuE;YACvE,sCAAsC;YACtC,IAAI,GAAG,IAAI,CAAC,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC;SACjC;aAAM;YACL,0EAA0E;YAC1E,gDAAgD;YAChD,IAAI,GAAG,IAAI,CAAC;SACb;QACD,OAAO,qBAAqB,CACxB,KAAK,IAAI,EAAE,CACP,CAAC,MAAM,IAAI,CAAC,QAAQ,EAAE,CAAC,CAAC,WAAW,CAAC,MAAM,OAAO,CAAC,QAAQ,EAAE,CAAC,EACjE,IAAI,CAAC,CAAC;IACZ,CAAC;IAED;;;;;;;;;;;;;;;OAeG;IACH,MAAM,CAAC,SAAgC;QACrC,MAAM,IAAI,GAAG,IAAI,CAAC;QAClB,IAAI,IAAI,CAAC;QACT,IAAI,IAAI,CAAC,IAAI,KAAK,QAAQ,EAAE;YAC1B,gEAAgE;YAChE,IAAI,GAAG,QAAQ,CAAC;SACjB;aAAM;YACL,0EAA0E;YAC1E,sBAAsB;YACtB,IAAI,GAAG,IAAI,CAAC;SACb;QACD,OAAO,qBAAqB,CAAC,KAAK,IAAI,EAAE;YACtC,OAAO,CAAC,MAAM,IAAI,CAAC,QAAQ,EAAE,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,CAAC,IAAI,CAAC,GAAG,EAAE,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QAC1E,CAAC,EAAE,IAAI,CAAC,CAAC;IACX,CAAC;IAED;;;;;;;;;;;;;;;OAeG;IACH,KAAK,CAAC,YAAY,CAAC,CAAqB;QACtC,OAAO,CAAC,MAAM,IAAI,CAAC,QAAQ,EAAE,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;IACjD,CAAC;IAED;;;;;;;;;;;;;;OAcG;IACH,GAAG,CAA+B,SAA0B;QAC1D,MAAM,IAAI,GAAG,IAAI,CAAC;QAClB,OAAO,qBAAqB,CAAC,KAAK,IAAI,EAAE;YACtC,OAAO,CAAC,MAAM,IAAI,CAAC,QAAQ,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,CAAC,IAAI,CAAC,GAAG,EAAE,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QACvE,CAAC,EAAE,IAAI,CAAC,IAAI,CAAC,CAAC;IAChB,CAAC;IAED;;;;;;;;;;;;;;;;;;;;;;OAsBG;IACH,QAAQ,CAA+B,SAAmC;QAExE,MAAM,IAAI,GAAG,IAAI,CAAC;QAClB,OAAO,qBAAqB,CAAC,KAAK,IAAI,EAAE;YACtC,OAAO,CAAC,MAAM,IAAI,CAAC,QAAQ,EAAE,CAAC,CAAC,QAAQ,CAAC,SAAS,CAAC,CAAC;QACrD,CAAC,EAAE,IAAI,CAAC,IAAI,CAAC,CAAC;IAChB,CAAC;IAED;;;;;;;;OAQG;IACH,QAAQ,CAAC,UAAkB;QACzB,IAAI,UAAU,IAAI,IAAI,EAAE;YACtB,MAAM,IAAI,UAAU,CAChB,2DAA2D,CAAC,CAAC;SAClE;QAED,MAAM,IAAI,GAAG,IAAI,CAAC;QAClB,OAAO,qBAAqB,CACxB,KAAK,IAAI,EAAE,CAAC,CAAC,MAAM,IAAI,CAAC,QAAQ,EAAE,CAAC,CAAC,QAAQ,CAAC,UAAU,CAAC,EAAE,IAAI,CAAC,IAAI,CAAC,CAAC;IAC3E,CAAC;IAED;;;;;;;;;;;;;;;;;OAiBG;IACH,MAAM,CAAC,KAAc;QACnB,MAAM,IAAI,GAAG,IAAI,CAAC;QAClB,IAAI,IAAI,CAAC;QACT,IAAI,IAAI,CAAC,IAAI,IAAI,IAAI,IAAI,KAAK,GAAG,CAAC,EAAE;YAClC,sEAAsE;YACtE,sEAAsE;YACtE,YAAY;YACZ,IAAI,GAAG,IAAI,CAAC,IAAI,GAAG,KAAK,CAAC;SAC1B;aAAM,IAAI,KAAK,KAAK,CAAC,EAAE;YACtB,gCAAgC;YAChC,IAAI,GAAG,CAAC,CAAC;SACV;aAAM,IAAI,IAAI,CAAC,IAAI,IAAI,IAAI,IAAI,CAAC,KAAK,KAAK,SAAS,IAAI,KAAK,GAAG,CAAC,CAAC,EAAE;YAClE,mEAAmE;YACnE,kEAAkE;YAClE,IAAI,GAAG,QAAQ,CAAC;SACjB;aAAM;YACL,uEAAuE;YACvE,IAAI,GAAG,IAAI,CAAC;SACb;QACD,OAAO,qBAAqB,CAAC,KAAK,IAAI,EAAE;YACtC,MAAM,gBAAgB,GAAG,oBAAoB,CACzC,KAAK,IAAI,EAAE,CAAC,CAAC,EAAC,KAAK,EAAE,MAAM,IAAI,CAAC,QAAQ,EAAE,EAAE,IAAI,EAAE,KAAK,EAAC,CAAC,CAAC,CAAC;YAC/D,OAAO,wBAAwB,CAAC,gBAAgB,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC;QAChE,CAAC,EAAE,IAAI,CAAC,CAAC;IACX,CAAC;IAED;;;;;;;;;;;;;;;;OAgBG;IACH,IAAI,CAAC,KAAa;QAChB,MAAM,IAAI,GAAG,IAAI,CAAC;QAClB,IAAI,IAAI,CAAC;QACT,IAAI,IAAI,CAAC,IAAI,IAAI,IAAI,IAAI,KAAK,IAAI,CAAC,IAAI,IAAI,CAAC,IAAI,IAAI,KAAK,EAAE;YACzD,uEAAuE;YACvE,yEAAyE;YACzE,4BAA4B;YAC5B,IAAI,GAAG,IAAI,CAAC,IAAI,GAAG,KAAK,CAAC;SAC1B;aAAM,IACH,IAAI,CAAC,IAAI,IAAI,IAAI;YACjB,CAAC,IAAI,CAAC,IAAI,GAAG,KAAK,IAAI,KAAK,KAAK,SAAS,IAAI,KAAK,GAAG,CAAC,CAAC,EAAE;YAC3D,iEAAiE;YACjE,yEAAyE;YACzE,IAAI,GAAG,CAAC,CAAC;SACV;aAAM;YACL,uEAAuE;YACvE,IAAI,GAAG,IAAI,CAAC;SACb;QACD,OAAO,qBAAqB,CACxB,KAAK,IAAI,EAAE,CAAC,CAAC,MAAM,IAAI,CAAC,QAAQ,EAAE,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,IAAI,CAAC,CAAC;IAC7D,CAAC;IAMD;;;;;;;;;;;;;;;;;;;;OAoBG;IACH,OAAO,CAAC,UAAkB,EAAE,IAAa,EAAE,sBAAsB,GAAG,IAAI;QAEtE,IAAI,UAAU,IAAI,IAAI,IAAI,UAAU,GAAG,CAAC,EAAE;YACxC,IAAI,IAAI,CAAC,IAAI,IAAI,IAAI,EAAE;gBACrB,MAAM,IAAI,UAAU,CAChB,0DAA0D,CAAC,CAAC;aACjE;iBAAM;gBACL,MAAM,IAAI,UAAU,CAChB,4DAA4D;oBAC5D,6DAA6D;oBAC7D,yDAAyD;oBACzD,mCAAmC,IAAI,CAAC,IAAI,YAAY,CAAC,CAAC;aAC/D;SACF;QACD,MAAM,IAAI,GAAG,IAAI,CAAC;QAClB,MAAM,MAAM,GAAG,UAAU,CAAC,IAAI,CAAC,IAAI,IAAI,EAAE,CAAC,IAAI,CAAC,GAAG,EAAE,CAAC,QAAQ,EAAE,CAAC,CAAC;QACjE,OAAO,qBAAqB,CAAC,KAAK,IAAI,EAAE;YACtC,IAAI,KAAK,GAAG,MAAM,CAAC,KAAK,EAAE,CAAC;YAC3B,IAAI,sBAAsB,EAAE;gBAC1B,KAAK,IAAI,MAAM,CAAC,KAAK,EAAE,CAAC;aACzB;YACD,OAAO,CAAC,MAAM,IAAI,CAAC,QAAQ,EAAE,CAAC,CAAC,OAAO,CAAC,UAAU,EAAE,KAAK,CAAC,QAAQ,EAAE,CAAC,CAAC;QACvE,CAAC,EAAE,IAAI,CAAC,IAAI,CAAC,CAAC;IAChB,CAAC;IAED;;;;;;;;;;;;;;;;OAgBG;IACH,IAAI,CAAC,KAAa;QAChB,MAAM,IAAI,GAAG,IAAI,CAAC;QAClB,IAAI,IAAI,CAAC;QACT,IAAI,IAAI,CAAC,IAAI,IAAI,IAAI,IAAI,IAAI,CAAC,IAAI,GAAG,KAAK,EAAE;YAC1C,uEAAuE;YACvE,iBAAiB;YACjB,IAAI,GAAG,KAAK,CAAC;SACd;aAAM,IAAI,IAAI,CAAC,IAAI,IAAI,IAAI,IAAI,IAAI,CAAC,IAAI,IAAI,KAAK,EAAE;YAClD,sEAAsE;YACtE,8CAA8C;YAC9C,IAAI,GAAG,IAAI,CAAC,IAAI,CAAC;SAClB;aAAM;YACL,uEAAuE;YACvE,IAAI,GAAG,IAAI,CAAC;SACb;QACD,OAAO,qBAAqB,CACxB,KAAK,IAAI,EAAE,CAAC,CAAC,MAAM,IAAI,CAAC,QAAQ,EAAE,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,IAAI,CAAC,CAAC;IAC7D,CAAC;IAED;;;;;;;;;;;;;;;OAeG;IACH,KAAK,CAAC,OAAO;QACX,IAAI,IAAI,CAAC,IAAI,KAAK,QAAQ,EAAE;YAC1B,MAAM,IAAI,KAAK,CAAC,gDAAgD,CAAC,CAAC;SACnE;QACD,OAAO,CAAC,MAAM,IAAI,CAAC,QAAQ,EAAE,CAAC,CAAC,OAAO,EAAE,CAAC;IAC3C,CAAC;IAED;;;;;;;;;;OAUG;IACH,KAAK,CAAC,cAAc;QAClB,IAAI,IAAI,CAAC,IAAI,KAAK,QAAQ,EAAE;YAC1B,MAAM,IAAI,KAAK,CAAC,gDAAgD,CAAC,CAAC;SACnE;QACD,OAAO,CAAC,MAAM,IAAI,CAAC,QAAQ,EAAE,CAAC,CAAC,cAAc,EAAE,CAAC;IAClD,CAAC;;AA7HD,uDAAuD;AAEvC,uBAAe,GAAG,KAAK,AAAR,CAAS;SA1VpB,OAAO;AAwd7B;;;;;;;;;;;GAWG;AACH,MAAM,UAAU,qBAAqB,CACjC,UAA0C,EAC1C,OAAe,IAAI;IACrB,OAAO,IAAI,KAAM,SAAQ,OAAU;QAAxB;;YACA,SAAI,GAAG,IAAI,CAAC;QASvB,CAAC;QAPC;;;WAGG;QACH,KAAK,CAAC,QAAQ;YACZ,OAAO,UAAU,EAAE,CAAC;QACtB,CAAC;KACF,EACC,CAAC;AACL,CAAC;AAED;;;;;;;;;;;;;;;;;GAiBG;AACH,MAAM,UAAU,KAAK,CAA+B,KAAU;IAC5D,OAAO,qBAAqB,CACxB,KAAK,IAAI,EAAE,CAAC,iBAAiB,CAAC,KAAK,CAAC,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC;AAC1D,CAAC;AAED;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAwCG;AACH,MAAM,UAAU,GAAG,CAA+B,QAA0B;IAE1E,gDAAgD;IAChD,IAAI,CAAC,UAAU,CAAC,QAAQ,CAAC,EAAE;QACzB,MAAM,IAAI,KAAK,CAAC,mDAAmD,CAAC,CAAC;KACtE;IACD,IAAI,IAAI,CAAC;IACT,IAAI,KAAK,CAAC,OAAO,CAAC,QAAQ,CAAC,EAAE;QAC3B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,QAAQ,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE;YACxC,IAAI,GAAG,IAAI,IAAI,IAAI,CAAC,CAAC,CAAE,QAAQ,CAAC,CAAC,CAAgB,CAAC,IAAI,CAAC,CAAC;gBAClC,IAAI,CAAC,GAAG,CAAC,IAAI,EAAG,QAAQ,CAAC,CAAC,CAAgB,CAAC,IAAI,CAAC,CAAC;SACxE;KACF;SAAM,IAAI,QAAQ,YAAY,MAAM,EAAE;QACrC,KAAK,MAAM,EAAE,IAAI,QAAQ,EAAE;YACzB,IAAI,GAAG,IAAI,IAAI,IAAI,CAAC,CAAC,CAAE,QAAQ,CAAC,EAAE,CAAgB,CAAC,IAAI,CAAC,CAAC;gBACnC,IAAI,CAAC,GAAG,CAAC,IAAI,EAAG,QAAQ,CAAC,EAAE,CAAgB,CAAC,IAAI,CAAC,CAAC;SACzE;KACF;IACD,OAAO,qBAAqB,CAAI,KAAK,IAAI,EAAE;QACzC,MAAM,OAAO,GAAG,MAAM,kBAAkB,CAAC,QAAQ,EAAE,CAAC,CAAC,EAAE;YACrD,IAAI,CAAC,YAAY,OAAO,EAAE;gBACxB,OAAO,EAAC,KAAK,EAAE,CAAC,CAAC,QAAQ,EAAE,EAAE,OAAO,EAAE,KAAK,EAAC,CAAC;aAC9C;iBAAM,IAAI,UAAU,CAAC,CAAC,CAAC,EAAE;gBACxB,OAAO,EAAC,KAAK,EAAE,IAAI,EAAE,OAAO,EAAE,IAAI,EAAC,CAAC;aACrC;iBAAM;gBACL,MAAM,IAAI,KAAK,CACX,4DAA4D;oBAC5D,iBAAiB,CAAC,CAAC;aACxB;QACH,CAAC,CAAC,CAAC;QACH,OAAO,kBAAkB,CAAI,OAAO,EAAE,eAAe,CAAC,QAAQ,CAAC,CAAC;IAClE,CAAC,EAAE,IAAI,CAAC,CAAC;AACX,CAAC;AAED;;;;;;GAMG;AACH,kCAAkC;AAClC,SAAS,eAAe,CAAC,IAAW;IAClC,IAAI,IAAI,KAAK,IAAI,EAAE;QACjB,OAAO,IAAI,CAAC;KACb;IAED,iEAAiE;IACjE,MAAM,UAAU,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC;IAE3B,IAAI,YAAY,CAAC,UAAU,CAAC,EAAE;QAC5B,mEAAmE;QACnE,MAAM,KAAK,GAAG,WAAW,CAAC,IAAI,CAAC,CAAC;QAChC,OAAO,EAAC,KAAK,EAAE,OAAO,EAAE,KAAK,EAAC,CAAC;KAChC;IAED,oDAAoD;IACpD,OAAO,EAAC,KAAK,EAAE,IAAI,EAAE,OAAO,EAAE,IAAI,EAAC,CAAC;AACtC,CAAC;AAED;;;GAGG;AACH,SAAS,WAAW,CAAoC,MAAW;IAEjE,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC,EAAE;QACvB,2EAA2E;QAC3E,MAAM,IAAI,KAAK,CAAC,uCAAuC,CAAC,CAAC;KAC1D;IAED,IAAI,MAAM,CAAC,CAAC,CAAC,YAAY,EAAE,CAAC,MAAM,EAAE;QAClC,+BAA+B;QAC/B,OAAO,EAAE,CAAC,KAAK,CAAC,MAAqB,CAAC,CAAC;KACxC;SAAM;QACL,+CAA+C;QAC/C,OAAO,EAAE,CAAC,MAAM,CAAC,MAAoB,CAAC,CAAC;KACxC;AACH,CAAC","sourcesContent":["/**\n * @license\n * Copyright 2018 Google LLC. All Rights Reserved.\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use this file except in compliance with the License.\n * You may obtain a copy of the License at\n *\n * http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n *\n * =============================================================================\n */\n\nimport * as tf from '@tensorflow/tfjs-core';\nimport {TensorContainer, TensorLike} from '@tensorflow/tfjs-core';\nimport * as seedrandom from 'seedrandom';\n\nimport {iteratorFromConcatenated, iteratorFromFunction, iteratorFromItems, iteratorFromZipped, LazyIterator, ZipMismatchMode} from './iterators/lazy_iterator';\nimport {Container} from './types';\nimport {canTensorify, deepMapAndAwaitAll, DeepMapResult, isIterable} from './util/deep_map';\n\n/**\n * A nested structure of Datasets, used as the input to zip().\n */\nexport type DatasetContainer = Container<Dataset<TensorContainer>>;\n\n// TODO(soergel): consider vectorized operations within the pipeline.\n\n/**\n * Represents a potentially large list of independent data elements (typically\n * 'samples' or 'examples').\n *\n * A 'data example' may be a primitive, an array, a map from string keys to\n * values, or any nested structure of these.\n *\n * A `Dataset` represents an ordered collection of elements, together with a\n * chain of transformations to be performed on those elements. Each\n * transformation is a method of `Dataset` that returns another `Dataset`, so\n * these may be chained, e.g.\n * `const processedDataset = rawDataset.filter(...).map(...).batch(...)`.\n *\n * Data loading and transformation is done in a lazy, streaming fashion.  The\n * dataset may be iterated over multiple times; each iteration starts the data\n * loading anew and recapitulates the transformations.\n *\n * A `Dataset` is typically processed as a stream of unbatched examples -- i.e.,\n * its transformations are applied one example at a time. Batching produces a\n * new `Dataset` where each element is a batch. Batching should usually come\n * last in a pipeline, because data transformations are easier to express on a\n * per-example basis than on a per-batch basis.\n *\n * The following code examples are calling `await dataset.forEachAsync(...)` to\n * iterate once over the entire dataset in order to print out the data.\n *\n * @doc {heading: 'Data', subheading: 'Classes', namespace: 'data'}\n */\nexport abstract class Dataset<T extends tf.TensorContainer> {\n  /*\n   * Provide a new stream of elements.  Note this will also start new streams\n   * from any underlying `Dataset`s.\n   *\n   * CAUTION: Any Tensors contained within the elements returned from\n   * this stream *must* be manually disposed to avoid a GPU memory leak.\n   * The tf.tidy() approach cannot be used in an asynchronous context.\n   */\n  abstract iterator(): Promise<LazyIterator<T>>;\n\n  readonly size: number = null;\n\n  // TODO(soergel): Make Datasets report whether repeated iterator() calls\n  // produce the same result (e.g., reading from a file) or different results\n  // (e.g., from the webcam).  Currently we don't make this distinction but it\n  // could be important for the user to know.\n  // abstract isDeterministic(): boolean;\n\n  /**\n   * Groups elements into batches.\n   *\n   * It is assumed that each of the incoming dataset elements has the same\n   * structure -- i.e. the same set of keys at each location in an object\n   * hierarchy.  For each key, the resulting `Dataset` provides a batched\n   * element collecting all of the incoming values for that key.\n   *\n   *  * Incoming primitives are grouped into a 1-D Tensor.\n   *  * Incoming Tensors are grouped into a new Tensor where the 0th axis is\n   *    the batch dimension.\n   *  * Incoming arrays are converted to Tensor and then batched.\n   *  * A nested array is interpreted as an n-D Tensor, so the batched result\n   *    has n+1 dimensions.\n   *  * An array that cannot be converted to Tensor produces an error.\n   *\n   * If an array should not be batched as a unit, it should first be converted\n   * to an object with integer keys.\n   *\n   * Here are a few examples:\n   *\n   * Batch a dataset of numbers:\n   * ```js\n   * const a = tf.data.array([1, 2, 3, 4, 5, 6, 7, 8]).batch(4);\n   * await a.forEachAsync(e => e.print());\n   * ```\n   *\n   * Batch a dataset of arrays:\n   * ```js\n   * const b = tf.data.array([[1], [2], [3], [4], [5], [6], [7], [8]]).batch(4);\n   * await b.forEachAsync(e => e.print());\n   * ```\n   *\n   * Batch a dataset of objects:\n   * ```js\n   * const c = tf.data.array([{a: 1, b: 11}, {a: 2, b: 12}, {a: 3, b: 13},\n   *   {a: 4, b: 14}, {a: 5, b: 15}, {a: 6, b: 16}, {a: 7, b: 17},\n   *   {a: 8, b: 18}]).batch(4);\n   * await c.forEachAsync(e => {\n   *   console.log('{');\n   *   for(var key in e) {\n   *     console.log(key+':');\n   *     e[key].print();\n   *   }\n   *   console.log('}');\n   * })\n   * ```\n   *\n   * @param batchSize The number of elements desired per batch.\n   * @param smallLastBatch Whether to emit the final batch when it has fewer\n   *   than batchSize elements. Default true.\n   * @returns A `Dataset`, from which a stream of batches can be obtained.\n   *\n   * @doc {heading: 'Data', subheading: 'Classes'}\n   */\n  batch(batchSize: number, smallLastBatch = true): Dataset<tf.TensorContainer> {\n    const base = this;\n    tf.util.assert(\n        batchSize > 0, () => `batchSize needs to be positive, but it is\n      ${batchSize}`);\n    let size;\n    if (this.size === Infinity || this.size == null) {\n      // If the size of this dataset is infinity or null, the new size keeps the\n      // same.\n      size = this.size;\n    } else if (smallLastBatch) {\n      // If the size of this dataset is known and include small last batch, the\n      // new size is full batch count plus last batch.\n      size = Math.ceil(this.size / batchSize);\n    } else {\n      // If the size of this dataset is known and not include small last batch,\n      // the new size is full batch count.\n      size = Math.floor(this.size / batchSize);\n    }\n    return datasetFromIteratorFn(async () => {\n      return (await base.iterator())\n          .columnMajorBatch(batchSize, smallLastBatch, deepBatchConcat);\n    }, size);\n  }\n\n  /**\n   * Concatenates this `Dataset` with another.\n   *\n   * ```js\n   * const a = tf.data.array([1, 2, 3]);\n   * const b = tf.data.array([4, 5, 6]);\n   * const c = a.concatenate(b);\n   * await c.forEachAsync(e => console.log(e));\n   * ```\n   *\n   * @param dataset A `Dataset` to be concatenated onto this one.\n   * @returns A `Dataset`.\n   *\n   * @doc {heading: 'Data', subheading: 'Classes'}\n   */\n  concatenate(dataset: Dataset<T>): Dataset<T> {\n    const base = this;\n    let size;\n    if (this.size === Infinity || dataset.size === Infinity) {\n      // If the size of any of these two dataset is infinity, new size is\n      // infinity.\n      size = Infinity;\n    } else if (this.size != null && dataset.size != null) {\n      // If the size of both datasets are known and not infinity, new size is\n      // sum the size of these two datasets.\n      size = this.size + dataset.size;\n    } else {\n      // If neither of these two datasets has infinite size and any of these two\n      // datasets' size is null, the new size is null.\n      size = null;\n    }\n    return datasetFromIteratorFn(\n        async () =>\n            (await base.iterator()).concatenate(await dataset.iterator()),\n        size);\n  }\n\n  /**\n   * Filters this dataset according to `predicate`.\n   *\n   * ```js\n   * const a = tf.data.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])\n   *   .filter(x => x%2 === 0);\n   * await a.forEachAsync(e => console.log(e));\n   * ```\n   *\n   * @param predicate A function mapping a dataset element to a boolean or a\n   * `Promise` for one.\n   *\n   * @returns A `Dataset` of elements for which the predicate was true.\n   *\n   * @doc {heading: 'Data', subheading: 'Classes'}\n   */\n  filter(predicate: (value: T) => boolean): Dataset<T> {\n    const base = this;\n    let size;\n    if (this.size === Infinity) {\n      // If the size of this dataset is infinity, new size is infinity\n      size = Infinity;\n    } else {\n      // If this dataset has limited elements, new size is null because it might\n      // exhausted randomly.\n      size = null;\n    }\n    return datasetFromIteratorFn(async () => {\n      return (await base.iterator()).filter(x => tf.tidy(() => predicate(x)));\n    }, size);\n  }\n\n  /**\n   * Apply a function to every element of the dataset.\n   *\n   * After the function is applied to a dataset element, any Tensors contained\n   * within that element are disposed.\n   *\n   * ```js\n   * const a = tf.data.array([1, 2, 3]);\n   * await a.forEachAsync(e => console.log(e));\n   * ```\n   *\n   * @param f A function to apply to each dataset element.\n   * @returns A `Promise` that resolves after all elements have been processed.\n   *\n   * @doc {heading: 'Data', subheading: 'Classes'}\n   */\n  async forEachAsync(f: (input: T) => void): Promise<void> {\n    return (await this.iterator()).forEachAsync(f);\n  }\n\n  /**\n   * Maps this dataset through a 1-to-1 transform.\n   *\n   * ```js\n   * const a = tf.data.array([1, 2, 3]).map(x => x*x);\n   * await a.forEachAsync(e => console.log(e));\n   * ```\n   *\n   * @param transform A function mapping a dataset element to a transformed\n   *   dataset element.\n   *\n   * @returns A `Dataset` of transformed elements.\n   *\n   * @doc {heading: 'Data', subheading: 'Classes'}\n   */\n  map<O extends tf.TensorContainer>(transform: (value: T) => O): Dataset<O> {\n    const base = this;\n    return datasetFromIteratorFn(async () => {\n      return (await base.iterator()).map(x => tf.tidy(() => transform(x)));\n    }, this.size);\n  }\n\n  /**\n   * Maps this dataset through an async 1-to-1 transform.\n   *\n   * ```js\n   * const a =\n   *  tf.data.array([1, 2, 3]).mapAsync(x => new Promise(function(resolve){\n   *    setTimeout(() => {\n   *      resolve(x * x);\n   *    }, Math.random()*1000 + 500);\n   *  }));\n   * console.log(await a.toArray());\n   * ```\n   *\n   * @param transform A function mapping a dataset element to a `Promise` for a\n   *   transformed dataset element.  This transform is responsible for disposing\n   *   any intermediate `Tensor`s, i.e. by wrapping its computation in\n   *   `tf.tidy()`; that cannot be automated here (as it is in the synchronous\n   *   `map()` case).\n   *\n   * @returns A `Dataset` of transformed elements.\n   *\n   * @doc {heading: 'Data', subheading: 'Classes'}\n   */\n  mapAsync<O extends tf.TensorContainer>(transform: (value: T) => Promise<O>):\n      Dataset<O> {\n    const base = this;\n    return datasetFromIteratorFn(async () => {\n      return (await base.iterator()).mapAsync(transform);\n    }, this.size);\n  }\n\n  /**\n   *  Creates a `Dataset` that prefetches elements from this dataset.\n   *\n   * @param bufferSize: An integer specifying the number of elements to be\n   *   prefetched.\n   * @returns A `Dataset`.\n   *\n   * @doc {heading: 'Data', subheading: 'Classes'}\n   */\n  prefetch(bufferSize: number): Dataset<T> {\n    if (bufferSize == null) {\n      throw new RangeError(\n          '`Dataset.prefetch()` requires bufferSize to be specified.');\n    }\n\n    const base = this;\n    return datasetFromIteratorFn(\n        async () => (await base.iterator()).prefetch(bufferSize), this.size);\n  }\n\n  /**\n   * Repeats this dataset `count` times.\n   *\n   * NOTE: If this dataset is a function of global state (e.g. a random number\n   * generator), then different repetitions may produce different elements.\n   *\n   * ```js\n   * const a = tf.data.array([1, 2, 3]).repeat(3);\n   * await a.forEachAsync(e => console.log(e));\n   * ```\n   *\n   * @param count: (Optional) An integer, representing the number of times\n   *   the dataset should be repeated. The default behavior (if `count` is\n   *   `undefined` or negative) is for the dataset be repeated indefinitely.\n   * @returns A `Dataset`.\n   *\n   * @doc {heading: 'Data', subheading: 'Classes'}\n   */\n  repeat(count?: number): Dataset<T> {\n    const base = this;\n    let size;\n    if (this.size != null && count > 0) {\n      // If this dataset has size and count is positive, new size is current\n      // size multiply count. This also covers the case that current size is\n      // infinity.\n      size = this.size * count;\n    } else if (count === 0) {\n      // If count is 0, new size is 0.\n      size = 0;\n    } else if (this.size != null && (count === undefined || count < 0)) {\n      // If this dataset has size and count is undefined or negative, the\n      // dataset will be repeated indefinitely and new size is infinity.\n      size = Infinity;\n    } else {\n      // If the size of this dataset is null, the new dataset's size is null.\n      size = null;\n    }\n    return datasetFromIteratorFn(async () => {\n      const iteratorIterator = iteratorFromFunction(\n          async () => ({value: await base.iterator(), done: false}));\n      return iteratorFromConcatenated(iteratorIterator.take(count));\n    }, size);\n  }\n\n  /**\n   * Creates a `Dataset` that skips `count` initial elements from this dataset.\n   *\n   * ```js\n   * const a = tf.data.array([1, 2, 3, 4, 5, 6]).skip(3);\n   * await a.forEachAsync(e => console.log(e));\n   * ```\n   *\n   * @param count: The number of elements of this dataset that should be skipped\n   *   to form the new dataset.  If `count` is greater than the size of this\n   *   dataset, the new dataset will contain no elements.  If `count`\n   *   is `undefined` or negative, skips the entire dataset.\n   *\n   * @returns A `Dataset`.\n   *\n   * @doc {heading: 'Data', subheading: 'Classes'}\n   */\n  skip(count: number): Dataset<T> {\n    const base = this;\n    let size;\n    if (this.size != null && count >= 0 && this.size >= count) {\n      // If the size of this dataset is greater than count, the new dataset's\n      // size is current size minus skipped size.This also covers the case that\n      // current size is infinity.\n      size = this.size - count;\n    } else if (\n        this.size != null &&\n        (this.size < count || count === undefined || count < 0)) {\n      // If the size of this dataset is smaller than count, or count is\n      // undefined or negative, skips the entire dataset and the new size is 0.\n      size = 0;\n    } else {\n      // If the size of this dataset is null, the new dataset's size is null.\n      size = null;\n    }\n    return datasetFromIteratorFn(\n        async () => (await base.iterator()).skip(count), size);\n  }\n\n  // TODO(soergel): deep sharded shuffle, where supported\n\n  static readonly MAX_BUFFER_SIZE = 10000;\n\n  /**\n   * Pseudorandomly shuffles the elements of this dataset. This is done in a\n   * streaming manner, by sampling from a given number of prefetched elements.\n   *\n   * ```js\n   * const a = tf.data.array([1, 2, 3, 4, 5, 6]).shuffle(3);\n   * await a.forEachAsync(e => console.log(e));\n   * ```\n   *\n   * @param bufferSize: An integer specifying the number of elements from this\n   *   dataset from which the new dataset will sample.\n   * @param seed: (Optional) An integer specifying the random seed that will\n   *   be used to create the distribution.\n   * @param reshuffleEachIteration: (Optional) A boolean, which if true\n   *   indicates that the dataset should be pseudorandomly reshuffled each time\n   *   it is iterated over. If false, elements will be returned in the same\n   *   shuffled order on each iteration. (Defaults to `true`.)\n   * @returns A `Dataset`.\n   *\n   * @doc {heading: 'Data', subheading: 'Classes'}\n   */\n  shuffle(bufferSize: number, seed?: string, reshuffleEachIteration = true):\n      Dataset<T> {\n    if (bufferSize == null || bufferSize < 0) {\n      if (this.size == null) {\n        throw new RangeError(\n            '`Dataset.shuffle()` requires bufferSize to be specified.');\n      } else {\n        throw new RangeError(\n            '`Dataset.shuffle()` requires bufferSize to be specified.  ' +\n            'If your data fits in main memory (for regular JS objects), ' +\n            'and/or GPU memory (for `tf.Tensor`s), consider setting ' +\n            `bufferSize to the dataset size (${this.size} elements)`);\n      }\n    }\n    const base = this;\n    const random = seedrandom.alea(seed || tf.util.now().toString());\n    return datasetFromIteratorFn(async () => {\n      let seed2 = random.int32();\n      if (reshuffleEachIteration) {\n        seed2 += random.int32();\n      }\n      return (await base.iterator()).shuffle(bufferSize, seed2.toString());\n    }, this.size);\n  }\n\n  /**\n   * Creates a `Dataset` with at most `count` initial elements from this\n   * dataset.\n   *\n   * ```js\n   * const a = tf.data.array([1, 2, 3, 4, 5, 6]).take(3);\n   * await a.forEachAsync(e => console.log(e));\n   * ```\n   *\n   * @param count: The number of elements of this dataset that should be taken\n   *   to form the new dataset.  If `count` is `undefined` or negative, or if\n   *   `count` is greater than the size of this dataset, the new dataset will\n   *   contain all elements of this dataset.\n   * @returns A `Dataset`.\n   *\n   * @doc {heading: 'Data', subheading: 'Classes'}\n   */\n  take(count: number): Dataset<T> {\n    const base = this;\n    let size;\n    if (this.size != null && this.size > count) {\n      // If the size of this dataset is greater than count, the new dataset's\n      // size is count.\n      size = count;\n    } else if (this.size != null && this.size <= count) {\n      // If the size of this dataset is equal or smaller than count, the new\n      // dataset's size is the size of this dataset.\n      size = this.size;\n    } else {\n      // If the size of this dataset is null, the new dataset's size is null.\n      size = null;\n    }\n    return datasetFromIteratorFn(\n        async () => (await base.iterator()).take(count), size);\n  }\n\n  /**\n   * Collect all elements of this dataset into an array.\n   *\n   * Obviously this will succeed only for small datasets that fit in memory.\n   * Useful for testing and generally should be avoided if possible.\n   *\n   * ```js\n   * const a = tf.data.array([1, 2, 3, 4, 5, 6]);\n   * console.log(await a.toArray());\n   * ```\n   *\n   * @returns A Promise for an array of elements, which will resolve\n   *   when a new stream has been obtained and fully consumed.\n   *\n   * @doc {heading: 'Data', subheading: 'Classes'}\n   */\n  async toArray() {\n    if (this.size === Infinity) {\n      throw new Error('Can not convert infinite data stream to array.');\n    }\n    return (await this.iterator()).toArray();\n  }\n\n  /**\n   * Collect all elements of this dataset into an array with prefetching 100\n   * elements. This is useful for testing, because the prefetch changes the\n   * order in which the Promises are resolved along the processing pipeline.\n   * This may help expose bugs where results are dependent on the order of\n   * Promise resolution rather than on the logical order of the stream (i.e.,\n   * due to hidden mutable state).\n   *\n   * @returns A Promise for an array of elements, which will resolve\n   *   when a new stream has been obtained and fully consumed.\n   */\n  async toArrayForTest() {\n    if (this.size === Infinity) {\n      throw new Error('Can not convert infinite data stream to array.');\n    }\n    return (await this.iterator()).toArrayForTest();\n  }\n}\n\n/**\n * Create a `Dataset` defined by a provided iterator() function.\n *\n * ```js\n * let i = -1;\n * const func = () =>\n *    ++i < 5 ? {value: i, done: false} : {value: null, done: true};\n * const iter = tf.data.iteratorFromFunction(func);\n * const ds = tf.data.datasetFromIteratorFn(iter);\n * await ds.forEachAsync(e => console.log(e));\n * ```\n */\nexport function datasetFromIteratorFn<T extends tf.TensorContainer>(\n    iteratorFn: () => Promise<LazyIterator<T>>,\n    size: number = null): Dataset<T> {\n  return new class extends Dataset<T> {\n    override size = size;\n\n    /*\n     * Provide a new stream of elements.  Note this will also start new streams\n     * from any underlying `Dataset`s.\n     */\n    async iterator(): Promise<LazyIterator<T>> {\n      return iteratorFn();\n    }\n  }\n  ();\n}\n\n/**\n * Create a `Dataset` from an array of elements.\n *\n * Create a Dataset from an array of objects:\n * ```js\n * const a = tf.data.array([{'item': 1}, {'item': 2}, {'item': 3}]);\n * await a.forEachAsync(e => console.log(e));\n * ```\n *\n * Create a Dataset from an array of numbers:\n * ```js\n * const a = tf.data.array([4, 5, 6]);\n * await a.forEachAsync(e => console.log(e));\n * ```\n * @param items An array of elements that will be parsed as items in a dataset.\n *\n * @doc {heading: 'Data', subheading: 'Creation', namespace: 'data'}\n */\nexport function array<T extends tf.TensorContainer>(items: T[]): Dataset<T> {\n  return datasetFromIteratorFn(\n      async () => iteratorFromItems(items), items.length);\n}\n\n/**\n * Create a `Dataset` by zipping together an array, dict, or nested\n * structure of `Dataset`s (and perhaps additional constants).\n * The underlying datasets must provide elements in a consistent order such that\n * they correspond.\n *\n * The number of elements in the resulting dataset is the same as the size of\n * the smallest dataset in datasets.\n *\n * The nested structure of the `datasets` argument determines the\n * structure of elements in the resulting iterator.\n *\n * Note this means that, given an array of two datasets that produce dict\n * elements, the result is a dataset that produces elements that are arrays\n * of two dicts:\n *\n * Zip an array of datasets:\n * ```js\n * console.log('Zip two datasets of objects:');\n * const ds1 = tf.data.array([{a: 1}, {a: 2}, {a: 3}]);\n * const ds2 = tf.data.array([{b: 4}, {b: 5}, {b: 6}]);\n * const ds3 = tf.data.zip([ds1, ds2]);\n * await ds3.forEachAsync(e => console.log(JSON.stringify(e)));\n *\n * // If the goal is to merge the dicts in order to produce elements like\n * // {a: ..., b: ...}, this requires a second step such as:\n * console.log('Merge the objects:');\n * const ds4 = ds3.map(x => {return {a: x[0].a, b: x[1].b}});\n * await ds4.forEachAsync(e => console.log(e));\n * ```\n *\n * Zip a dict of datasets:\n * ```js\n * const a = tf.data.array([{a: 1}, {a: 2}, {a: 3}]);\n * const b = tf.data.array([{b: 4}, {b: 5}, {b: 6}]);\n * const c = tf.data.zip({c: a, d: b});\n * await c.forEachAsync(e => console.log(JSON.stringify(e)));\n * ```\n *\n * @doc {heading: 'Data', subheading: 'Operations', namespace: 'data'}\n */\nexport function zip<O extends tf.TensorContainer>(datasets: DatasetContainer):\n    Dataset<O> {\n  // manually type-check the argument for JS users\n  if (!isIterable(datasets)) {\n    throw new Error('The argument to zip() must be an object or array.');\n  }\n  let size;\n  if (Array.isArray(datasets)) {\n    for (let i = 0; i < datasets.length; i++) {\n      size = size == null ? (datasets[i] as Dataset<O>).size :\n                            Math.min(size, (datasets[i] as Dataset<O>).size);\n    }\n  } else if (datasets instanceof Object) {\n    for (const ds in datasets) {\n      size = size == null ? (datasets[ds] as Dataset<O>).size :\n                            Math.min(size, (datasets[ds] as Dataset<O>).size);\n    }\n  }\n  return datasetFromIteratorFn<O>(async () => {\n    const streams = await deepMapAndAwaitAll(datasets, d => {\n      if (d instanceof Dataset) {\n        return {value: d.iterator(), recurse: false};\n      } else if (isIterable(d)) {\n        return {value: null, recurse: true};\n      } else {\n        throw new Error(\n            'Leaves of the structure passed to zip() must be Datasets, ' +\n            'not primitives.');\n      }\n    });\n    return iteratorFromZipped<O>(streams, ZipMismatchMode.SHORTEST);\n  }, size);\n}\n\n/**\n * A zip function for use with deepZip, passed via the columnMajorBatch call.\n *\n * Accepts an array of identically-structured nested elements and either batches\n * them (if they are primitives, numeric arrays, or Tensors) or requests\n * recursion (if not).\n */\n// tslint:disable-next-line:no-any\nfunction deepBatchConcat(rows: any[]): DeepMapResult {\n  if (rows === null) {\n    return null;\n  }\n\n  // use the first item to decide whether to recurse or batch here.\n  const exampleRow = rows[0];\n\n  if (canTensorify(exampleRow)) {\n    // rows is an array of primitives, Tensors, or arrays.  Batch them.\n    const value = batchConcat(rows);\n    return {value, recurse: false};\n  }\n\n  // the example row is an object, so recurse into it.\n  return {value: null, recurse: true};\n}\n\n/**\n * Assembles a list of same-shaped numbers, number arrays, or Tensors\n * into a single new Tensor where axis 0 is the batch dimension.\n */\nfunction batchConcat<T extends(TensorLike | tf.Tensor)>(arrays: T[]):\n    tf.Tensor {\n  if (arrays.length === 0) {\n    // We can't return an empty Tensor because we don't know the element shape.\n    throw new Error('Can\\'t make a batch of zero elements.');\n  }\n\n  if (arrays[0] instanceof tf.Tensor) {\n    // Input is an array of Tensors\n    return tf.stack(arrays as tf.Tensor[]);\n  } else {\n    // Input is a possibly-nested array of numbers.\n    return tf.tensor(arrays as TensorLike);\n  }\n}\n"]}
|