UNPKG

remeda

Version:

A utility library for JavaScript and Typescript.

1 lines 9.29 kB
{"version":3,"file":"sample.cjs","names":["purry"],"sources":["../src/sample.ts"],"sourcesContent":["import type {\n FixedLengthArray,\n IsEqual,\n IsNever,\n NonNegativeInteger,\n Or,\n Writable,\n} from \"type-fest\";\nimport type { CoercedArray } from \"./internal/types/CoercedArray\";\nimport type { IterableContainer } from \"./internal/types/IterableContainer\";\nimport type { NTuple } from \"./internal/types/NTuple\";\nimport type { PartialArray } from \"./internal/types/PartialArray\";\nimport type { TupleParts } from \"./internal/types/TupleParts\";\nimport { purry } from \"./purry\";\n\ntype Sampled<T extends IterableContainer, N extends number> =\n Or<IsEqual<N, 0>, IsEqual<T[\"length\"], 0>> extends true\n ? // Short-circuit on trivial inputs.\n []\n : IsNever<NonNegativeInteger<N>> extends true\n ? SampledPrimitive<T>\n : IsLongerThan<T, N> extends true\n ? SampledLiteral<T, N>\n : // If our tuple can never fulfil the sample size the only valid sample\n // is the whole input tuple. Because it's a shallow clone we also\n // strip any readonly-ness.\n Writable<T>;\n\n/**\n * When N is not a non-negative integer **literal** we can't use it in our\n * reconstructing logic so we fallback to a simpler definition of the output of\n * sample, which is any sub-tuple shape of T, of **any length**.\n */\ntype SampledPrimitive<T extends IterableContainer> = [\n ...FixedSubTuples<TupleParts<T>[\"required\"]>,\n // TODO: This might be accurate, but We currently have no tests that check optional elements!\n ...PartialArray<FixedSubTuples<TupleParts<T>[\"optional\"]>>,\n ...CoercedArray<TupleParts<T>[\"item\"]>,\n ...FixedSubTuples<TupleParts<T>[\"suffix\"]>,\n];\n\n/**\n * Knowing N is a non-negative literal integer we can construct all sub-tuples\n * of T that are exactly N elements long.\n */\ntype SampledLiteral<T extends IterableContainer, N extends number> =\n | Extract<\n FixedSubTuples<\n [\n ...TupleParts<T>[\"required\"],\n // TODO: This deliberately ignores optional elements which we don't have tests for either. In order to handle optional elements we can treat the \"optional\" tuple-part as more required elements.\n // We add N elements of the `item` type to the tuple so that we\n // consider any combination possible of elements of the prefix items,\n // any amount of rest items, and suffix items.\n ...(IsNever<TupleParts<T>[\"item\"]> extends true\n ? []\n : NTuple<TupleParts<T>[\"item\"], N>),\n ...TupleParts<T>[\"suffix\"],\n ]\n >,\n // This is just [unknown, unknown, ..., unknown] with N elements.\n FixedLengthArray<unknown, N>\n >\n // In addition to all sub-tuples of length N, we also need to consider all\n // tuples where the input is shorter than N. This will contribute exactly\n // one sub-tuple at each length from the minimum length of T and up to N-1.\n | SubSampled<\n TupleParts<T>[\"required\"],\n // TODO: This deliberately ignores optional elements which we don't have tests for either. In order to handle optional elements we can treat the \"optional\" tuple-part as more required elements.\n TupleParts<T>[\"item\"],\n TupleParts<T>[\"suffix\"],\n N\n >;\n\n// We want to create a union of all sub-tuples where we incrementally add an\n// additional element of the type of the rest element in the middle between the\n// prefix and suffix until we \"fill\" the tuple to size N.\ntype SubSampled<\n Prefix extends ReadonlyArray<unknown>,\n Item,\n Suffix extends ReadonlyArray<unknown>,\n N extends number,\n> =\n IsLongerThan<[...Prefix, ...Suffix], N> extends true\n ? // We need to prevent overflows in case Prefix and Suffix are already long\n // enough\n never\n : [...Prefix, ...Suffix][\"length\"] extends N\n ? never\n : [...Prefix, ...Suffix] | SubSampled<[...Prefix, Item], Item, Suffix, N>;\n\ntype IsLongerThan<T extends ReadonlyArray<unknown>, N extends number> =\n // Checking for `undefined` is a neat trick to avoid needing to compare\n // integer literals because if N overflows the tuple then the type for that\n // element will be `undefined`. This only works for fixed tuples!\n IsEqual<T[N], undefined> extends true ? false : true;\n\n// Assuming T is a fixed tuple we build all it's possible sub-tuples.\ntype FixedSubTuples<T> = T extends readonly [infer Head, ...infer Rest]\n ? // For each element we either take it or skip it, and recurse over the rest.\n FixedSubTuples<Rest> | [Head, ...FixedSubTuples<Rest>]\n : [];\n\n/**\n * Returns a random subset of size `sampleSize` from `array`.\n *\n * Maintains and infers most of the typing information that could be passed\n * along to the output. This means that when using tuples, the output will be\n * a tuple too, and when using literals, those literals would be preserved.\n *\n * The items in the result are kept in the same order as they are in the input.\n * If you need to get a shuffled response you can pipe the shuffle function\n * after this one.\n *\n * @param data - The array.\n * @param sampleSize - The number of elements to take.\n * @signature\n * R.sample(array, sampleSize)\n * @example\n * R.sample([\"hello\", \"world\"], 1); // => [\"hello\"] // typed string[]\n * R.sample([\"hello\", \"world\"] as const, 1); // => [\"world\"] // typed [\"hello\" | \"world\"]\n * @dataFirst\n * @category Array\n */\nexport function sample<const T extends IterableContainer, N extends number>(\n data: T,\n sampleSize: N,\n): Sampled<T, N>;\n\n/**\n * Returns a random subset of size `sampleSize` from `array`.\n *\n * Maintains and infers most of the typing information that could be passed\n * along to the output. This means that when using tuples, the output will be\n * a tuple too, and when using literals, those literals would be preserved.\n *\n * The items in the result are kept in the same order as they are in the input.\n * If you need to get a shuffled response you can pipe the shuffle function\n * after this one.\n *\n * @param sampleSize - The number of elements to take.\n * @signature\n * R.sample(sampleSize)(array)\n * @example\n * R.sample(1)([\"hello\", \"world\"]); // => [\"hello\"] // typed string[]\n * R.sample(1)([\"hello\", \"world\"] as const); // => [\"world\"] // typed [\"hello\" | \"world\"]\n * @dataLast\n * @category Array\n */\nexport function sample<const T extends IterableContainer, N extends number>(\n sampleSize: N,\n): (data: T) => Sampled<T, N>;\n\nexport function sample(...args: ReadonlyArray<unknown>): unknown {\n return purry(sampleImplementation, args);\n}\n\nfunction sampleImplementation<T>(\n data: ReadonlyArray<T>,\n sampleSize: number,\n): Array<T> {\n if (sampleSize <= 0) {\n // Trivial\n return [];\n }\n\n if (sampleSize >= data.length) {\n // Trivial\n return [...data];\n }\n\n // We have 2 modes of sampling, depending on the size of the sample requested.\n // 1. If sampleSize is _small_, we generate indices that we then use to\n // *EXTRACT* individual elements from the array.\n // 2. If sampleSize is _large_, we instead generate indices to *EXCLUDE* from\n // a full scan of the input array (via filtering).\n //\n // This allows us 2 optimizations that are the core of how this function\n // works:\n // 1. It is hard to generate a large number of unique indices, as the more\n // indices we generate the more likely we are to generate one that is already\n // in the set, which would require more iterations of the generation loop.\n // Capping our effective sampleSize at n/2 would put an upper limit to the\n // average number of iterations required (as a function of n).\n // 2. If sample size is small enough, we never need to actually iterate over\n // the full input array at all; instead we simply project the values we need\n // via random access into the array. This means that for sampleSize (K) less\n // than n/2, we run at O(klogk). For large sampleSize we need to iterate over\n // the full input array, but we don't need to sort the indices because we can\n // use the Set's 'has' method, so we effectively run at O(n).\n const actualSampleSize = Math.min(sampleSize, data.length - sampleSize);\n\n const sampleIndices = new Set<number>();\n while (sampleIndices.size < actualSampleSize) {\n const randomIndex = Math.floor(Math.random() * data.length);\n sampleIndices.add(randomIndex);\n }\n\n if (sampleSize === actualSampleSize) {\n return [...sampleIndices]\n .sort((a, b) => a - b)\n .map((index) => data[index]!);\n }\n\n return data.filter((_, index) => !sampleIndices.has(index));\n}\n"],"mappings":"wCAyJA,SAAgB,EAAO,GAAG,EAAuC,CAC/D,OAAOA,EAAAA,EAAM,EAAsB,EAAK,CAG1C,SAAS,EACP,EACA,EACU,CACV,GAAI,GAAc,EAEhB,MAAO,EAAE,CAGX,GAAI,GAAc,EAAK,OAErB,MAAO,CAAC,GAAG,EAAK,CAsBlB,IAAM,EAAmB,KAAK,IAAI,EAAY,EAAK,OAAS,EAAW,CAEjE,EAAgB,IAAI,IAC1B,KAAO,EAAc,KAAO,GAAkB,CAC5C,IAAM,EAAc,KAAK,MAAM,KAAK,QAAQ,CAAG,EAAK,OAAO,CAC3D,EAAc,IAAI,EAAY,CAShC,OANI,IAAe,EACV,CAAC,GAAG,EAAc,CACtB,MAAM,EAAG,IAAM,EAAI,EAAE,CACrB,IAAK,GAAU,EAAK,GAAQ,CAG1B,EAAK,QAAQ,EAAG,IAAU,CAAC,EAAc,IAAI,EAAM,CAAC"}