@huggingface/tasks
Version:
List of ML tasks for huggingface.co/tasks
204 lines (186 loc) • 5.94 kB
text/typescript
// This list is copied from gguf/types.ts, but with all types available (for backward compatibility)
// NOT to be confused with GGMLQuantizationType: a file quantization can contain multiple GGMLQuantizationType values
// For example, a Q4_K_M model can contain Q4_K and Q6_K tensors
/**
 * File-level quantization scheme of a GGUF model.
 *
 * A single file scheme may mix several tensor-level `GGMLQuantizationType` values.
 * Members with values >= 1000 are naming-only entries and not official GGUF scheme values.
 */
export enum GGMLFileQuantizationType {
	F32 = 0,
	F16 = 1,
	Q4_0 = 2,
	Q4_1 = 3,
	Q4_1_SOME_F16 = 4,
	Q4_2 = 5,
	Q4_3 = 6,
	Q8_0 = 7,
	Q5_0 = 8,
	Q5_1 = 9,
	Q2_K = 10,
	Q3_K_S = 11,
	Q3_K_M = 12,
	Q3_K_L = 13,
	Q4_K_S = 14,
	Q4_K_M = 15,
	Q5_K_S = 16,
	Q5_K_M = 17,
	Q6_K = 18,
	IQ2_XXS = 19,
	IQ2_XS = 20,
	Q2_K_S = 21,
	IQ3_XS = 22,
	IQ3_XXS = 23,
	IQ1_S = 24,
	IQ4_NL = 25,
	IQ3_S = 26,
	IQ3_M = 27,
	IQ2_S = 28,
	IQ2_M = 29,
	IQ4_XS = 30,
	IQ1_M = 31,
	BF16 = 32,
	Q4_0_4_4 = 33,
	Q4_0_4_8 = 34,
	Q4_0_8_8 = 35,
	TQ1_0 = 36,
	TQ2_0 = 37,
	MXFP4_MOE = 38,
	// custom quants used by unsloth
	// they are not officially a scheme enum value in GGUF, but only here for naming
	Q2_K_XL = 1000,
	Q3_K_XL = 1001,
	Q4_K_XL = 1002,
	Q5_K_XL = 1003,
	Q6_K_XL = 1004,
	Q8_K_XL = 1005,
}

// Names of all enum members. A numeric enum also exposes its numeric values through
// Object.values, so those are filtered out.
const ggufQuants = Object.values(GGMLFileQuantizationType)
	.filter((v): v is string => typeof v === "string")
	// Regex alternation takes the first alternative that matches, so a name must be listed before
	// any of its own prefixes. Otherwise "Q4_0" shadows "Q4_0_4_4" and "Q4_1" shadows "Q4_1_SOME_F16".
	.sort((a, b) => b.length - a.length);

/**
 * Matches one quantization label inside an upper-cased string.
 *
 * Named groups:
 * - `quant`: the enum member name that matched (the longest name that fits at that position).
 * - `sizeVariation`: an optional trailing `_LETTERS` suffix that is not part of the enum name.
 */
export const GGUF_QUANT_RE = new RegExp(`(?<quant>${ggufQuants.join("|")})` + "(_(?<sizeVariation>[A-Z]+))?");

/** Same pattern as `GGUF_QUANT_RE` with the global flag, for collecting every label in a string. */
export const GGUF_QUANT_RE_GLOBAL = new RegExp(GGUF_QUANT_RE, "g");

/**
 * Extracts the quantization label from a file name.
 *
 * @param fname File name (any casing); it is upper-cased before matching.
 * @returns The matched label in upper case (including any size-variation suffix), or `undefined`
 * when the name contains no known quantization label.
 */
export function parseGGUFQuantLabel(fname: string): string | undefined {
	const labels = fname.toUpperCase().match(GGUF_QUANT_RE_GLOBAL);
	if (labels === null) {
		return undefined;
	}
	// if there are multiple quant substrings in a name, we prefer the last one
	return labels[labels.length - 1];
}
// order of quantization, from biggest to smallest
// this list must be in sync with the order in GGMLFileQuantizationType
// the gguf.spec.ts tests verify that the order is correct
/**
 * Ranking of file quantization types. The position in this array (not the numeric enum value)
 * is the rank that `findNearestQuantType` uses to compare two types: a lower index sorts first.
 */
export const GGUF_QUANT_ORDER: GGMLFileQuantizationType[] = [
// float types and 8-bit quantizations
GGMLFileQuantizationType.F32,
GGMLFileQuantizationType.BF16,
GGMLFileQuantizationType.F16,
GGMLFileQuantizationType.Q8_K_XL,
GGMLFileQuantizationType.Q8_0,
// 6-bit quantizations
GGMLFileQuantizationType.Q6_K_XL,
GGMLFileQuantizationType.Q6_K,
// 5-bit quantizations
GGMLFileQuantizationType.Q5_K_XL,
GGMLFileQuantizationType.Q5_K_M,
GGMLFileQuantizationType.Q5_K_S,
GGMLFileQuantizationType.Q5_0,
GGMLFileQuantizationType.Q5_1,
// 4-bit quantizations
GGMLFileQuantizationType.Q4_K_XL,
GGMLFileQuantizationType.Q4_K_M,
GGMLFileQuantizationType.Q4_K_S,
GGMLFileQuantizationType.IQ4_NL,
GGMLFileQuantizationType.IQ4_XS,
GGMLFileQuantizationType.Q4_0_4_4,
GGMLFileQuantizationType.Q4_0_4_8,
GGMLFileQuantizationType.Q4_0_8_8,
GGMLFileQuantizationType.Q4_1_SOME_F16,
GGMLFileQuantizationType.Q4_0,
GGMLFileQuantizationType.Q4_1,
GGMLFileQuantizationType.Q4_2,
GGMLFileQuantizationType.Q4_3,
GGMLFileQuantizationType.MXFP4_MOE,
// 3-bit quantizations
GGMLFileQuantizationType.Q3_K_XL,
GGMLFileQuantizationType.Q3_K_L,
GGMLFileQuantizationType.Q3_K_M,
GGMLFileQuantizationType.Q3_K_S,
GGMLFileQuantizationType.IQ3_M,
GGMLFileQuantizationType.IQ3_S,
GGMLFileQuantizationType.IQ3_XS,
GGMLFileQuantizationType.IQ3_XXS,
// 2-bit quantizations
GGMLFileQuantizationType.Q2_K_XL,
GGMLFileQuantizationType.Q2_K,
GGMLFileQuantizationType.Q2_K_S,
GGMLFileQuantizationType.IQ2_M,
GGMLFileQuantizationType.IQ2_S,
GGMLFileQuantizationType.IQ2_XS,
GGMLFileQuantizationType.IQ2_XXS,
// 1-bit quantizations
GGMLFileQuantizationType.IQ1_S,
GGMLFileQuantizationType.IQ1_M,
GGMLFileQuantizationType.TQ1_0,
GGMLFileQuantizationType.TQ2_0,
];
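// This function finds the nearest available quantization type that is smaller than or equal to the given quantization type.
// If every available type is larger, it falls back to the smallest available one; it returns undefined only when none of the available types is listed in GGUF_QUANT_ORDER.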
/**
 * Picks, among `availableQuants`, the type ranked closest to `quant` in `GGUF_QUANT_ORDER`
 * without being larger than it.
 *
 * @param quant The desired quantization type. A type missing from `GGUF_QUANT_ORDER` is treated as rank 0.
 * @param availableQuants Candidate types; entries missing from `GGUF_QUANT_ORDER` are ignored.
 * @returns The first candidate (in rank order) whose rank is >= the rank of `quant`; if every
 * candidate ranks before `quant`, the last-ranked candidate; `undefined` when no candidate is ranked.
 */
export function findNearestQuantType(
	quant: GGMLFileQuantizationType,
	availableQuants: GGMLFileQuantizationType[]
): GGMLFileQuantizationType | undefined {
	// rank = position in GGUF_QUANT_ORDER (0 is the largest type)
	const rankOf = new Map<GGMLFileQuantizationType, number>(
		GGUF_QUANT_ORDER.map((type, rank): [GGMLFileQuantizationType, number] => [type, rank])
	);
	const wantedRank = rankOf.get(quant) ?? 0; // the 0 case should never happen

	// Keep only candidates that have a rank, remembering the rank next to each one.
	const ranked: { type: GGMLFileQuantizationType; rank: number }[] = [];
	for (const type of availableQuants) {
		const rank = rankOf.get(type);
		if (rank !== undefined) {
			ranked.push({ type, rank });
		}
	}
	// Largest (lowest rank) first, smallest (highest rank) last.
	ranked.sort((a, b) => a.rank - b.rank);

	// Prefer the largest candidate that is not larger than the target; when every candidate is
	// larger than the target, settle for the smallest one. `nearest` is undefined only when
	// `ranked` is empty.
	const nearest = ranked.find((candidate) => candidate.rank >= wantedRank) ?? ranked[ranked.length - 1];
	return nearest?.type;
}
// This list is only used to calculate the size of the model, NOT to be confused with the quantization FILE type
/**
 * Tensor-level data types, as opposed to the file-level `GGMLFileQuantizationType`.
 * The numeric values are not contiguous: some numbers are intentionally absent from this list.
 */
export enum GGMLQuantizationType {
F32 = 0,
F16 = 1,
Q4_0 = 2,
Q4_1 = 3,
// 4 and 5 are not defined in this enum
Q5_0 = 6,
Q5_1 = 7,
Q8_0 = 8,
Q8_1 = 9,
Q2_K = 10,
Q3_K = 11,
Q4_K = 12,
Q5_K = 13,
Q6_K = 14,
Q8_K = 15,
IQ2_XXS = 16,
IQ2_XS = 17,
IQ3_XXS = 18,
IQ1_S = 19,
IQ4_NL = 20,
IQ3_S = 21,
IQ2_S = 22,
IQ4_XS = 23,
I8 = 24,
I16 = 25,
I32 = 26,
I64 = 27,
F64 = 28,
IQ1_M = 29,
BF16 = 30,
// 31 to 33 are not defined in this enum
TQ1_0 = 34,
TQ2_0 = 35,
// 36 to 38 are not defined in this enum
MXFP4 = 39,
}