UNPKG

base2048

Version:

Binary encoding optimised for Twitter

github.com/qntm/base2048

139 lines (110 loc) • 5 kB

JavaScript

/** Base2048 is a binary-to-text encoding optimised for transmitting data through Twitter. */ // Z is a number, usually a uint11 but sometimes a uint3 const BITS_PER_CHAR = 11 // Base2048 is an 11-bit encoding const BITS_PER_BYTE = 8 // Compressed representation of inclusive ranges of characters used in this encoding. const pairStrings = [ '89AZazÆÆÐÐØØÞßææððøøþþĐđĦħııĸĸŁłŊŋŒœŦŧƀƟƢƮƱǃǝǝǤǥǶǷȜȝȠȥȴʯͰͳͶͷͻͽͿͿΑΡΣΩαωϏϏϗϯϳϳϷϸϺϿЂЂЄІЈЋЏИКикяђђєіјћџѵѸҁҊӀӃӏӔӕӘәӠӡӨөӶӷӺԯԱՖաֆאתװײؠءاؿفي٠٩ٮٯٱٴٹڿہہۃےەەۮۼۿۿܐܐܒܯݍޥޱޱ߀ߪࠀࠕࡀࡘࡠࡪࢠࢴࢶࢽऄनपरलळवहऽऽॐॐॠॡ०९ॲঀঅঌএঐওনপরললশহঽঽৎৎৠৡ০ৱ৴৹ৼৼਅਊਏਐਓਨਪਰਲਲਵਵਸਹੜੜ੦੯ੲੴઅઍએઑઓનપરલળવહઽઽૐૐૠૡ૦૯ૹૹଅଌଏଐଓନପରଲଳଵହଽଽୟୡ୦୯ୱ୷ஃஃஅஊஎஐஒஓககஙசஜஜஞடணதநபமஹௐௐ௦௲అఌఎఐఒనపహఽఽౘౚౠౡ౦౯౸౾ಀಀಅಌಎಐಒನಪಳವಹಽಽೞೞೠೡ೦೯ೱೲഅഌഎഐഒഺഽഽൎൎൔൖ൘ൡ൦൸ൺൿඅඖකනඳරලලවෆ෦෯กะาาเๅ๐๙ກຂຄຄງຈຊຊຍຍດທນຟມຣລລວວສຫອະາາຽຽເໄ໐໙ໞໟༀༀ༠༳ཀགངཇཉཌཎདནབམཛཝཨཪཬྈྌကဥဧဪဿ၉ၐၕ', '07' ] const lookupE = {} const lookupD = {} pairStrings.forEach((pairString, r) => { // Decompression const encodeRepertoire = [] pairString.match(/../gu).forEach(pair => { const first = pair.codePointAt(0) const last = pair.codePointAt(1) for (let codePoint = first; codePoint <= last; codePoint++) { encodeRepertoire.push(String.fromCodePoint(codePoint)) } }) const numZBits = BITS_PER_CHAR - BITS_PER_BYTE * r // 0 -> 11, 1 -> 3 lookupE[numZBits] = encodeRepertoire encodeRepertoire.forEach((chr, z) => { lookupD[chr] = [numZBits, z] }) }) export const encode = uint8Array => { const length = uint8Array.length let str = '' let z = 0 let numZBits = 0 for (let i = 0; i < length; i++) { const uint8 = uint8Array[i] // Take most significant bit first for (let j = BITS_PER_BYTE - 1; j >= 0; j--) { const bit = (uint8 >> j) & 1 z = (z << 1) + bit numZBits++ if (numZBits === BITS_PER_CHAR) { str += lookupE[numZBits][z] z = 0 numZBits = 0 } } } if (numZBits !== 0) { // Final bits require special treatment. // byte = bbbcccccccc, numBits = 11, padBits = 0 // byte = bbcccccccc, numBits = 10, padBits = 1 // byte = bcccccccc, numBits = 9, padBits = 2 // byte = cccccccc, numBits = 8, padBits = 3 // byte = ccccccc, numBits = 7, padBits = 4 // byte = cccccc, numBits = 6, padBits = 5 // byte = ccccc, numBits = 5, padBits = 6 // byte = cccc, numBits = 4, padBits = 7 // => Pad `byte` out to 11 bits using 1s, then encode as normal (repertoire 0) // byte = ccc, numBits = 3, padBits = 0 // byte = cc, numBits = 2, padBits = 1 // byte = c, numBits = 1, padBits = 2 // => Pad `byte` out to 3 bits using 1s, then encode specially (repertoire 1) while (!(numZBits in lookupE)) { z = (z << 1) + 1 numZBits++ } str += lookupE[numZBits][z] } return str } export const decode = str => { const length = str.length // This length is a guess. There's a chance we allocate one more byte here // than we actually need. But we can count and slice it off later const uint8Array = new Uint8Array(Math.floor(length * BITS_PER_CHAR / BITS_PER_BYTE)) let numUint8s = 0 let uint8 = 0 let numUint8Bits = 0 for (let i = 0; i < length; i++) { const chr = str.charAt(i) if (!(chr in lookupD)) { throw new Error(`Unrecognised Base2048 character: ${chr}`) } const [numZBits, z] = lookupD[chr] if (numZBits !== BITS_PER_CHAR && i !== length - 1) { throw new Error('Secondary character found before end of input at position ' + String(i)) } // Take most significant bit first for (let j = numZBits - 1; j >= 0; j--) { const bit = (z >> j) & 1 uint8 = (uint8 << 1) + bit numUint8Bits++ if (numUint8Bits === BITS_PER_BYTE) { uint8Array[numUint8s] = uint8 numUint8s++ uint8 = 0 numUint8Bits = 0 } } } // Final padding bits! Requires special consideration! // Remember how we always pad with 1s? // Note: there could be 0 such bits, check still works though if (uint8 !== ((1 << numUint8Bits) - 1)) { throw new Error('Padding mismatch') } return new Uint8Array(uint8Array.buffer, 0, numUint8s) }