diff --git a/playground/index-raw.html b/playground/index-raw.html index 0d33c2b..97ff25c 100644 --- a/playground/index-raw.html +++ b/playground/index-raw.html @@ -97,9 +97,10 @@
Two additional methods, toPartialBase64 and fromPartialBase64, allow encoding and decoding chunks of base64. This requires managing state, which is handled by returning a { result, extra } pair. The options bag for these methods takes two additional arguments, one which specifies whether more data is expected and one which specifies any extra values returned by a previous call.
Two additional methods, toPartialBase64 and fromPartialBase64, allow encoding and decoding chunks of base64. This requires managing state, which is handled by returning a { result, extra } pair; the extra argument must be round-tripped by the user to the next call as part of the options bag.
toPartialBase64 requires specifying a more parameter, which should be true for all but the final chunk; passing false for the final chunk generates the final characters and any necessary padding.
These methods are intended for lower-level use and are less convenient to use.
-Streaming versions of the hex APIs are not included since they are straightforward to do manually.
+Streaming versions of the hex APIs are not included at this time. That may change.
Streaming an ArrayBuffer into chunks of base64 strings:
@@ -116,6 +117,8 @@ Streaming
}
({ result } = extra.toPartialBase64({ more: false }));
resultChunks.push(result);
+// result chunks are guaranteed to be correctly-padded base64 strings
+
console.log(resultChunks);
// ['mpmZmZmZ', 'uT+amZmZ', 'mZnJPzMz', 'MzMzM9M/', 'mpmZmZmZ', '', '2T8=']
@@ -123,19 +126,17 @@ Streaming base64 strings into Uint8Arrays:
let chunks = ['mpmZmZmZuT+am', 'ZmZmZnJPzMz', 'MzMz', 'M9M/mpmZmZmZ', '2T8='];
-// individual chunks are not necessarily correctly-padded base64 strings
+// individual chunks are not required to be correctly-padded base64 strings
let output = new Uint8Array(new ArrayBuffer(0, { maxByteLength: 1024 }));
let result, extra;
for (let c of chunks) {
- ({ result, extra } = Uint8Array.fromPartialBase64(c, { more: true, extra }));
+ ({ result, extra, } = Uint8Array.fromPartialBase64(c, { extra }));
let offset = output.length;
let newLength = offset + result.length;
output.buffer.resize(newLength);
output.set(result, offset);
}
-// if padding was optional,
-// you'd need to do a final `fromPartialBase64` call here with `more: false`
console.log(new Float64Array(output.buffer));
// Float64Array([0.1, 0.2, 0.3, 0.4])
@@ -191,6 +192,76 @@ Streaming
.pipeTo(sink);
+fromPartialBase64 additionally allows you to write data into an existing buffer. This is handled by an into argument in the options bag, together with inputOffset and outputOffset arguments, as well as read and written properties on the returned object.
+let input = 'SGVsbG8gV29ybGQ=';
+let buffer = new Uint8Array(4);
+let outputOffset = 0;
+let inputOffset = 0;
+let extra, written, read;
+
+while (inputOffset < input.length) {
+ 0, { extra, written, read } = Uint8Array.fromPartialBase64(input, {
+ extra,
+ into: buffer,
+ inputOffset,
+ outputOffset,
+ });
+
+ inputOffset += read;
+ outputOffset += written;
+
+ if (outputOffset === buffer.length) {
+ // output is full; consume it
+ console.log([...buffer]);
+ outputOffset = 0;
+ }
+}
+if (outputOffset > 0) {
+ console.log([...buffer].slice(0, outputOffset));
+}
+
+
+This can be combined with streaming as follows:
+
+let chunks = ['VGhpcyB', 'pcyBzb2', '1lIGV4YW1w', 'bGUgZGF0YS4='];
+let output = new Uint8Array(4);
+let outputOffset = 0;
+let extra;
+for (let chunk of chunks) {
+ let written, read;
+ let inputOffset = 0;
+
+ while (inputOffset < chunk.length) {
+ 0, { extra, written, read } = Uint8Array.fromPartialBase64(chunk, {
+ extra,
+ into: output,
+ inputOffset,
+ outputOffset,
+ });
+
+ inputOffset += read;
+ outputOffset += written;
+
+ if (outputOffset === output.length) {
+ // output is full; consume it
+ console.log([...output]);
+ outputOffset = 0;
+ }
+ }
+}
+if (outputOffset > 0) {
+ console.log([...output].slice(0, outputOffset));
+}
+
+
+This is guaranteed to fill the provided Uint8Array if enough input is available to do so.
+ +At the moment there is no facility to do so. We will likely add a fromPartialHex method similar to fromPartialBase64 which provides this ability.
+
diff --git a/playground/polyfill-core.mjs b/playground/polyfill-core.mjs
index 5756f0f..b17568d 100644
--- a/playground/polyfill-core.mjs
+++ b/playground/polyfill-core.mjs
@@ -20,7 +20,7 @@ function assert(condition, message) {
}
}
-function alphabetFromIdentifier(alphabet) {
+export function alphabetFromIdentifier(alphabet) {
if (alphabet === 'base64') {
return base64Characters;
} else if (alphabet === 'base64url') {
@@ -77,101 +77,136 @@ export function uint8ArrayToBase64(arr, alphabetIdentifier = 'base64', more = fa
}
}
-export function base64ToUint8Array(str, alphabetIdentifier = 'base64', more = false, origExtra = null) {
- if (typeof str !== 'string') {
- throw new TypeError('expected str to be a string');
- }
- let alphabet = alphabetFromIdentifier(alphabetIdentifier);
- more = !!more;
- if (origExtra != null) {
- if (typeof origExtra !== 'string') {
- throw new TypeError('expected extra to be a string');
+// Decodes one base64 character, combining it with the bits carried over from the previous characters of the same 4-character group. This is extremely inefficient, but easy to reason about;
+// actual implementations should use something more efficient except possibly at boundaries. Returns { extraBitCount, extraBits, byte }; byte is null when the character does not complete a byte.
+function decodeOneBase64Character(extraBitCount, extraBits, alphabetMap, char) {
+ let val = alphabetMap.get(char); // 6-bit value of char; undefined if char is not a key of alphabetMap
+ switch (extraBitCount) {
+ case 0: {
+ // i.e., this is the first of 4 characters
+ return { extraBitCount: 6, extraBits: val, byte: null }; // all 6 bits are carried forward; no byte yet
}
- str = origExtra + str;
- }
- let map = new Map(alphabet.split('').map((c, i) => [c, i]));
-
- let extra;
- if (more) {
- let padding = str.length % 4;
- if (padding === 0) {
- extra = '';
- } else {
- extra = str.slice(-padding);
- str = str.slice(0, -padding)
+ case 2: {
+ // i.e., this is the 4th of 4 characters
+ return { extraBitCount: 0, extraBits: 0, byte: (extraBits << 6) | val }; // 2 carried bits + 6 new bits form one byte
}
- } else {
- // todo opt-in optional padding
- if (str.length % 4 !== 0) {
- throw new Error('not correctly padded');
+ case 4: {
+ // i.e., this is the 3rd of 4 characters
+ return { extraBitCount: 2, extraBits: val & 0b11, byte: (extraBits << 4) | ((val & 0b111100) >> 2) }; // 4 carried bits + top 4 new bits form the byte; low 2 bits are carried
+ }
+ case 6: {
+ // i.e., this is the 2nd of 4 characters
+ return { extraBitCount: 4, extraBits: val & 0b1111, byte: (extraBits << 2) | ((val & 0b110000) >> 4) }; // 6 carried bits + top 2 new bits form the byte; low 4 bits are carried
+ }
+ default: {
+ throw new Error(`unreachable: extraBitCount ${extraBitCount}`);
}
- extra = null;
- }
- assert(str.length % 4 === 0, 'str.length % 4 === 0');
- if (str.endsWith('==')) {
- str = str.slice(0, -2);
- } else if (str.endsWith('=')) {
- str = str.slice(0, -1);
}
+}
- let result = [];
- let i = 0;
- for (; i + 3 < str.length; i += 4) {
- let c1 = str[i];
- let c2 = str[i + 1];
- let c3 = str[i + 2];
- let c4 = str[i + 3];
- if ([c1, c2, c3, c4].some(c => !map.has(c))) {
- throw new Error('bad character');
- }
- let triplet =
- (map.get(c1) << 18) +
- (map.get(c2) << 12) +
- (map.get(c3) << 6) +
- map.get(c4);
- result.push(
- (triplet >> 16) & 255,
- (triplet >> 8) & 255,
- triplet & 255
- );
- }
- // TODO if we want to be _really_ pedantic, following the RFC, we should enforce the extra 2-4 bits are 0
- if (i + 2 === str.length) {
- // the `==` case
- let c1 = str[i];
- let c2 = str[i + 1];
- if ([c1, c2].some(c => !map.has(c))) {
- throw new Error('bad character');
+// TODO simplify. Counts the bytes that decoding str yields when extraBitCount bits are carried over from an earlier chunk; throws if trailing = padding is inconsistent with the amount of data.
+function countFullBytesInBase64StringIncludingExtraBits(str, extraBitCount) {
+ if (str === '=' && extraBitCount === 0) {
+ // special case arising when a `=` char is the second half of a `==` pair
+ return 0;
+ }
+ let paddingCharCount = str.endsWith('==') ? 2 : str.endsWith('=') ? 1 : 0; // only the last two characters are inspected
+ let fullChunks = Math.floor((str.length - paddingCharCount) / 4); // complete 4-character groups of data characters
+ let bytesFromFullChunks = fullChunks * 3; // each 4-character group accounts for 3 bytes
+ if (paddingCharCount === 2) {
+ let extraCharCount = (str.length - 2) % 4; // data characters left over after the full groups
+ let isCorrectlyPadded =
+ (extraCharCount === 0 && extraBitCount === 4)
+ || (extraCharCount === 1 && extraBitCount === 6)
+ || (extraCharCount === 2 && extraBitCount === 0)
+ || (extraCharCount === 3 && extraBitCount === 2);
+ if (!isCorrectlyPadded) {
+ throw new Error('string is incorrectly padded');
}
- let triplet =
- (map.get(c1) << 18) +
- (map.get(c2) << 12);
- result.push((triplet >> 16) & 255);
- } else if (i + 3 === str.length) {
- // the `=` case
- let c1 = str[i];
- let c2 = str[i + 1];
- let c3 = str[i + 2];
- if ([c1, c2, c3].some(c => !map.has(c))) {
- throw new Error('bad character');
+ let bytesFromExtraChars =
+ extraCharCount === 0 ? 0
+ : extraCharCount === 1 ? 1
+ : extraCharCount === 2 ? 1
+ : extraCharCount === 3 ? 2
+ : unreachable(); // NOTE(review): unreachable is not defined in the visible part of this file - confirm it exists
+ return bytesFromFullChunks + bytesFromExtraChars;
+ } else if (paddingCharCount === 1) {
+ let extraCharCount = (str.length - 1) % 4;
+ let isCorrectlyPadded = // the '||' cases arise when the string is cut off halfway through a `==` pair
+ (extraCharCount === 0 && (extraBitCount === 2 || extraBitCount === 4))
+ || (extraCharCount === 1 && (extraBitCount === 4 || extraBitCount === 6))
+ || (extraCharCount === 2 && (extraBitCount === 6 || extraBitCount === 0))
+ || (extraCharCount === 3 && (extraBitCount === 0 || extraBitCount === 2));
+ if (!isCorrectlyPadded) {
+ throw new Error('string is incorrectly padded');
}
- let triplet =
- (map.get(c1) << 18) +
- (map.get(c2) << 12) +
- (map.get(c3) << 6);
- result.push(
- (triplet >> 16) & 255,
- (triplet >> 8) & 255,
- );
+ let bytesFromExtraChars =
+ extraCharCount === 0 ? 0
+ : extraCharCount === 1 ? 1
+ : extraCharCount === 2 ? (extraBitCount === 6 ? 2 : 1)
+ : extraCharCount === 3 ? 2
+ : unreachable();
+ return bytesFromFullChunks + bytesFromExtraChars;
} else {
- assert(i === str.length);
+ let extraCharCount = (str.length) % 4; // no padding: leftover characters combine with the carried bits
+ let bytesFromExtraChars =
+ extraCharCount === 0 ? 0 // 0 bits from overflow, plus extra bits
+ : extraCharCount === 1 ? (extraBitCount === 0 ? 0 : 1) // 6 bits from overflow, plus extra bits
+ : extraCharCount === 2 ? (extraBitCount === 4 || extraBitCount === 6 ? 2 : 1) // 12 bits from overflow, plus extra bits
+ : extraCharCount === 3 ? (extraBitCount === 6 ? 3 : 2) // 18 bits from overflow, plus extra bits
+ : unreachable();
+ return bytesFromFullChunks + bytesFromExtraChars;
}
+}
- return {
- result: new Uint8Array(result),
- extra,
- };
+export function base64ToUint8Array(str, alphabet, into = null, extraBitCount = 0, extraBits = 0, inputOffset = 0, outputOffset = 0) { // decodes str from inputOffset into the given array (or a fresh Uint8Array), resuming from the carried-over bits; returns { result, read, written, extra }
+ let alphabetMap = new Map(alphabet.split('').map((c, i) => [c, i])); // character -> 6-bit value
+ str = str.slice(inputOffset);
+ let codepoints = [...str]; // NB does not validate characters before inputOffset - should it? probably already been validated, but might be faster to just run on the whole string
+ if (codepoints.some(((c, i) => c === '=' && !(i === codepoints.length - 1 || i === codepoints.length - 2) || c !== '=' && !alphabetMap.has(c)))) { // = may only appear in the last two positions; every other character must be in the alphabet
+ throw new Error('bad character');
+ }
+ let totalBytesForChunk = countFullBytesInBase64StringIncludingExtraBits(str, extraBitCount); // also kinda validates padding, if present
+ let bytesToWrite;
+ let outputIndex; // NOTE(review): never read or assigned in this function
+ if (into == null) {
+ into = new Uint8Array(totalBytesForChunk);
+ bytesToWrite = totalBytesForChunk;
+ } else {
+ bytesToWrite = Math.min(into.length - outputOffset, totalBytesForChunk); // capped by the room left in the supplied array
+ // TODO error if bytesToWrite is ≤ 0, maybe?
+ }
+ let byte;
+ let written = 0;
+ let read = 0;
+ while (written < bytesToWrite) { // consume characters one at a time until the planned number of bytes has been produced
+ let char = str[read];
+ if (char === '=') {
+ throw new Error('unreachable');
+ }
+ 0, { extraBitCount, extraBits, byte } = decodeOneBase64Character(extraBitCount, extraBits, alphabetMap, char);
+ ++read;
+ if (byte != null) {
+ into[outputOffset + written] = byte;
+ ++written;
+ }
+ }
+ if (read < str.length && str[read] === '=') {
+ read = str.length; // padding reached: report the whole remaining string as consumed
+ // TODO if we want to be really pedantic, check extraBits === 0 here
+ if (extraBitCount === 0 || extraBitCount === 6) {
+ throw new Error('unreachable: malformed padding (checked earlier)');
+ }
+ }
+ if (read < str.length && extraBitCount === 0) {
+ // we can read one more character and store it in extra
+ let char = str[read];
+ 0, { extraBitCount, extraBits } = decodeOneBase64Character(extraBitCount, extraBits, alphabetMap, char);
+ ++read;
+ }
+ let extra = extraBitCount === 0 ? void 0 : { count: extraBitCount, bits: extraBits }; // undefined when no bits are pending
+ return { result: into, read, written, extra };
}
export function uint8ArrayToHex(arr) {
diff --git a/playground/polyfill-install.mjs b/playground/polyfill-install.mjs
index 703f632..291597e 100644
--- a/playground/polyfill-install.mjs
+++ b/playground/polyfill-install.mjs
@@ -1,4 +1,14 @@
-import { checkUint8Array, uint8ArrayToBase64, base64ToUint8Array, uint8ArrayToHex, hexToUint8Array } from './polyfill-core.mjs';
+import { alphabetFromIdentifier, checkUint8Array, uint8ArrayToBase64, base64ToUint8Array, uint8ArrayToHex, hexToUint8Array } from './polyfill-core.mjs';
+
+function getOptionsObject(opts) { // normalizes an optional options-bag argument
+ if (typeof opts === 'undefined') {
+ return { __proto__: null }; // omitted options: empty bag with no inherited properties
+ }
+ if (opts === null || typeof opts !== 'object') {
+ throw new TypeError('bad options object'); // null and non-object values are rejected
+ }
+ return opts; // any non-null object is passed through unchanged
+}
Uint8Array.prototype.toBase64 = function (opts) {
checkUint8Array(this);
@@ -22,10 +32,15 @@ Uint8Array.fromBase64 = function (string, opts) {
if (typeof string !== 'string') {
throw new Error('expected argument to be a string');
}
- let alphabet;
- if (opts && typeof opts === 'object') {
- 0, { alphabet } = opts;
+ if (string.endsWith('=')) {
+ // TODO should we validate padding even if absent? i.e. enforce length%4===0 unconditionally?
+ if (string.length % 4 !== 0 || string.endsWith('===')) {
+ throw new Error('bad padding');
+ }
}
+ opts = getOptionsObject(opts);
+ let { alphabet: alphabetIdentifier } = opts;
+ let alphabet = typeof alphabetIdentifier === 'undefined' ? alphabetFromIdentifier('base64') : alphabetFromIdentifier(alphabetIdentifier);
return base64ToUint8Array(string, alphabet).result;
};
@@ -33,11 +48,38 @@ Uint8Array.fromPartialBase64 = function (string, opts) {
if (typeof string !== 'string') {
throw new Error('expected argument to be a string');
}
- let alphabet, more, extra;
- if (opts && typeof opts === 'object') {
- 0, { alphabet, more, extra } = opts;
+ opts = getOptionsObject(opts);
+ let { alphabet: alphabetIdentifier, extra, into, inputOffset, outputOffset } = opts;
+ let alphabet = typeof alphabetIdentifier === 'undefined' ? alphabetFromIdentifier('base64') : alphabetFromIdentifier(alphabetIdentifier);
+ let extraBitCount = 0, extraBits = 0;
+ if (extra !== null && typeof extra === 'object') {
+ 0, { count: extraBitCount, bits: extraBits } = extra;
+ if (typeof extraBitCount !== 'number' || !(extraBitCount === 0 || extraBitCount === 2 || extraBitCount === 4 || extraBitCount === 6)) {
+ throw new TypeError('bit count must be 0, 2, 4, or 6');
+ }
+ if (typeof extraBits !== 'number' || extraBits < 0 || (extraBits | 0) !== extraBits || (extraBitCount === 0 && extraBits !== 0) || (extraBitCount === 2 && extraBits >= 4) || (extraBitCount === 4 && extraBits >= 16) || (extraBitCount === 6 && extraBits >= 64)) {
+ throw new TypeError('bits not well-formed');
+ }
+ } else if (typeof extra !== 'undefined') {
+ throw new TypeError('extra must be an object');
+ }
+ if (typeof into !== 'undefined') {
+ checkUint8Array(into);
+ }
+ if (typeof inputOffset !== 'undefined') {
+ if (typeof inputOffset !== 'number' || Math.floor(inputOffset) !== inputOffset || inputOffset < 0 || inputOffset >= string.length) {
+ throw new TypeError('bad inputOffset');
+ }
+ }
+ if (typeof outputOffset !== 'undefined') {
+ if (typeof into === 'undefined') {
+ throw new TypeError('outputOffset cannot be used with into');
+ }
+ if (typeof outputOffset !== 'number' || Math.floor(outputOffset) !== outputOffset || outputOffset < 0 || outputOffset >= outputOffset.length) {
+ throw new TypeError('bad outputOffset');
+ }
}
- return base64ToUint8Array(string, alphabet, more, extra);
+ return base64ToUint8Array(string, alphabet, into, extraBitCount, extraBits, inputOffset, outputOffset);
};
Uint8Array.prototype.toHex = function () {
diff --git a/spec.html b/spec.html
index 4cd2943..0e76839 100644
--- a/spec.html
+++ b/spec.html
@@ -116,6 +116,7 @@