write data up until error in setFromX methods (#58)

bakkot · web-flow · commit 66fa8c8b8fb9 · 2024-06-11T01:44:11.000-07:00
diff --git a/playground/polyfill-core.mjs b/playground/polyfill-core.mjs
@@ -125,7 +125,7 @@ function skipAsciiWhitespace(string, index) {
 
 function fromBase64(string, alphabet, lastChunkHandling, maxLength) {
   if (maxLength === 0) {
-    return { read: 0, bytes: [] };
+    return { read: 0, bytes: [], error: null };
   }
 
   let read = 0;
@@ -138,62 +138,69 @@ function fromBase64(string, alphabet, lastChunkHandling, maxLength) {
     if (index === string.length) {
       if (chunk.length > 0) {
         if (lastChunkHandling === 'stop-before-partial') {
-          return { bytes, read };
+          return { bytes, read, error: null };
         } else if (lastChunkHandling === 'loose') {
           if (chunk.length === 1) {
-            throw new SyntaxError('malformed padding: exactly one additional character');
+            let error = new SyntaxError('malformed padding: exactly one additional character');
+            return { bytes, read, error };
           }
           bytes.push(...decodeBase64Chunk(chunk, false));
         } else {
           assert(lastChunkHandling === 'strict');
-          throw new SyntaxError('missing padding');
+          let error = new SyntaxError('missing padding');
+          return { bytes, read, error };
         }
       }
-      return { bytes, read: string.length };
+      return { bytes, read: string.length, error: null };
     }
     let char = string[index];
     ++index;
     if (char === '=') {
       if (chunk.length < 2) {
-        throw new SyntaxError('padding is too early');
+        let error = new SyntaxError('padding is too early');
+        return { bytes, read, error };
       }
       index = skipAsciiWhitespace(string, index);
       if (chunk.length === 2) {
         if (index === string.length) {
           if (lastChunkHandling === 'stop-before-partial') {
             // two characters then `=` then EOS: this is, technically, a partial chunk
-            return { bytes, read };
+            return { bytes, read, error: null };
           }
-          throw new SyntaxError('malformed padding - only one =');
+          let error = new SyntaxError('malformed padding - only one =');
+          return { bytes, read, error };
         }
         if (string[index] === '=') {
           ++index;
           index = skipAsciiWhitespace(string, index);
         }
       }
       if (index < string.length) {
-        throw new SyntaxError('unexpected character after padding');
+        let error = new SyntaxError('unexpected character after padding');
+        return { bytes, read, error };
       }
       bytes.push(...decodeBase64Chunk(chunk, lastChunkHandling === 'strict'));
       assert(bytes.length <= maxLength);
-      return { bytes, read: string.length };
+      return { bytes, read: string.length, error: null };
     }
     if (alphabet === 'base64url') {
       if (char === '+' || char === '/') {
-        throw new SyntaxError(`unexpected character ${JSON.stringify(char)}`);
+        let error = new SyntaxError(`unexpected character ${JSON.stringify(char)}`);
+        return { bytes, read, error };
       } else if (char === '-') {
         char = '+';
       } else if (char === '_') {
         char = '/';
       }
     }
     if (!base64Characters.includes(char)) {
-      throw new SyntaxError(`unexpected character ${JSON.stringify(char)}`);
+      let error = new SyntaxError(`unexpected character ${JSON.stringify(char)}`);
+      return { bytes, read, error };
     }
     let remainingBytes = maxLength - bytes.length;
     if (remainingBytes === 1 && chunk.length === 2 || remainingBytes === 2 && chunk.length === 3) {
       // special case: we can fit exactly the number of bytes currently represented by chunk, so we were just checking for `=`
-      return { bytes, read };
+      return { bytes, read, error: null };
     }
 
     chunk += char;
@@ -203,7 +210,7 @@ function fromBase64(string, alphabet, lastChunkHandling, maxLength) {
       read = index;
       assert(bytes.length <= maxLength);
       if (bytes.length === maxLength) {
-        return { bytes, read };
+        return { bytes, read, error: null };
       }
     }
   }
@@ -231,14 +238,21 @@ export function base64ToUint8Array(string, options, into) {
 
   let maxLength = into ? into.length : 2 ** 53 - 1;
 
-  let { bytes, read } = fromBase64(string, alphabet, lastChunkHandling, maxLength);
+  let { bytes, read, error } = fromBase64(string, alphabet, lastChunkHandling, maxLength);
+  if (error && !into) {
+    throw error;
+  }
 
   bytes = new Uint8Array(bytes);
   if (into && bytes.length > 0) {
     assert(bytes.length <= into.length);
     into.set(bytes);
   }
 
+  if (error) {
+    throw error;
+  }
+
   return { read, bytes };
 }
 
@@ -254,6 +268,26 @@ export function uint8ArrayToHex(arr) {
   return out;
 }
 
+function fromHex(string, maxLength) { // decodes 2-char hex pairs; returns { read, bytes, error } instead of throwing
+  let bytes = [];
+  let read = 0;
+  if (maxLength > 0) {
+    while (read < string.length) {
+      let hexits = string.slice(read, read + 2);
+      if (/[^0-9a-fA-F]/.test(hexits)) {
+        let error = new SyntaxError('string should only contain hex characters');
+        return { read, bytes, error }; // read/bytes cover only the pairs decoded before the invalid one
+      }
+      bytes.push(parseInt(hexits, 16));
+      read += 2;
+      if (bytes.length === maxLength) {
+        break; // stop once maxLength bytes have been produced
+      }
+    }
+  }
+  return { read, bytes, error: null };
+}
+
 export function hexToUint8Array(string, into) {
   if (typeof string !== 'string') {
     throw new TypeError('expected string to be a string');
@@ -266,23 +300,9 @@ export function hexToUint8Array(string, into) {
   }
 
   let maxLength = into ? into.length : 2 ** 53 - 1;
-
-  // TODO should hex allow whitespace?
-  // TODO should hex support lastChunkHandling? (only 'strict' or 'stop-before-partial')
-  let bytes = [];
-  let index = 0;
-  if (maxLength > 0) {
-    while (index < string.length) {
-      let hexits = string.slice(index, index + 2);
-      if (/[^0-9a-fA-F]/.test(hexits)) {
-        throw new SyntaxError('string should only contain hex characters');
-      }
-      bytes.push(parseInt(hexits, 16));
-      index += 2;
-      if (bytes.length === maxLength) {
-        break;
-      }
-    }
+  let { read, bytes, error } = fromHex(string, maxLength);
+  if (error && !into) {
+    throw error;
   }
 
   bytes = new Uint8Array(bytes);
@@ -291,5 +311,9 @@ export function hexToUint8Array(string, into) {
     into.set(bytes);
   }
 
-  return { read: index, bytes };
+  if (error) {
+    throw error;
+  }
+
+  return { read, bytes };
 }
diff --git a/spec.html b/spec.html
@@ -60,7 +60,9 @@ <h1>Uint8Array.fromBase64 ( _string_ [ , _options_ ] )</h1>
     1. Let _lastChunkHandling_ be ? Get(_opts_, *"lastChunkHandling"*).
     1. If _lastChunkHandling_ is *undefined*, set _lastChunkHandling_ to *"loose"*.
     1. If _lastChunkHandling_ is not one of *"loose"*, *"strict"*, or *"stop-before-partial"*, throw a *TypeError* exception.
-    1. Let _result_ be ? FromBase64(_string_, _alphabet_, _lastChunkHandling_).
+    1. Let _result_ be FromBase64(_string_, _alphabet_, _lastChunkHandling_).
+    1. If _result_.[[Error]] is not ~none~, then
+      1. Throw _result_.[[Error]].
     1. Let _resultLength_ be the length of _result_.[[Bytes]].
     1. Let _ta_ be ? <emu-meta suppress-effects="user-code">AllocateTypedArray(*"Uint8Array"*, %Uint8Array%, *"%Uint8Array.prototype%"*, _resultLength_)</emu-meta>.
     1. Set the value at each index of _ta_.[[ViewedArrayBuffer]].[[ArrayBufferData]] to the value at the corresponding index of _result_.[[Bytes]].
@@ -84,12 +86,14 @@ <h1>Uint8Array.prototype.setFromBase64 ( _string_ [ , _options_ ] )</h1>
     1. Let _taRecord_ be MakeTypedArrayWithBufferWitnessRecord(_into_, ~seq-cst~).
     1. If IsTypedArrayOutOfBounds(_taRecord_) is *true*, throw a *TypeError* exception.
     1. Let _byteLength_ be TypedArrayLength(_taRecord_).
-    1. Let _result_ be ? FromBase64(_string_, _alphabet_, _lastChunkHandling_, _byteLength_).
+    1. Let _result_ be FromBase64(_string_, _alphabet_, _lastChunkHandling_, _byteLength_).
     1. Let _bytes_ be _result_.[[Bytes]].
     1. Let _written_ be the length of _bytes_.
     1. NOTE: FromBase64 does not invoke any user code, so the ArrayBuffer backing _into_ cannot have been detached or shrunk.
     1. Assert: _written_ ≤ _byteLength_.
     1. Perform SetUint8ArrayBytes(_into_, _bytes_).
+    1. If _result_.[[Error]] is not ~none~, then
+      1. Throw _result_.[[Error]].
     1. Let _resultObject_ be OrdinaryObjectCreate(%Object.prototype%).
     1. Perform ! CreateDataPropertyOrThrow(_resultObject_, *"read"*, 𝔽(_result_.[[Read]])).
     1. Perform ! CreateDataPropertyOrThrow(_resultObject_, *"written"*, 𝔽(_written_)).
@@ -101,7 +105,9 @@ <h1>Uint8Array.prototype.setFromBase64 ( _string_ [ , _options_ ] )</h1>
   <h1>Uint8Array.fromHex ( _string_ )</h1>
   <emu-alg>
     1. If _string_ is not a String, throw a *TypeError* exception.
-    1. Let _result_ be ? FromHex(_string_).
+    1. Let _result_ be FromHex(_string_).
+    1. If _result_.[[Error]] is not ~none~, then
+      1. Throw _result_.[[Error]].
     1. Let _resultLength_ be the length of _result_.[[Bytes]].
     1. Let _ta_ be ? <emu-meta suppress-effects="user-code">AllocateTypedArray(*"Uint8Array"*, %Uint8Array%, *"%Uint8Array.prototype%"*, _resultLength_)</emu-meta>.
     1. Set the value at each index of _ta_.[[ViewedArrayBuffer]].[[ArrayBufferData]] to the value at the corresponding index of _result_.[[Bytes]].
@@ -118,12 +124,14 @@ <h1>Uint8Array.prototype.setFromHex ( _string_ )</h1>
     1. Let _taRecord_ be MakeTypedArrayWithBufferWitnessRecord(_into_, ~seq-cst~).
     1. If IsTypedArrayOutOfBounds(_taRecord_) is *true*, throw a *TypeError* exception.
     1. Let _byteLength_ be TypedArrayLength(_taRecord_).
-    1. Let _result_ be ? FromHex(_string_, _byteLength_).
+    1. Let _result_ be FromHex(_string_, _byteLength_).
     1. Let _bytes_ be _result_.[[Bytes]].
     1. Let _written_ be the length of _bytes_.
     1. NOTE: FromHex does not invoke any user code, so the ArrayBuffer backing _into_ cannot have been detached or shrunk.
     1. Assert: _written_ ≤ _byteLength_.
     1. Perform SetUint8ArrayBytes(_into_, _bytes_).
+    1. If _result_.[[Error]] is not ~none~, then
+      1. Throw _result_.[[Error]].
     1. Let _resultObject_ be OrdinaryObjectCreate(%Object.prototype%).
     1. Perform ! CreateDataPropertyOrThrow(_resultObject_, *"read"*, 𝔽(_result_.[[Read]])).
     1. Perform ! CreateDataPropertyOrThrow(_resultObject_, *"written"*, 𝔽(_written_)).
@@ -254,7 +262,7 @@ <h1>
         _alphabet_: *"base64"* or *"base64url"*,
         _lastChunkHandling_: *"loose"*, *"strict"*, or *"stop-before-partial"*,
         optional _maxLength_: a non-negative integer,
-      ): either a normal completion containing a Record with fields [[Read]] (an integral Number) and [[Bytes]] (a List of byte values), or a throw completion
+      ): a Record with fields [[Read]] (an integral Number), [[Bytes]] (a List of byte values), and [[Error]] (either ~none~ or a SyntaxError object)
     </h1>
     <dl class="header">
     </dl>
@@ -264,7 +272,7 @@ <h1>
         1. NOTE: Because the input is a string, the length of strings is limited to 2<sup>53</sup> - 1 characters, and the output requires no more bytes than the input has characters, this limit can never be reached. However, it is editorially convenient to use a finite value here.
       1. NOTE: The order of validation and decoding in the algorithm below is not observable. Implementations are encouraged to perform them in whatever order is most efficient, possibly interleaving validation with decoding, as long as the behaviour is observably equivalent.
       1. If _maxLength_ is 0, then
-        1. Return the Record { [[Read]]: 0, [[Bytes]]: « » }.
+        1. Return the Record { [[Read]]: 0, [[Bytes]]: « », [[Error]]: ~none~ }.
       1. Let _read_ be 0.
       1. Let _bytes_ be « ».
       1. Let _chunk_ be the empty String.
@@ -276,43 +284,58 @@ <h1>
         1. If _index_ = _length_, then
           1. If _chunkLength_ > 0, then
             1. If _lastChunkHandling_ is *"stop-before-partial"*, then
-              1. Return the Record { [[Read]]: _read_, [[Bytes]]: _bytes_ }.
+              1. Return the Record { [[Read]]: _read_, [[Bytes]]: _bytes_, [[Error]]: ~none~ }.
             1. Else if _lastChunkHandling_ is *"loose"*, then
               1. If _chunkLength_ is 1, then
-                1. Throw a *SyntaxError* exception.
+                1. Let _error_ be a new *SyntaxError* exception.
+                1. Return the Record { [[Read]]: _read_, [[Bytes]]: _bytes_, [[Error]]: _error_ }.
               1. Set _bytes_ to the list-concatenation of _bytes_ and ! DecodeBase64Chunk(_chunk_, *false*).
             1. Else,
               1. Assert: _lastChunkHandling_ is *"strict"*.
-              1. Throw a *SyntaxError* exception.
-          1. Return the Record { [[Read]]: _length_, [[Bytes]]: _bytes_ }.
+              1. Let _error_ be a new *SyntaxError* exception.
+              1. Return the Record { [[Read]]: _read_, [[Bytes]]: _bytes_, [[Error]]: _error_ }.
+          1. Return the Record { [[Read]]: _length_, [[Bytes]]: _bytes_, [[Error]]: ~none~ }.
         1. Let _char_ be the substring of _string_ from _index_ to _index_ + 1.
         1. Set _index_ to _index_ + 1.
         1. If _char_ is *"="*, then
           1. If _chunkLength_ < 2, then
-            1. Throw a *SyntaxError* exception.
+            1. Let _error_ be a new *SyntaxError* exception.
+            1. Return the Record { [[Read]]: _read_, [[Bytes]]: _bytes_, [[Error]]: _error_ }.
           1. Set _index_ to SkipAsciiWhitespace(_string_, _index_).
           1. If _chunkLength_ = 2, then
             1. If _index_ = _length_, then
               1. If _lastChunkHandling_ is *"stop-before-partial"*, then
-                1. Return the Record { [[Read]]: _read_, [[Bytes]]: _bytes_ }.
-              1. Throw a *SyntaxError* exception.
+                1. Return the Record { [[Read]]: _read_, [[Bytes]]: _bytes_, [[Error]]: ~none~ }.
+              1. Let _error_ be a new *SyntaxError* exception.
+              1. Return the Record { [[Read]]: _read_, [[Bytes]]: _bytes_, [[Error]]: _error_ }.
             1. Set _char_ to the substring of _string_ from _index_ to _index_ + 1.
             1. If _char_ is *"="*, then
               1. Set _index_ to SkipAsciiWhitespace(_string_, _index_ + 1).
           1. If _index_ < _length_, then
-            1. Throw a *SyntaxError* exception.
+            1. Let _error_ be a new *SyntaxError* exception.
+            1. Return the Record { [[Read]]: _read_, [[Bytes]]: _bytes_, [[Error]]: _error_ }.
           1. If _lastChunkHandling_ is *"strict"*, let _throwOnExtraBits_ be *true*.
           1. Else, let _throwOnExtraBits_ be *false*.
-          1. Set _bytes_ to the list-concatenation of _bytes_ and ? DecodeBase64Chunk(_chunk_, _throwOnExtraBits_).
-          1. Return the Record { [[Read]]: _length_, [[Bytes]]: _bytes_ }.
+          1. Let _decodeResult_ be Completion(DecodeBase64Chunk(_chunk_, _throwOnExtraBits_)).
+          1. If _decodeResult_ is an abrupt completion, then
+            1. Let _error_ be _decodeResult_.[[Value]].
+            1. Return the Record { [[Read]]: _read_, [[Bytes]]: _bytes_, [[Error]]: _error_ }.
+          1. Set _bytes_ to the list-concatenation of _bytes_ and ! _decodeResult_.
+          1. Return the Record { [[Read]]: _length_, [[Bytes]]: _bytes_, [[Error]]: ~none~ }.
         1. If _alphabet_ is *"base64url"*, then
-          1. If _char_ is either *"+"* or *"/"*, throw a *SyntaxError* exception.
-          1. Else if _char_ is *"-"*, set _char_ to *"+"*.
-          1. Else if _char_ is *"_"*, set _char_ to *"/"*.
-        1. If the sole code unit of _char_ is not an element of the standard base64 alphabet, throw a *SyntaxError* exception.
+          1. If _char_ is either *"+"* or *"/"*, then
+            1. Let _error_ be a new *SyntaxError* exception.
+            1. Return the Record { [[Read]]: _read_, [[Bytes]]: _bytes_, [[Error]]: _error_ }.
+          1. Else if _char_ is *"-"*, then
+            1. Set _char_ to *"+"*.
+          1. Else if _char_ is *"_"*, then
+            1. Set _char_ to *"/"*.
+        1. If the sole code unit of _char_ is not an element of the standard base64 alphabet, then
+          1. Let _error_ be a new *SyntaxError* exception.
+          1. Return the Record { [[Read]]: _read_, [[Bytes]]: _bytes_, [[Error]]: _error_ }.
         1. Let _remaining_ be _maxLength_ - the length of _bytes_.
         1. If _remaining_ = 1 and _chunkLength_ = 2, or if _remaining_ = 2 and _chunkLength_ = 3, then
-          1. Return the Record { [[Read]]: _read_, [[Bytes]]: _bytes_ }.
+          1. Return the Record { [[Read]]: _read_, [[Bytes]]: _bytes_, [[Error]]: ~none~ }.
         1. Set _chunk_ to the string-concatenation of _chunk_ and _char_.
         1. Set _chunkLength_ to the length of _chunk_.
         1. If _chunkLength_ = 4, then
@@ -321,7 +344,7 @@ <h1>
           1. Set _chunkLength_ to 0.
           1. Set _read_ to _index_.
           1. If the length of _bytes_ = _maxLength_, then
-            1. Return the Record { [[Read]]: _read_, [[Bytes]]: _bytes_ }.
+            1. Return the Record { [[Read]]: _read_, [[Bytes]]: _bytes_, [[Error]]: ~none~ }.
     </emu-alg>
   </emu-clause>
 
@@ -330,23 +353,27 @@ <h1>
       FromHex (
         _string_: a string,
         optional _maxLength_: a non-negative integer,
-      ): either a normal completion containing a Record with fields [[Read]] (an integral Number) and [[Bytes]] (a List of byte values), or a throw completion
+      ): a Record with fields [[Read]] (an integral Number), [[Bytes]] (a List of byte values), and [[Error]] (either ~none~ or a SyntaxError object)
     </h1>
     <dl class="header">
     </dl>
     <emu-alg>
       1. If _maxLength_ is not present, let _maxLength_ be 2<sup>53</sup> - 1.
       1. Let _length_ be the length of _string_.
-      1. If _length_ modulo 2 is not 0, throw a *SyntaxError* exception.
       1. Let _bytes_ be « ».
-      1. Let _index_ be 0.
-      1. Repeat, while _index_ &lt; _length_ and the length of _bytes_ &lt; _maxLength_,
-        1. Let _hexits_ be the substring of _string_ from _index_ to _index_ + 2.
-        1. If _hexits_ contains any code units which are not in *"0123456789abcdefABCDEF"*, throw a *SyntaxError* exception.
-        1. Set _index_ to _index_ + 2.
+      1. Let _read_ be 0.
+      1. If _length_ modulo 2 is not 0, then
+        1. Let _error_ be a new *SyntaxError* exception.
+        1. Return the Record { [[Read]]: _read_, [[Bytes]]: _bytes_, [[Error]]: _error_ }.
+      1. Repeat, while _read_ &lt; _length_ and the length of _bytes_ &lt; _maxLength_,
+        1. Let _hexits_ be the substring of _string_ from _read_ to _read_ + 2.
+        1. If _hexits_ contains any code units which are not in *"0123456789abcdefABCDEF"*, then
+          1. Let _error_ be a new *SyntaxError* exception.
+          1. Return the Record { [[Read]]: _read_, [[Bytes]]: _bytes_, [[Error]]: _error_ }.
+        1. Set _read_ to _read_ + 2.
         1. Let _byte_ be the integer value represented by _hexits_ in base-16 notation, using the letters A-F and a-f for digits with values 10 through 15.
         1. Append _byte_ to _bytes_.
-      1. Return the Record { [[Read]]: _index_, [[Bytes]]: _bytes_ }.
+      1. Return the Record { [[Read]]: _read_, [[Bytes]]: _bytes_, [[Error]]: ~none~ }.
     </emu-alg>
   </emu-clause>
 
diff --git a/test-polyfill.mjs b/test-polyfill.mjs