diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 0279380..8b26207 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -10,7 +10,7 @@ jobs:
         submodules: recursive
     - uses: actions/setup-python@v1
       with:
-        python-version: 3.8
+        python-version: 3.14
     - name: Run lint
       run: |
         python -m pip install tox
@@ -22,7 +22,7 @@ jobs:
       fail-fast: false
       matrix:
         os: [ubuntu-latest, macos-latest, windows-latest]
-        python-version: ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12"]
+        python-version: ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12", "3.13", "3.14"]
     steps:
     - name: Set git to use LF on Windows
       if: runner.os == 'Windows'
diff --git a/HISTORY.rst b/HISTORY.rst
index 18ed2a4..916a6a8 100644
--- a/HISTORY.rst
+++ b/HISTORY.rst
@@ -1,6 +1,16 @@
 Changelog
 =========
 
+1.2.0.0 (TBD)
+--------------------
+
+- Upgraded libbrotli to v1.2.0.
+- Added ``output_buffer_limit`` parameter to ``Decompressor.decompress()`` and
+  ``Decompressor.process()`` methods to allow mitigation of unexpectedly large
+  output. This addresses potential security concerns where maliciously crafted
+  compressed data could result in excessive memory usage during decompression.
+
+
 1.1.0.0 (2023-09-14)
 --------------------
 
diff --git a/libbrotli b/libbrotli
index ed738e8..028fb5a 160000
--- a/libbrotli
+++ b/libbrotli
@@ -1 +1 @@
-Subproject commit ed738e842d2fbdf2d6459e39267a633c4a9b2f5d
+Subproject commit 028fb5a23661f123017c060daa546b55cf4bde29
diff --git a/setup.py b/setup.py
index c545fd1..56a286a 100644
--- a/setup.py
+++ b/setup.py
@@ -119,5 +119,7 @@ def finalize_options(self):
         "Programming Language :: Python :: 3.10",
         "Programming Language :: Python :: 3.11",
         "Programming Language :: Python :: 3.12",
+        "Programming Language :: Python :: 3.13",
+        "Programming Language :: Python :: 3.14",
     ]
 )
diff --git a/src/brotlicffi/__init__.py b/src/brotlicffi/__init__.py
index abc82e5..4ad22f0 100644
--- a/src/brotlicffi/__init__.py
+++ b/src/brotlicffi/__init__.py
@@ -5,4 +5,4 @@
     Compressor, MODE_GENERIC, MODE_TEXT, MODE_FONT, error, Error
 )
 
-__version__ = "1.1.0.0"
+__version__ = "1.2.0.0"
diff --git a/src/brotlicffi/_api.py b/src/brotlicffi/_api.py
index b80b370..b7e34be 100644
--- a/src/brotlicffi/_api.py
+++ b/src/brotlicffi/_api.py
@@ -348,6 +348,10 @@ class Decompressor(object):
     .. versionchanged:: 0.5.0
        Added ``dictionary`` parameter.
 
+    .. versionchanged:: 1.2.0
+       Added ``can_accept_more_data()`` method and optional
+       ``output_buffer_limit`` parameter to ``process()``/``decompress()``.
+
     :param dictionary: A pre-set dictionary for LZ77. Please use this with
         caution: if a dictionary is used for compression, the same dictionary
         **must** be used for decompression!
@@ -355,10 +359,12 @@
     """
     _dictionary = None
     _dictionary_size = None
+    _unconsumed_data = None
 
     def __init__(self, dictionary=b''):
         dec = lib.BrotliDecoderCreateInstance(ffi.NULL, ffi.NULL, ffi.NULL)
         self._decoder = ffi.gc(dec, lib.BrotliDecoderDestroyInstance)
+        self._unconsumed_data = b''
 
         if dictionary:
             self._dictionary = ffi.new("uint8_t []", dictionary)
@@ -369,23 +375,73 @@ def __init__(self, dictionary=b''):
                 self._dictionary
             )
 
-    def decompress(self, data):
+    @staticmethod
+    def _calculate_buffer_size(
+        input_data_len, output_buffer_limit, chunks_len, chunks_num
+    ):
+        if output_buffer_limit is not None:
+            return output_buffer_limit - chunks_len
+        # When `decompress(b'')` is called without `output_buffer_limit`.
+        elif input_data_len == 0:
+            # libbrotli uses 32 KB as a starting buffer size and doubles it
+            # each time, capped at 16 MB.
+            # https://github.com/google/brotli/blob/028fb5a23661f123017c060daa546b55cf4bde29/python/_brotli.c#L291-L292
+            return 1 << min(chunks_num + 15, 24)
+        else:
+            # Allocate a buffer that's hopefully overlarge, but if it's not we
+            # don't mind: we'll spin around again.
+            return 5 * input_data_len
+
+    def decompress(self, data, output_buffer_limit=None):
         """
         Decompress part of a complete Brotli-compressed string.
 
+        .. versionchanged:: 1.2.0
+           Added ``output_buffer_limit`` parameter.
+
         :param data: A bytestring containing Brotli-compressed data.
+        :param output_buffer_limit: Optional maximum size for the output
+            buffer. If set, the output buffer will not grow once its size
+            equals or exceeds this value. If the limit is reached, subsequent
+            calls to ``decompress()``/``process()`` must pass empty input
+            until ``can_accept_more_data()`` returns ``True``; each such call
+            will continue to yield more of the buffered output.
+        :type output_buffer_limit: ``int`` or ``None``
        :returns: A bytestring containing the decompressed data.
        """
+        if self._unconsumed_data and data:
+            raise error(
+                "brotli: decoder process called with data when "
+                "'can_accept_more_data()' is False"
+            )
+
+        # Avoid touching `self._unconsumed_data` if no data is going to be
+        # processed.
+        if output_buffer_limit is not None and output_buffer_limit <= 0:
+            return b''
+
+        # Use unconsumed data if available, use new data otherwise.
+        if self._unconsumed_data:
+            input_data = self._unconsumed_data
+            self._unconsumed_data = b''
+        else:
+            input_data = data
+
         chunks = []
+        chunks_len = 0
 
-        available_in = ffi.new("size_t *", len(data))
-        in_buffer = ffi.new("uint8_t[]", data)
+        available_in = ffi.new("size_t *", len(input_data))
+        in_buffer = ffi.new("uint8_t[]", input_data)
         next_in = ffi.new("uint8_t **", in_buffer)
 
         while True:
-            # Allocate a buffer that's hopefully overlarge, but if it's not we
-            # don't mind: we'll spin around again.
-            buffer_size = 5 * len(data)
+            buffer_size = self._calculate_buffer_size(
+                input_data_len=len(input_data),
+                output_buffer_limit=output_buffer_limit,
+                chunks_len=chunks_len,
+                chunks_num=len(chunks),
+            )
+
             available_out = ffi.new("size_t *", buffer_size)
             out_buffer = ffi.new("uint8_t[]", buffer_size)
             next_out = ffi.new("uint8_t **", out_buffer)
@@ -408,6 +464,19 @@ def decompress(self, data):
             # Next, copy the result out.
             chunk = ffi.buffer(out_buffer, buffer_size - available_out[0])[:]
             chunks.append(chunk)
+            chunks_len += len(chunk)
+
+            # Save any unconsumed input for the next call.
+            if available_in[0] > 0:
+                remaining_input = ffi.buffer(next_in[0], available_in[0])[:]
+                self._unconsumed_data = remaining_input
+
+            # Check if we've reached the output limit.
+            if (
+                output_buffer_limit is not None
+                and chunks_len >= output_buffer_limit
+            ):
+                break
 
             if rc == lib.BROTLI_DECODER_RESULT_NEEDS_MORE_INPUT:
                 assert available_in[0] == 0
@@ -459,3 +528,30 @@ def is_finished(self):
             is complete, ``False`` otherwise
         """
         return lib.BrotliDecoderIsFinished(self._decoder) == lib.BROTLI_TRUE
+
+    def can_accept_more_data(self):
+        """
+        Checks if the decompressor can accept more compressed data.
+
+        If the ``output_buffer_limit`` parameter was used with
+        ``decompress()`` or ``process()``, this method should be checked to
+        determine if the decompressor is ready to accept new input. When the
+        output buffer limit is reached, the decompressor may still have
+        unconsumed input data or internal buffered output, and calling
+        ``decompress(b'')`` repeatedly will continue producing output until
+        this method returns ``True``.
+
+        .. versionadded:: 1.2.0
+
+        :returns: ``True`` if the decompressor is ready to accept more
+            compressed data via ``decompress()`` or ``process()``, ``False``
+            if the decompressor needs to output some data via
+            ``decompress(b'')``/``process(b'')`` before being provided any
+            more compressed data.
+        :rtype: ``bool``
+        """
+        if len(self._unconsumed_data) > 0:
+            return False
+        if lib.BrotliDecoderHasMoreOutput(self._decoder) == lib.BROTLI_TRUE:
+            return False
+        return True
diff --git a/test/test_simple_decompression.py b/test/test_simple_decompression.py
index f7496ed..232a730 100644
--- a/test/test_simple_decompression.py
+++ b/test/test_simple_decompression.py
@@ -38,6 +38,57 @@ def test_decompressobj(simple_compressed_file):
     assert data == uncompressed_data
 
 
+# `more_data_limit` allows testing `decompress(b'')` with and without a limit.
+@pytest.mark.parametrize('more_data_limit', [100, None])
+def test_decompressobj_with_output_buffer_limit(
+    simple_compressed_file, more_data_limit
+):
+    """
+    Test decompression with `output_buffer_limit` set.
+    """
+    with open(simple_compressed_file[0], 'rb') as f:
+        uncompressed_data = f.read()
+
+    with open(simple_compressed_file[1], 'rb') as f:
+        compressed_data = f.read()
+
+    o = brotlicffi.Decompressor()
+    assert o.can_accept_more_data()
+    small_limit = 100
+    result = o.decompress(compressed_data, output_buffer_limit=small_limit)
+    assert len(result) <= small_limit
+
+    # Ensure `output_buffer_limit` of zero works.
+    assert o.decompress(b'', output_buffer_limit=0) == b''
+
+    if o._unconsumed_data:
+        with pytest.raises(
+            brotlicffi.error,
+            match=(
+                r"brotli: decoder process called with data when "
+                r"'can_accept_more_data\(\)' is False"
+            ),
+        ):
+            o.decompress(b'additional data')
+
+    if not o.is_finished():
+        assert not o.can_accept_more_data()
+
+    # Continue decompressing with empty input.
+    all_output = [result]
+    while not o.can_accept_more_data() and not o.is_finished():
+        more_output = o.decompress(
+            b'', output_buffer_limit=more_data_limit
+        )
+        if more_data_limit is not None:
+            assert len(more_output) <= more_data_limit
+        all_output.append(more_output)
+    assert o.can_accept_more_data() or o.is_finished()
+
+    final_result = b''.join(all_output)
+    assert final_result == uncompressed_data
+
+
 def test_drip_feed(simple_compressed_file):
     """
     Sending in the data one byte at a time still works.
diff --git a/tox.ini b/tox.ini
index 8e4db44..76e7ef5 100644
--- a/tox.ini
+++ b/tox.ini
@@ -1,5 +1,5 @@
 [tox]
-envlist = py37, py38, py39, py310, py311, py312, pypy, lint
+envlist = py37, py38, py39, py310, py311, py312, py313, py314, pypy, lint
 [testenv]
 deps=
     -r{toxinidir}/test_requirements.txt
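A short usage sketch of the API this change introduces; the sample payload and
the 64 KiB limit below are arbitrary values chosen for illustration:

    import brotlicffi

    payload = b"hello world" * 1000
    compressed = brotlicffi.compress(payload)

    decompressor = brotlicffi.Decompressor()
    limit = 64 * 1024  # cap each burst of decompressed output at 64 KiB

    # The first call consumes as much input as it can while honouring the
    # output cap; any leftover input is stashed inside the decompressor.
    output = [decompressor.decompress(compressed, output_buffer_limit=limit)]

    # While the decoder still holds unconsumed input or buffered output,
    # drain it with empty input before feeding any new compressed data.
    while (not decompressor.can_accept_more_data()
           and not decompressor.is_finished()):
        output.append(decompressor.decompress(b"", output_buffer_limit=limit))

    assert b"".join(output) == payload

Because no single call returns more than ``output_buffer_limit`` bytes, peak
memory use stays bounded even for highly compressible, maliciously crafted
input.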