Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ jobs:
submodules: recursive
- uses: actions/setup-python@v1
with:
python-version: 3.8
python-version: 3.14
- name: Run lint
run: |
python -m pip install tox
Expand All @@ -22,7 +22,7 @@ jobs:
fail-fast: false
matrix:
os: [ubuntu-latest, macos-latest, windows-latest]
python-version: ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12"]
python-version: ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12", "3.13", "3.14"]
steps:
- name: Set git to use LF on Windows
if: runner.os == 'Windows'
Expand Down
10 changes: 10 additions & 0 deletions HISTORY.rst
Original file line number Diff line number Diff line change
@@ -1,6 +1,16 @@
Changelog
=========

1.2.0.0 (TBD)
--------------------

- Upgraded libbrotli to v1.2.0.
- Added ``output_buffer_limit`` parameter to ``Decompressor.decompress()`` and
``Decompressor.process()`` methods to allow mitigation of unexpectedly large
output. This addresses potential security concerns where maliciously crafted
compressed data could result in excessive memory usage during decompression.


1.1.0.0 (2023-09-14)
--------------------

Expand Down
2 changes: 1 addition & 1 deletion libbrotli
Submodule libbrotli updated 232 files
2 changes: 2 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,5 +119,7 @@ def finalize_options(self):
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"Programming Language :: Python :: 3.13",
"Programming Language :: Python :: 3.14",
]
)
2 changes: 1 addition & 1 deletion src/brotlicffi/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,4 @@
Compressor, MODE_GENERIC, MODE_TEXT, MODE_FONT, error, Error
)

__version__ = "1.1.0.0"
__version__ = "1.2.0.0"
108 changes: 102 additions & 6 deletions src/brotlicffi/_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -348,17 +348,23 @@ class Decompressor(object):
.. versionchanged:: 0.5.0
Added ``dictionary`` parameter.

.. versionchanged:: 1.2.0
Added ``can_accept_more_data()`` method and optional
``output_buffer_limit`` parameter to ``process()``/``decompress()``.

:param dictionary: A pre-set dictionary for LZ77. Please use this with
caution: if a dictionary is used for compression, the same dictionary
**must** be used for decompression!
:type dictionary: ``bytes``
"""
_dictionary = None
_dictionary_size = None
_unconsumed_data = None

def __init__(self, dictionary=b''):
dec = lib.BrotliDecoderCreateInstance(ffi.NULL, ffi.NULL, ffi.NULL)
self._decoder = ffi.gc(dec, lib.BrotliDecoderDestroyInstance)
self._unconsumed_data = b''

if dictionary:
self._dictionary = ffi.new("uint8_t []", dictionary)
Expand All @@ -369,23 +375,73 @@ def __init__(self, dictionary=b''):
self._dictionary
)

def decompress(self, data):
@staticmethod
def _calculate_buffer_size(
input_data_len, output_buffer_limit, chunks_len, chunks_num
):
if output_buffer_limit is not None:
return output_buffer_limit - chunks_len
# When `decompress(b'')` is called without `output_buffer_limit`.
elif input_data_len == 0:
# libbrotli would use 32 KB as a starting buffer size and double it
# each time, capped at 16 MB.
# https://github.com/google/brotli/blob/028fb5a23661f123017c060daa546b55cf4bde29/python/_brotli.c#L291-L292
return 1 << min(chunks_num + 15, 24)
else:
# Allocate a buffer that's hopefully overlarge, but if it's not we
# don't mind: we'll spin around again.
return 5 * input_data_len

def decompress(self, data, output_buffer_limit=None):
"""
Decompress part of a complete Brotli-compressed string.

.. versionchanged:: 1.2.0
Added ``output_buffer_limit`` parameter.

:param data: A bytestring containing Brotli-compressed data.
:param output_buffer_limit: Optional maximum size for the output
buffer. If set, the output buffer will not grow once its size
equals or exceeds this value. If the limit is reached, further
calls to process (potentially with empty input) will continue to
yield more data. Following process() calls must only be called
with empty input until can_accept_more_data() returns True.
:type output_buffer_limit: ``int`` or ``None``
:returns: A bytestring containing the decompressed data.
"""
if self._unconsumed_data and data:
raise error(
"brotli: decoder process called with data when "
"'can_accept_more_data()' is False"
)

# We should avoid operations on the `self._unconsumed_data` if no data
# is to be processed.
if output_buffer_limit is not None and output_buffer_limit <= 0:
return b''

# Use unconsumed data if available, use new data otherwise.
if self._unconsumed_data:
input_data = self._unconsumed_data
self._unconsumed_data = b''
else:
input_data = data

chunks = []
chunks_len = 0

available_in = ffi.new("size_t *", len(data))
in_buffer = ffi.new("uint8_t[]", data)
available_in = ffi.new("size_t *", len(input_data))
in_buffer = ffi.new("uint8_t[]", input_data)
next_in = ffi.new("uint8_t **", in_buffer)

while True:
# Allocate a buffer that's hopefully overlarge, but if it's not we
# don't mind: we'll spin around again.
buffer_size = 5 * len(data)
buffer_size = self._calculate_buffer_size(
input_data_len=len(input_data),
output_buffer_limit=output_buffer_limit,
chunks_len=chunks_len,
chunks_num=len(chunks),
)

available_out = ffi.new("size_t *", buffer_size)
out_buffer = ffi.new("uint8_t[]", buffer_size)
next_out = ffi.new("uint8_t **", out_buffer)
Expand All @@ -408,6 +464,19 @@ def decompress(self, data):
# Next, copy the result out.
chunk = ffi.buffer(out_buffer, buffer_size - available_out[0])[:]
chunks.append(chunk)
chunks_len += len(chunk)

# Save any unconsumed input for the next call.
if available_in[0] > 0:
remaining_input = ffi.buffer(next_in[0], available_in[0])[:]
self._unconsumed_data = remaining_input

# Check if we've reached the output limit.
if (
output_buffer_limit is not None
and chunks_len >= output_buffer_limit
):
break

if rc == lib.BROTLI_DECODER_RESULT_NEEDS_MORE_INPUT:
assert available_in[0] == 0
Expand Down Expand Up @@ -459,3 +528,30 @@ def is_finished(self):
is complete, ``False`` otherwise
"""
return lib.BrotliDecoderIsFinished(self._decoder) == lib.BROTLI_TRUE

def can_accept_more_data(self):
    """
    Report whether the decompressor is ready for new compressed input.

    When ``output_buffer_limit`` was passed to ``decompress()`` or
    ``process()`` and the limit was hit, the decompressor may still be
    holding unconsumed input bytes or output buffered inside libbrotli.
    While that is the case this method returns ``False`` and callers
    must keep draining output via ``decompress(b'')``/``process(b'')``;
    once it returns ``True``, new compressed data may be supplied.

    .. versionadded:: 1.2.0

    :returns: ``True`` if new compressed data may be passed to
        ``decompress()`` or ``process()``, ``False`` while buffered
        input/output must be drained first via
        ``decompress(b'')``/``process(b'')``.
    :rtype: ``bool``
    """
    # Input saved from a previous limited call blocks new data, as does
    # any output still buffered inside the native decoder instance.
    has_pending_input = bool(self._unconsumed_data)
    has_buffered_output = (
        lib.BrotliDecoderHasMoreOutput(self._decoder) == lib.BROTLI_TRUE
    )
    return not (has_pending_input or has_buffered_output)
51 changes: 51 additions & 0 deletions test/test_simple_decompression.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,57 @@ def test_decompressobj(simple_compressed_file):
assert data == uncompressed_data


# `more_data_limit` allows testing `decompress(b'')` with and without a limit.
@pytest.mark.parametrize('more_data_limit', [100, None])
def test_decompressobj_with_output_buffer_limit(
    simple_compressed_file, more_data_limit
):
    """
    Test decompression with `output_buffer_limit` set.
    """
    # The fixture supplies (uncompressed_path, compressed_path) pairs.
    with open(simple_compressed_file[0], 'rb') as f:
        uncompressed_data = f.read()

    with open(simple_compressed_file[1], 'rb') as f:
        compressed_data = f.read()

    o = brotlicffi.Decompressor()
    # A fresh decompressor has no pending input or buffered output.
    assert o.can_accept_more_data()
    small_limit = 100
    # The first call is capped, so at most `small_limit` bytes come back.
    result = o.decompress(compressed_data, output_buffer_limit=small_limit)
    assert len(result) <= small_limit

    # Ensure `output_buffer_limit` of zero works.
    assert o.decompress(b'', output_buffer_limit=0) == b''

    # While input is still unconsumed, feeding new data must raise.
    if o._unconsumed_data:
        with pytest.raises(
            brotlicffi.error,
            match=(
                r"brotli: decoder process called with data when "
                r"'can_accept_more_data\(\)' is False"
            ),
        ):
            o.decompress(b'additional data')

    # Unless the whole stream fit under the limit, the decompressor must
    # refuse new input until drained.
    if not o.is_finished():
        assert not o.can_accept_more_data()

    # Continue decompressing with empty input.
    all_output = [result]
    while not o.can_accept_more_data() and not o.is_finished():
        more_output = o.decompress(
            b'', output_buffer_limit=more_data_limit
        )
        if more_data_limit is not None:
            assert len(more_output) <= more_data_limit
        all_output.append(more_output)
    # Fully drained: either ready for more input or the stream ended.
    assert o.can_accept_more_data() or o.is_finished()

    # All the pieces must reassemble into the original plaintext.
    final_result = b''.join(all_output)
    assert final_result == uncompressed_data


def test_drip_feed(simple_compressed_file):
"""
Sending in the data one byte at a time still works.
Expand Down
2 changes: 1 addition & 1 deletion tox.ini
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[tox]
envlist = py37, py38, py39, py310, py311, py312, pypy, lint
envlist = py37, py38, py39, py310, py311, py312, py313, py314, pypy, lint

[testenv]
deps= -r{toxinidir}/test_requirements.txt
Expand Down
Loading