From 01c98698b35e454308e7619e0aa44523a03b699d Mon Sep 17 00:00:00 2001 From: Mikko Koivunalho Date: Sat, 25 Oct 2025 18:20:23 +0200 Subject: [PATCH 1/5] Add support for Zstandard compression * Use IO::Compress::Zstd instead of Compress::Zstd because the latter has no GitHub activity from the part of the developer since 2019. * Issue: https://github.com/libwww-perl/HTTP-Message/issues/205 --- lib/HTTP/Message.pm | 37 +++++++++++- t/files/lorem_ipsum.txt | 9 +++ t/files/lorem_ipsum.txt.zst | Bin 0 -> 1212 bytes t/files/lorem_ipsum.txt.zst.b64 | 22 +++++++ t/message-zstd.t | 98 ++++++++++++++++++++++++++++++++ 5 files changed, 165 insertions(+), 1 deletion(-) create mode 100644 t/files/lorem_ipsum.txt create mode 100644 t/files/lorem_ipsum.txt.zst create mode 100644 t/files/lorem_ipsum.txt.zst.b64 create mode 100644 t/message-zstd.t diff --git a/lib/HTTP/Message.pm b/lib/HTTP/Message.pm index ede3eefb..d0817bdb 100644 --- a/lib/HTTP/Message.pm +++ b/lib/HTTP/Message.pm @@ -7,6 +7,8 @@ our $VERSION = '7.02'; require HTTP::Headers; require Carp; +use Module::Load (); +use Module::Load::Conditional (); our $MAXIMUM_BODY_SIZE; @@ -354,6 +356,25 @@ sub decoded_content $content_ref = \$output; $content_ref_iscopy++; } + elsif ($ce eq 'zstd') { + Module::Load::load('IO::Uncompress::UnZstd'); + my $buffer; + my $z; + if( defined $content_limit ) { + $z = IO::Uncompress::UnZstd->new( $content_ref, InputLength => $content_limit, Append => 1, Strict => 1, ) + or Carp::croak "IO::Uncompress::UnZstd->new failed: $IO::Uncompress::UnZstd::UnZstdError\n"; + } else { + $z = IO::Uncompress::UnZstd->new( $content_ref, Append => 1, Strict => 1, ) + or Carp::croak "IO::Uncompress::UnZstd->new failed: $IO::Uncompress::UnZstd::UnZstdError\n"; + } + my $status; + while( ( $status = $z->read($buffer) ) > 0 ) { } # parenthesized so $status holds read()'s return value, not the comparison result + if( $status < 0 ) { + Carp::croak "IO::Uncompress::UnZstd::read failed: $IO::Uncompress::UnZstd::UnZstdError\n"; + } + $content_ref = \$buffer; + $content_ref_iscopy++; + } elsif ($ce eq "x-bzip2"
or $ce eq "bzip2") { require Compress::Raw::Bzip2; @@ -509,6 +530,9 @@ sub decodable require IO::Uncompress::Brotli; push(@enc, 'br'); }; + if( Module::Load::Conditional::check_install( module => 'IO::Uncompress::UnZstd') ) { + push(@enc, "zstd"); + } # we don't care about announcing the 'identity', 'base64' and # 'quoted-printable' stuff return wantarray ? @enc : join(", ", @enc); @@ -577,6 +601,16 @@ sub encode elsif ($encoding eq "rot13") { # for the fun of it $content =~ tr/A-Za-z/N-ZA-Mn-za-m/; } + elsif ($encoding eq 'zstd') { + Module::Load::load('IO::Compress::Zstd'); + my $output; + my $z = IO::Compress::Zstd->new( \$output, Level => 3, Append => 0, Strict => 1, ) + or Carp::croak "IO::Compress::Zstd failed: $IO::Compress::Zstd::ZstdError\n"; + defined $z->write($content) # 0 bytes written (empty content) is not an error; only undef is + or Carp::croak "IO::Compress::Zstd::write failed: $IO::Compress::Zstd::ZstdError\n"; + $z->flush(); # NOTE(review): flush() appears to leave the zstd frame unterminated (fixture block has Last_Block=0); close() would end it -- confirm and regenerate fixture + $content = $output; + } else { return 0; } @@ -1062,7 +1096,8 @@ want to process its content as a string. Apply the given encodings to the content of the message. Returns TRUE if successful. The "identity" (non-)encoding is always supported; other currently supported encodings, subject to availability of required -additional modules, are "gzip", "deflate", "x-bzip2", "base64" and "br". +additional modules, are "gzip", "deflate", "x-bzip2", "br", "zstd" +and "base64". A successful call to this function will set the C<Content-Encoding> header. diff --git a/t/files/lorem_ipsum.txt b/t/files/lorem_ipsum.txt new file mode 100644 index 00000000..123c7903 --- /dev/null +++ b/t/files/lorem_ipsum.txt @@ -0,0 +1,9 @@ +The standard Lorem Ipsum passage, used since the 1500s + +"Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur.
Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum." +Section 1.10.32 of "de Finibus Bonorum et Malorum", written by Cicero in 45 BC + +"Sed ut perspiciatis unde omnis iste natus error sit voluptatem accusantium doloremque laudantium, totam rem aperiam, eaque ipsa quae ab illo inventore veritatis et quasi architecto beatae vitae dicta sunt explicabo. Nemo enim ipsam voluptatem quia voluptas sit aspernatur aut odit aut fugit, sed quia consequuntur magni dolores eos qui ratione voluptatem sequi nesciunt. Neque porro quisquam est, qui dolorem ipsum quia dolor sit amet, consectetur, adipisci velit, sed quia non numquam eius modi tempora incidunt ut labore et dolore magnam aliquam quaerat voluptatem. Ut enim ad minima veniam, quis nostrum exercitationem ullam corporis suscipit laboriosam, nisi ut aliquid ex ea commodi consequatur? Quis autem vel eum iure reprehenderit qui in ea voluptate velit esse quam nihil molestiae consequatur, vel illum qui dolorem eum fugiat quo voluptas nulla pariatur?" +1914 translation by H. Rackham + +"But I must explain to you how all this mistaken idea of denouncing pleasure and praising pain was born and I will give you a complete account of the system, and expound the actual teachings of the great explorer of the truth, the master-builder of human happiness. No one rejects, dislikes, or avoids pleasure itself, because it is pleasure, but because those who do not know how to pursue pleasure rationally encounter consequences that are extremely painful. Nor again is there anyone who loves or pursues or desires to obtain pain of itself, because it is pain, but because occasionally circumstances occur in which toil and pain can procure him some great pleasure. To take a trivial example, which of us ever undertakes laborious physical exercise, except to obtain some advantage from it? 
But who has any right to find fault with a man who chooses to enjoy a pleasure that has no annoying consequences, or one who avoids a pain that produces no resultant pleasure?" diff --git a/t/files/lorem_ipsum.txt.zst b/t/files/lorem_ipsum.txt.zst new file mode 100644 index 0000000000000000000000000000000000000000..acd3cddc6b4e7b4ca18a8f1fb47478199fccad93 GIT binary patch literal 1212 zcmV;t1Vj5MwJ-euSezvQ%3Q<^BQQ^sHfXlH`+G|QR_V8!h!0$0%SM}FKw2{v{u%fN z4yv-i%>dE>)&RG(IhV0ACeL_#ZR5qvLxL-`xlU_&A=G%TYc%6+sS!GJ+qLr{d)&Bq zX_SxSolotcDW;3*@ow6BH}3jgBe;UA9eFaPG%x#ueL3#m9)2%PDunu&$HiyV9_*); zH}QMzc#`VS9#4mte(LGa&J?bwb{wh~HY(lNZkmfi=RRYfJ7@5&xg_JCOxtJA^JDvj z57`@dW^cJu*Jfj5i)o&)r8?hU6Xm%C0W_{ zmj_Fd@kW-BXY4elZft0=$2E@7LCyd*7vF*$ua-CP_DYu@tef_hp#?k9&Kdc#b4PP1 z-b>$8_z2Y@sk}4tLS1fpBakN;z(K(p0JLWcv1{#$#f_YfsgV zRL$kq4tsC~W1m^WUpp*_1OpbMUQ;uDQkQDrMG~=|_${R#$qHzjsTi#Ey21xJqb|k| z9z$uIK7#cvg}dPld{3#2Tca53amt?9o>4Qm5oP)InnSUwJ?z_OWMXJZ^Q9vABh#3+j(mA57=X+X(9c?J>c*G(^zLT*;@^ zLpAG!_YLXV`L^7}L!)_L$h4n^A1kS#4uR#hDmToYIi6R!dWy; zIh>@)Vf{1rXpH?di^Rvd&!4uZ7DErB`X+4k;W&fvkis!R$miuvcu8W|Jt**j;d%Gy z|1gL`YV%Ac7e>0SrYCeLo|m@$Y^OK0fYM#XTcF*1gz*5A4#tOdI;&-M=%%rf$QEk3 z-vc9l7oIc&_^oddNd?yFuP%Q0J`z=2rkv@-$B_aq$r9Z=QcFglS9w literal 0 HcmV?d00001 diff --git a/t/files/lorem_ipsum.txt.zst.b64 b/t/files/lorem_ipsum.txt.zst.b64 new file mode 100644 index 00000000..30714ec4 --- /dev/null +++ b/t/files/lorem_ipsum.txt.zst.b64 @@ -0,0 +1,22 @@ +KLUv/QBYnCUAylzEDSMwT5M2aLa7+3tLAlbpt5uID1xhy0abYUBaMxb+GfgGDqqywM0A0gDWALe0 +OZexMibPeHtt48XMQ4IrtLlOa3kh1HiuazTjbakROnPbtfMhe9y4eGmUj+OdT+2gKaaL6fFu2no3 +7vpeI7iCqx15MqU0L/uDfTnu3x5+L00qhPqYx8Vn1B7sp5U38XvteJLq0B5Ph5d+6unQzhSuqHYc +qhc2Kt3YbpqLQuc+Y587Z/CuuSTjn0zbZ8/zY/uED9kbeGZvuVPXZmNji2mesKU6314T5bkEArEB +AhRYuCl8M3j7Y9tPrFG7tTz1LgyeTLPBj9bB4r0pEwL7ZGy6ad/TZux2cNf5Tp67UG3Z2rp2PKXa +5TLZgytyUebQ8mDF5s6t+B634rLFk43xKUbrt9WOs33VHjdNRnvgPRG6L+pW7Xjtcrd2lKkt9WT6 +VJz85HShWzvSMda2MfWkat/mwhvvfQ9T3g7297jO3LJqy7l/TEICgTjQ8AIEjvdUM3fgkcXRmE8h 
+TxKpNVE5ONtTa2I2fHtxeApuJ3FGXsAARYAFxYIxCYiEIvXazz0d1vkQynTGVls7rzPHYOSvHa8F +tcHeJ4x2LnU5Jp6L6wTGKpjeMJ8Prtg1vGUp5ViplYOuFwDUeCslCreymmaZNraBp5gyPqpmUwc7 +lwdLkvFGlpFn7DSmbmxoscc1jtBBzgA1F9+CHK+WN/D2SpcfrJv2lqEFO9HOGfmyc0dzKN5L31P4 +CNUhqbwz8uTj1XY6g3M3JBCIVIqDqQhXLHhkcbRYyiewYGABkWDYbEdqS7StjtdQpqNTDu14v3aY +9j21HdnjCcIZtO8kqTbvljPb+z3NRdM8WarSVKnuZPr2pcZjYsJrT9WOVM3l1g57cAVjn1nDXzss +iAQDFqNeUzN9Updq4EUSsZ74LaUeyQpom6kYrPS6wgc5oy7GDx5DaZw+gvUthbvhDHxPqYxboxjr +ccqe2J5RM7cRZfn2mkOxqj3s22dkYmhJ9WBHdY9Tft0cip8Trki5T6fctE0ZdMlXzCmQNiaLN7XR +M1vGJF4x8njFi3zy1/TWjubXHokwI6bw945XjbrmgytQMyLIkik/2E3rXAp/zaCpoT2CUFmv9vJk +SKLDOfF5ngrktqkw9+LJne+2XcYWTxjIlm/P5aOu+WTN4dDeebIcf6ihIIQMMgohMxPIUECStA0g +QmKQUsgHEZQtg4hE05JCaRkDMLtE1l/ABKhDCgtvowneVt6ZeZ+XotmxuIWkC+qPVR9M583bCOsB +7THBuTRE0MtcyafVQzXrhPcN6dr5ttzFQ6N5X8i0n4YfK7h7yRO3oFMAuyAybha6xDBY8YZJcEdq +dygRwlk0TTmcpMlh/TP3aIz9NIvEx7nPn7anFkMPovombPXhOIPwkMIxQcjn5U14SWLYPSjwgeF5 +9+j/MIhCavNMJhdGuq6mJ3Qonpe2/WynN7SA0l3FW6DdfITxAJMOxod1OqtldeimsZLIFmq53wMj +fheeNAP4rW8RSQXW6a8ufvg+ElVcppzpxMeRAi7j1wDHPGNnyWAorgmAHeNqY7bcOt/goQKl+FNk +IIrbirHSJaV5AaGXb55/ diff --git a/t/message-zstd.t b/t/message-zstd.t new file mode 100644 index 00000000..66769bf1 --- /dev/null +++ b/t/message-zstd.t @@ -0,0 +1,98 @@ +#! 
perl + +use strict; +use warnings; + +use English qw( -no_match_vars ); + +use Test::More; +use Test::Needs 'IO::Compress::Zstd'; +use Path::Tiny qw( path ); + +require HTTP::Message; + +my $files = path($PROGRAM_NAME)->parent->child('files'); +my $lorem_ipsum_clear = $files->child('lorem_ipsum.txt')->slurp_utf8; +my $lorem_ipsum_zstd = $files->child('lorem_ipsum.txt.zst')->slurp_raw; +# my $lorem_ipsum_zstd = $files->child('lorem_ipsum.txt.zst.from_perl')->slurp_raw; +my $lorem_ipsum_zstd_b64 = $files->child('lorem_ipsum.txt.zst.b64')->slurp_raw; + +subtest "no decoding" => sub { + + my $m = HTTP::Message->new( + [ + "Content-Type" => "text/plain", + "Content-Encoding" => "", + ], + $lorem_ipsum_clear + ); + is( $m->decoded_content, $lorem_ipsum_clear, "decoded_content() works, is same as content" ); + ok( $m->decode, "decode() works" ); + is( $m->content, $lorem_ipsum_clear, "... and content() is correct" ); +}; + +subtest "decoding zstd" => sub { + + my $m = HTTP::Message->new( + [ + "Content-Type" => "text/plain", + "Content-Encoding" => "zstd", + ], + $lorem_ipsum_zstd + ); + is( $m->decoded_content, $lorem_ipsum_clear, "decoded_content() works" ); + ok( $m->decode, "decode() works" ); + is( $m->content, $lorem_ipsum_clear, "... and content() is correct" ); +}; + +subtest "decoding zstd in base64" => sub { + + my $m = HTTP::Message->new( + [ + "Content-Type" => "text/plain", + "Content-Encoding" => "zstd, base64", + ], + $lorem_ipsum_zstd_b64 + ); + is( $m->decoded_content, $lorem_ipsum_clear, "decoded_content() works" ); + ok( $m->decode, "decode() works" ); + is( $m->content, $lorem_ipsum_clear, "... and content() is correct" ); +}; + +subtest "encoding to zstd" => sub { + my $m = HTTP::Message->new( + [ + "Content-Type" => "text/plain", + ], + $lorem_ipsum_clear + ); + is( $m->content, $lorem_ipsum_clear, "the content is the original" ); + ok( $m->encode("zstd"), "set encoding to 'zstd'" ); + is( $m->header("Content-Encoding"), + "zstd", "... 
and Content-Encoding is set" ); + isnt( $m->content, $lorem_ipsum_clear, "... and the content has changed" ); + is( $m->content, $lorem_ipsum_zstd, "... and the content is correct" ); + is( $m->decoded_content, $lorem_ipsum_clear, "decoded_content() works" ); + ok( $m->decode, "decode() works" ); + is( $m->content, $lorem_ipsum_clear, "... and content() is correct" ); +}; + +subtest "encoding to zstd in base64" => sub { + my $m = HTTP::Message->new( + [ + "Content-Type" => "text/plain", + ], + $lorem_ipsum_clear + ); + is( $m->content, $lorem_ipsum_clear, "the content is the original" ); + ok( $m->encode("zstd", "base64"), "set encoding to 'zstd' in 'base64'" ); + is( $m->header("Content-Encoding"), + "zstd, base64", "... and Content-Encoding is set" ); + isnt( $m->content, $lorem_ipsum_clear, "... and the content has changed" ); + is( $m->content, $lorem_ipsum_zstd_b64, "... and the content is correct" ); + is( $m->decoded_content, $lorem_ipsum_clear, "decoded_content() works" ); + ok( $m->decode, "decode() works" ); + is( $m->content, $lorem_ipsum_clear, "... and content() is correct" ); +}; + +done_testing; From 075558c16031a24a1287877f63e67fb63fa96603 Mon Sep 17 00:00:00 2001 From: Mikko Koivunalho Date: Sat, 25 Oct 2025 18:20:42 +0200 Subject: [PATCH 2/5] Add notice about Base64 encoding availability --- lib/HTTP/Message.pm | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/lib/HTTP/Message.pm b/lib/HTTP/Message.pm index d0817bdb..2152529e 100644 --- a/lib/HTTP/Message.pm +++ b/lib/HTTP/Message.pm @@ -1099,6 +1099,12 @@ currently supported encodings, subject to availability of required additional modules, are "gzip", "deflate", "x-bzip2", "br", "zstd" and "base64". +N.B. Base64 encoding is not in L. +It is an HTTP::Message-specific option. +You can use it to ensure that content is always ASCII and console +friendly. + A successful call to this function will set the C<Content-Encoding> header.
From 07bde0528381dd8fb7be5beeed70a51f8b8db9e2 Mon Sep 17 00:00:00 2001 From: Mikko Koivunalho Date: Sat, 25 Oct 2025 18:29:36 +0200 Subject: [PATCH 3/5] Add optional dependency IO::Compress::Zstd to dist.ini --- dist.ini | 1 + 1 file changed, 1 insertion(+) diff --git a/dist.ini b/dist.ini index 113652a3..c0a01f19 100644 --- a/dist.ini +++ b/dist.ini @@ -18,6 +18,7 @@ HTTP::Date = 6 IO::Compress::Brotli = 0.004001 IO::Compress::Bzip2 = 2.021 IO::Uncompress::Brotli = 0.004001 +IO::Compress::Zstd = 2.214 LWP::MediaTypes = 6 MIME::Base64 = 2.1 perl = 5.008001 From c6dc2676ece8e2e8a824a9f5024fc0b953279ca3 Mon Sep 17 00:00:00 2001 From: Mikko Johannes Koivunalho Date: Sat, 25 Oct 2025 23:28:12 +0300 Subject: [PATCH 4/5] Remove zstd file and instead create it from base64 encoded file --- t/files/lorem_ipsum.txt.zst | Bin 1212 -> 0 bytes t/message-zstd.t | 4 ++-- 2 files changed, 2 insertions(+), 2 deletions(-) delete mode 100644 t/files/lorem_ipsum.txt.zst diff --git a/t/files/lorem_ipsum.txt.zst b/t/files/lorem_ipsum.txt.zst deleted file mode 100644 index acd3cddc6b4e7b4ca18a8f1fb47478199fccad93..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 1212 zcmV;t1Vj5MwJ-euSezvQ%3Q<^BQQ^sHfXlH`+G|QR_V8!h!0$0%SM}FKw2{v{u%fN z4yv-i%>dE>)&RG(IhV0ACeL_#ZR5qvLxL-`xlU_&A=G%TYc%6+sS!GJ+qLr{d)&Bq zX_SxSolotcDW;3*@ow6BH}3jgBe;UA9eFaPG%x#ueL3#m9)2%PDunu&$HiyV9_*); zH}QMzc#`VS9#4mte(LGa&J?bwb{wh~HY(lNZkmfi=RRYfJ7@5&xg_JCOxtJA^JDvj z57`@dW^cJu*Jfj5i)o&)r8?hU6Xm%C0W_{ zmj_Fd@kW-BXY4elZft0=$2E@7LCyd*7vF*$ua-CP_DYu@tef_hp#?k9&Kdc#b4PP1 z-b>$8_z2Y@sk}4tLS1fpBakN;z(K(p0JLWcv1{#$#f_YfsgV zRL$kq4tsC~W1m^WUpp*_1OpbMUQ;uDQkQDrMG~=|_${R#$qHzjsTi#Ey21xJqb|k| z9z$uIK7#cvg}dPld{3#2Tca53amt?9o>4Qm5oP)InnSUwJ?z_OWMXJZ^Q9vABh#3+j(mA57=X+X(9c?J>c*G(^zLT*;@^ zLpAG!_YLXV`L^7}L!)_L$h4n^A1kS#4uR#hDmToYIi6R!dWy; zIh>@)Vf{1rXpH?di^Rvd&!4uZ7DErB`X+4k;W&fvkis!R$miuvcu8W|Jt**j;d%Gy z|1gL`YV%Ac7e>0SrYCeLo|m@$Y^OK0fYM#XTcF*1gz*5A4#tOdI;&-M=%%rf$QEk3 
z-vc9l7oIc&_^oddNd?yFuP%Q0J`z=2rkv@-$B_aq$r9Z=QcFglS9w diff --git a/t/message-zstd.t b/t/message-zstd.t index 66769bf1..c17a48fe 100644 --- a/t/message-zstd.t +++ b/t/message-zstd.t @@ -9,13 +9,13 @@ use Test::More; use Test::Needs 'IO::Compress::Zstd'; use Path::Tiny qw( path ); +require MIME::Base64; require HTTP::Message; my $files = path($PROGRAM_NAME)->parent->child('files'); my $lorem_ipsum_clear = $files->child('lorem_ipsum.txt')->slurp_utf8; -my $lorem_ipsum_zstd = $files->child('lorem_ipsum.txt.zst')->slurp_raw; -# my $lorem_ipsum_zstd = $files->child('lorem_ipsum.txt.zst.from_perl')->slurp_raw; my $lorem_ipsum_zstd_b64 = $files->child('lorem_ipsum.txt.zst.b64')->slurp_raw; +my $lorem_ipsum_zstd = MIME::Base64::decode($lorem_ipsum_zstd_b64); subtest "no decoding" => sub { From afe4f0be6e168fe44bd68f372513a4b5402eafe5 Mon Sep 17 00:00:00 2001 From: Mikko Johannes Koivunalho Date: Sat, 25 Oct 2025 23:32:34 +0300 Subject: [PATCH 5/5] Rename file lorem_ipsum.txt.zst.b64 --- t/files/{lorem_ipsum.txt.zst.b64 => lorem_ipsum.txt-zst-b64} | 0 t/message-zstd.t | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) rename t/files/{lorem_ipsum.txt.zst.b64 => lorem_ipsum.txt-zst-b64} (100%) diff --git a/t/files/lorem_ipsum.txt.zst.b64 b/t/files/lorem_ipsum.txt-zst-b64 similarity index 100% rename from t/files/lorem_ipsum.txt.zst.b64 rename to t/files/lorem_ipsum.txt-zst-b64 diff --git a/t/message-zstd.t b/t/message-zstd.t index c17a48fe..e89b7fba 100644 --- a/t/message-zstd.t +++ b/t/message-zstd.t @@ -14,7 +14,7 @@ require HTTP::Message; my $files = path($PROGRAM_NAME)->parent->child('files'); my $lorem_ipsum_clear = $files->child('lorem_ipsum.txt')->slurp_utf8; -my $lorem_ipsum_zstd_b64 = $files->child('lorem_ipsum.txt.zst.b64')->slurp_raw; +my $lorem_ipsum_zstd_b64 = $files->child('lorem_ipsum.txt-zst-b64')->slurp_raw; my $lorem_ipsum_zstd = MIME::Base64::decode($lorem_ipsum_zstd_b64); subtest "no decoding" => sub {