Skip to content

Commit 8d0b961

Browse files
committed
PHPC-2634: BSON binary vector corpus tests
1 parent 17e1e35 commit 8d0b961

24 files changed

+852
-0
lines changed
Lines changed: 225 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,225 @@
1+
<?php
2+
3+
use MongoDB\BSON\Document;
4+
use MongoDB\BSON\VectorType;
5+
use MongoDB\Driver\Exception\UnexpectedValueException;
6+
7+
require_once __DIR__ . '/../tests/utils/basic.inc';
8+
9+
/* Corpus cases that are expected to fail, keyed by the rendered test name
 * ("<file description>: <case description>"). The value is written verbatim
 * into the generated test's --XFAIL-- section as the reason. */
$expectedFailures = [
    'Tests of Binary subtype 9, Vectors, with dtype FLOAT32: FLOAT32 with padding' => 'Document memory leak (PHPC-2648)',
    'Tests of Binary subtype 9, Vectors, with dtype FLOAT32: Insufficient vector data with 3 bytes FLOAT32' => 'Document memory leak (PHPC-2648)',
    'Tests of Binary subtype 9, Vectors, with dtype FLOAT32: Insufficient vector data with 5 bytes FLOAT32' => 'Document memory leak (PHPC-2648)',
    'Tests of Binary subtype 9, Vectors, with dtype INT8: INT8 with padding' => 'Document memory leak (PHPC-2648)',
    'Tests of Binary subtype 9, Vectors, with dtype PACKED_BIT: Padding specified with no vector data PACKED_BIT' => 'Document memory leak (PHPC-2648)',
    'Tests of Binary subtype 9, Vectors, with dtype PACKED_BIT: Exceeding maximum padding PACKED_BIT' => 'Document memory leak (PHPC-2648)',
];

/* realpath() returns false when the directory does not exist. Concatenating
 * onto false would silently produce "/bson-binary-vector/" at the filesystem
 * root, so resolve and validate the tests directory first. */
$testsPath = realpath(__DIR__ . '/../tests');

if ($testsPath === false) {
    printf("Error resolving tests directory: %s\n", __DIR__ . '/../tests');
    exit(1);
}

$outputPath = $testsPath . '/bson-binary-vector/';

if ( ! is_dir($outputPath) && ! mkdir($outputPath, 0755, true)) {
    printf("Error creating output path: %s\n", $outputPath);
    // Nothing can be written without the output directory, so stop here
    exit(1);
}

// Each command line argument is the path of one corpus JSON file to convert
foreach (array_slice($argv, 1) as $inputFile) {
    if ( ! is_readable($inputFile) || ! is_file($inputFile)) {
        printf("Error reading %s\n", $inputFile);
        continue;
    }

    try {
        $test = Document::fromJSON(file_get_contents($inputFile))->toPHP(['root' => 'array', 'document' => 'array']);
    } catch (UnexpectedValueException $e) {
        printf("Error decoding %s: %s\n", $inputFile, $e->getMessage());
        continue;
    }

    if ( ! isset($test['description'])) {
        printf("Skipping test file without \"description\" field: %s\n", $inputFile);
        continue;
    }

    if ( ! isset($test['test_key'])) {
        printf("Skipping test file without \"test_key\" field: %s\n", $inputFile);
        continue;
    }

    if (empty($test['tests'])) {
        continue;
    }

    foreach ($test['tests'] as $i => $case) {
        // Output files are numbered from 001 in the order the cases appear
        $outputFile = sprintf('%s-%03d.phpt', pathinfo($inputFile, PATHINFO_FILENAME), $i + 1);

        try {
            $output = renderPhpt(getParamsForTestCase($test, $case), $expectedFailures);
        } catch (Throwable $e) {
            /* Catch Throwable rather than Exception: a malformed case can
             * raise an Error (e.g. UnhandledMatchError or TypeError), which
             * should be reported for that case without aborting the run. */
            printf("Error processing tests[%d] in %s: %s\n", $i, $inputFile, $e->getMessage());
            continue;
        }

        // $outputPath already ends with a directory separator
        if (false === file_put_contents($outputPath . $outputFile, $output)) {
            printf("Error writing tests[%d] in %s\n", $i, $inputFile);
            continue;
        }
    }
}
64+
65+
/**
 * Converts a corpus "vector" array into the list of elements used to build
 * the vector in the generated test.
 *
 * For every type other than PackedBit the input list is returned unchanged,
 * and any non-zero padding is rejected. For PackedBit, each input element is
 * treated as one byte (0..255) and expanded into eight 0/1 integers, most
 * significant bit first; the last $padding bits are then dropped and must all
 * be zero.
 *
 * @param array      $input      Vector values taken from the corpus test case
 * @param VectorType $vectorType Element type of the vector
 * @param int        $padding    Number of trailing bits to discard (PackedBit only)
 *
 * @return array The unchanged input, or a list of 0/1 integers for PackedBit
 *
 * @throws InvalidArgumentException if the input is not a list, the padding is
 *                                  not valid for the type, an element is not
 *                                  an int in [0..255], or padding would drop
 *                                  a non-zero bit
 */
function convertVector(array $input, VectorType $vectorType, int $padding): array
{
    if (!array_is_list($input)) {
        throw new InvalidArgumentException('Vector is not a list');
    }

    $isPackedBit = $vectorType === VectorType::PackedBit;

    if (!$isPackedBit && $padding !== 0) {
        throw new InvalidArgumentException(sprintf('Vector type %s does not support padding: %d', $vectorType->name, $padding));
    }

    // Only packed bit vectors need any conversion
    if (!$isPackedBit) {
        return $input;
    }

    if (!(0 <= $padding && $padding <= 7)) {
        throw new InvalidArgumentException(sprintf('Expected padding [0..7], %d given', $padding));
    }

    if ($padding > 0 && $input === []) {
        throw new InvalidArgumentException(sprintf('Non-zero padding (%d) given for empty vector', $padding));
    }

    $bits = [];

    foreach ($input as $index => $value) {
        if (!is_int($value)) {
            throw new InvalidArgumentException(sprintf('Expected vector[%d] to be an int, %s given', $index, get_debug_type($value)));
        }

        if ($value < 0 || $value > 255) {
            throw new InvalidArgumentException(sprintf('Expected vector[%d] to be an unsigned int8, %d given', $index, $value));
        }

        // Emit the byte's eight binary digits, most significant bit first
        for ($shift = 7; $shift >= 0; --$shift) {
            $bits[] = ($value >> $shift) & 1;
        }
    }

    if ($padding === 0) {
        return $bits;
    }

    // The padding bits are the trailing digits of the final byte
    $dropped = array_slice($bits, -$padding);

    // Padding may only discard zeroes
    if (in_array(1, $dropped, true)) {
        throw new InvalidArgumentException(sprintf('Application of padding %d would remove non-zero digits: %s', $padding, json_encode($dropped)));
    }

    return array_slice($bits, 0, -$padding);
}
118+
119+
/**
 * Renders a vector as compact text for a comment in the generated test.
 *
 * json_encode() returns false for arrays containing INF or NAN, which would
 * leave the comment empty. In that case each element is rendered on its own,
 * using PHP's literal spelling (INF, -INF, NAN) for non-finite floats.
 */
function formatVectorForComment(mixed $vector): string
{
    $json = json_encode($vector);

    if ($json !== false) {
        return $json;
    }

    if (!is_array($vector)) {
        return get_debug_type($vector);
    }

    $elements = array_map(
        static fn (mixed $element): string => is_float($element) && !is_finite($element)
            ? var_export($element, true)
            : (string) json_encode($element, JSON_PARTIAL_OUTPUT_ON_ERROR),
        $vector,
    );

    return '[' . implode(',', $elements) . ']';
}

/**
 * Builds the template parameters (%NAME%, %CODE%, %EXPECT%) for one corpus
 * test case.
 *
 * Valid cases generate code that encodes the vector and compares the hex
 * output with "canonical_bson", then decodes "canonical_bson" and compares it
 * with the encoded binary. Invalid cases generate code that expects an
 * InvalidArgumentException when encoding the vector (if "vector" is present)
 * and when decoding the BSON (if "canonical_bson" is present). If the vector
 * is already rejected by convertVector(), the generated test echoes that
 * rejection message instead.
 *
 * @param array $test Decoded corpus file; "description" and "test_key" are read
 * @param array $case One entry of the file's "tests" array
 *
 * @return array Map of template placeholder to replacement string
 *
 * @throws InvalidArgumentException if a required field is missing or
 *                                  "dtype_hex" is not a supported value
 */
function getParamsForTestCase(array $test, array $case): array
{
    foreach (['description', 'valid', 'dtype_hex'] as $field) {
        if (!isset($case[$field])) {
            throw new InvalidArgumentException(sprintf('Missing "%s" field', $field));
        }
    }

    $code = '';
    $expect = '';

    /* Without a default arm an unknown dtype raises UnhandledMatchError, which
     * is an Error and not the InvalidArgumentException used for every other
     * malformed-case condition in this function. */
    $vectorType = match ($case['dtype_hex']) {
        '0x27' => VectorType::Float32,
        '0x03' => VectorType::Int8,
        '0x10' => VectorType::PackedBit,
        default => throw new InvalidArgumentException(sprintf('Unsupported "dtype_hex" value: %s', var_export($case['dtype_hex'], true))),
    };

    $padding = $case['padding'] ?? 0;

    if ($case['valid']) {
        // Both fields are read unconditionally below for valid cases
        foreach (['vector', 'canonical_bson'] as $field) {
            if (!isset($case[$field])) {
                throw new InvalidArgumentException(sprintf('Missing "%s" field', $field));
            }
        }

        $code .= sprintf('// Vector %s with padding %d', formatVectorForComment($case['vector']), $padding) . "\n";

        /* encode a document from the numeric values, dtype, and padding, along
         * with the "test_key", and assert this matches the canonical_bson
         * string. */
        $vector = convertVector($case['vector'], $vectorType, $padding);
        $code .= sprintf('$vector = %s;', var_export($vector, true)) . "\n\n";
        $code .= sprintf('$binary = MongoDB\BSON\Binary::fromVector($vector, %s);', var_export($vectorType, true)) . "\n";
        $code .= sprintf('echo bin2hex((string) MongoDB\BSON\Document::fromPHP([%s => $binary])), "\n";', var_export($test['test_key'], true)) . "\n\n";
        $expect .= strtolower($case['canonical_bson']) . "\n";

        /* decode the canonical_bson into its binary form, and then assert that
         * the numeric values, dtype, and padding all match those provided in
         * the JSON. */
        $code .= sprintf('$bson = MongoDB\BSON\Document::fromBSON(hex2bin(%s));', var_export($case['canonical_bson'], true)) . "\n";
        $code .= sprintf('var_dump($binary == $bson[%s]);', var_export($test['test_key'], true)) . "\n";
        $expect .= 'bool(true)' . "\n";
    } else /* not valid */ {
        /* if the vector field is present, raise an exception when attempting
         * to encode a document from the numeric values, dtype, and padding. */
        if (isset($case['vector'])) {
            $code .= sprintf('// Vector %s with padding %d', formatVectorForComment($case['vector']), $padding) . "\n";

            try {
                $vector = convertVector($case['vector'], $vectorType, $padding);

                $code .= sprintf('$vector = %s;', var_export($vector, true)) . "\n\n";
                $code .= "throws(function() use (\$vector) {\n";
                $code .= sprintf(" var_dump(MongoDB\BSON\Binary::fromVector(\$vector, %s));\n", var_export($vectorType, true));
                $code .= "}, 'MongoDB\Driver\Exception\InvalidArgumentException');" . "\n\n";
                $expect .= 'OK: Got MongoDB\Driver\Exception\InvalidArgumentException' . "\n";
            } catch (InvalidArgumentException $e) {
                /* The vector cannot even be converted for this dtype/padding,
                 * so the generated test just echoes the rejection message. */
                $code .= sprintf('echo %s, "\n";', var_export($e->getMessage(), true)) . "\n\n";
                $expect .= $e->getMessage() . "\n";
            }
        }

        /* if the canonical_bson field is present, raise an exception when
         * attempting to deserialize it into the corresponding numeric values,
         * as the field contains corrupted data. */
        if (isset($case['canonical_bson'])) {
            $code .= "throws(function() {\n";
            $code .= sprintf(' var_dump(MongoDB\BSON\Document::fromBSON(hex2bin(%s)));', var_export($case['canonical_bson'], true)) . "\n";
            $code .= "}, 'MongoDB\Driver\Exception\InvalidArgumentException');";
            $expect .= 'OK: Got MongoDB\Driver\Exception\InvalidArgumentException' . "\n";
        }
    }

    return [
        '%NAME%' => sprintf('%s: %s', trim($test['description']), trim($case['description'])),
        '%CODE%' => trim($code),
        '%EXPECT%' => trim($expect),
    ];
}
193+
194+
/**
 * Renders the contents of a .phpt test file from template parameters.
 *
 * An --XFAIL-- section is added when the test name (%NAME%) is a key of
 * $expectedFailures; its value is used as the reason. The %SKIPIF%
 * placeholder is always rendered empty.
 *
 * @param array $params           Map of placeholder to replacement; %NAME%,
 *                                %CODE% and %EXPECT% are expected
 * @param array $expectedFailures Map of test name to expected failure reason
 *
 * @return string Contents of the .phpt file
 */
function renderPhpt(array $params, array $expectedFailures): string
{
    $params['%XFAIL%'] = isset($expectedFailures[$params['%NAME%']])
        ? "--XFAIL--\n" . $expectedFailures[$params['%NAME%']] . "\n"
        : '';

    $params['%SKIPIF%'] = '';

    /* NOTE(review): the description below names
     * scripts/convert-bson-corpus-tests.php; confirm that this is the path of
     * the script containing this function. */
    $template = <<<'TEMPLATE'
--TEST--
%NAME%
%XFAIL%%SKIPIF%--DESCRIPTION--
Generated by scripts/convert-bson-corpus-tests.php

DO NOT EDIT THIS FILE
--FILE--
<?php

require_once __DIR__ . '/../utils/basic.inc';

%CODE%

?>
===DONE===
<?php exit(0); ?>
--EXPECT--
%EXPECT%
===DONE===
TEMPLATE;

    /* strtr() substitutes every placeholder in a single pass. str_replace()
     * with arrays works through the pairs in sequence, so a placeholder token
     * appearing inside an already substituted value (for example "%EXPECT%"
     * within the generated code) would be replaced as well. */
    return strtr($template, $params);
}
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
--TEST--
2+
Tests of Binary subtype 9, Vectors, with dtype FLOAT32: Simple Vector FLOAT32
3+
--DESCRIPTION--
4+
Generated by scripts/convert-bson-corpus-tests.php
5+
6+
DO NOT EDIT THIS FILE
7+
--FILE--
8+
<?php
9+
10+
require_once __DIR__ . '/../utils/basic.inc';
11+
12+
// Vector [127,7] with padding 0
13+
$vector = array (
14+
0 => 127.0,
15+
1 => 7.0,
16+
);
17+
18+
$binary = MongoDB\BSON\Binary::fromVector($vector, \MongoDB\BSON\VectorType::Float32);
19+
echo bin2hex((string) MongoDB\BSON\Document::fromPHP(['vector' => $binary])), "\n";
20+
21+
$bson = MongoDB\BSON\Document::fromBSON(hex2bin('1C00000005766563746F72000A0000000927000000FE420000E04000'));
22+
var_dump($binary == $bson['vector']);
23+
24+
?>
25+
===DONE===
26+
<?php exit(0); ?>
27+
--EXPECT--
28+
1c00000005766563746f72000a0000000927000000fe420000e04000
29+
bool(true)
30+
===DONE===
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
--TEST--
2+
Tests of Binary subtype 9, Vectors, with dtype FLOAT32: Vector with decimals and negative value FLOAT32
3+
--DESCRIPTION--
4+
Generated by scripts/convert-bson-corpus-tests.php
5+
6+
DO NOT EDIT THIS FILE
7+
--FILE--
8+
<?php
9+
10+
require_once __DIR__ . '/../utils/basic.inc';
11+
12+
// Vector [127.7,-7.7] with padding 0
13+
$vector = array (
14+
0 => 127.7,
15+
1 => -7.7,
16+
);
17+
18+
$binary = MongoDB\BSON\Binary::fromVector($vector, \MongoDB\BSON\VectorType::Float32);
19+
echo bin2hex((string) MongoDB\BSON\Document::fromPHP(['vector' => $binary])), "\n";
20+
21+
$bson = MongoDB\BSON\Document::fromBSON(hex2bin('1C00000005766563746F72000A0000000927006666FF426666F6C000'));
22+
var_dump($binary == $bson['vector']);
23+
24+
?>
25+
===DONE===
26+
<?php exit(0); ?>
27+
--EXPECT--
28+
1c00000005766563746f72000a0000000927006666ff426666f6c000
29+
bool(true)
30+
===DONE===
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
--TEST--
2+
Tests of Binary subtype 9, Vectors, with dtype FLOAT32: Empty Vector FLOAT32
3+
--DESCRIPTION--
4+
Generated by scripts/convert-bson-corpus-tests.php
5+
6+
DO NOT EDIT THIS FILE
7+
--FILE--
8+
<?php
9+
10+
require_once __DIR__ . '/../utils/basic.inc';
11+
12+
// Vector [] with padding 0
13+
$vector = array (
14+
);
15+
16+
$binary = MongoDB\BSON\Binary::fromVector($vector, \MongoDB\BSON\VectorType::Float32);
17+
echo bin2hex((string) MongoDB\BSON\Document::fromPHP(['vector' => $binary])), "\n";
18+
19+
$bson = MongoDB\BSON\Document::fromBSON(hex2bin('1400000005766563746F72000200000009270000'));
20+
var_dump($binary == $bson['vector']);
21+
22+
?>
23+
===DONE===
24+
<?php exit(0); ?>
25+
--EXPECT--
26+
1400000005766563746f72000200000009270000
27+
bool(true)
28+
===DONE===
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
--TEST--
2+
Tests of Binary subtype 9, Vectors, with dtype FLOAT32: Infinity Vector FLOAT32
3+
--DESCRIPTION--
4+
Generated by scripts/convert-bson-corpus-tests.php
5+
6+
DO NOT EDIT THIS FILE
7+
--FILE--
8+
<?php
9+
10+
require_once __DIR__ . '/../utils/basic.inc';
11+
12+
// Vector with padding 0
13+
$vector = array (
14+
0 => -INF,
15+
1 => 0.0,
16+
2 => INF,
17+
);
18+
19+
$binary = MongoDB\BSON\Binary::fromVector($vector, \MongoDB\BSON\VectorType::Float32);
20+
echo bin2hex((string) MongoDB\BSON\Document::fromPHP(['vector' => $binary])), "\n";
21+
22+
$bson = MongoDB\BSON\Document::fromBSON(hex2bin('2000000005766563746F72000E000000092700000080FF000000000000807F00'));
23+
var_dump($binary == $bson['vector']);
24+
25+
?>
26+
===DONE===
27+
<?php exit(0); ?>
28+
--EXPECT--
29+
2000000005766563746f72000e000000092700000080ff000000000000807f00
30+
bool(true)
31+
===DONE===
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
--TEST--
2+
Tests of Binary subtype 9, Vectors, with dtype FLOAT32: FLOAT32 with padding
3+
--XFAIL--
4+
Document memory leak (PHPC-2648)
5+
--DESCRIPTION--
6+
Generated by scripts/convert-bson-corpus-tests.php
7+
8+
DO NOT EDIT THIS FILE
9+
--FILE--
10+
<?php
11+
12+
require_once __DIR__ . '/../utils/basic.inc';
13+
14+
// Vector [127,7] with padding 3
15+
echo 'Vector type Float32 does not support padding: 3', "\n";
16+
17+
throws(function() {
18+
var_dump(MongoDB\BSON\Document::fromBSON(hex2bin('1C00000005766563746F72000A0000000927030000FE420000E04000')));
19+
}, 'MongoDB\Driver\Exception\InvalidArgumentException');
20+
21+
?>
22+
===DONE===
23+
<?php exit(0); ?>
24+
--EXPECT--
25+
Vector type Float32 does not support padding: 3
26+
OK: Got MongoDB\Driver\Exception\InvalidArgumentException
27+
===DONE===

0 commit comments

Comments
 (0)