|
| 1 | +<?php |
| 2 | + |
| 3 | +use MongoDB\BSON\Document; |
| 4 | +use MongoDB\BSON\VectorType; |
| 5 | +use MongoDB\Driver\Exception\UnexpectedValueException; |
| 6 | + |
| 7 | +require_once __DIR__ . '/../tests/utils/basic.inc'; |
| 8 | + |
/* Generated tests that are currently expected to fail. Keys are rendered test
 * names ("<file description>: <case description>") and every entry shares the
 * same reason, which renderPhpt() emits in the --XFAIL-- section. */
$expectedFailures = array_fill_keys(
    [
        'Tests of Binary subtype 9, Vectors, with dtype FLOAT32: FLOAT32 with padding',
        'Tests of Binary subtype 9, Vectors, with dtype FLOAT32: Insufficient vector data with 3 bytes FLOAT32',
        'Tests of Binary subtype 9, Vectors, with dtype FLOAT32: Insufficient vector data with 5 bytes FLOAT32',
        'Tests of Binary subtype 9, Vectors, with dtype INT8: INT8 with padding',
        'Tests of Binary subtype 9, Vectors, with dtype PACKED_BIT: Padding specified with no vector data PACKED_BIT',
        'Tests of Binary subtype 9, Vectors, with dtype PACKED_BIT: Exceeding maximum padding PACKED_BIT',
    ],
    'Document memory leak (PHPC-2648)',
);
| 17 | + |
/* Resolve the directory that receives the generated .phpt files. realpath()
 * returns false when the "tests" directory cannot be resolved; concatenating
 * that would collapse the output path to "/bson-binary-vector/" at the
 * filesystem root, so treat it as fatal instead. */
$testsPath = realpath(__DIR__ . '/../tests');

if ($testsPath === false) {
    printf("Error resolving tests path: %s\n", __DIR__ . '/../tests');
    exit(1);
}

$outputPath = $testsPath . '/bson-binary-vector/';

// Without an output directory every later write would fail, so stop here.
if ( ! is_dir($outputPath) && ! mkdir($outputPath, 0755, true)) {
    printf("Error creating output path: %s\n", $outputPath);
    exit(1);
}
| 23 | + |
/* Convert every JSON test file named on the command line into one .phpt file
 * per entry of its "tests" array. Each problem is printed and the offending
 * file or test case is skipped so the remaining inputs are still processed. */
foreach (array_slice($argv, 1) as $jsonPath) {
    if ( ! (is_readable($jsonPath) && is_file($jsonPath))) {
        printf("Error reading %s\n", $jsonPath);
        continue;
    }

    try {
        $json = file_get_contents($jsonPath);
        $suite = Document::fromJSON($json)->toPHP(['root' => 'array', 'document' => 'array']);
    } catch (UnexpectedValueException $e) {
        printf("Error decoding %s: %s\n", $jsonPath, $e->getMessage());
        continue;
    }

    if ( ! isset($suite['description'])) {
        printf("Skipping test file without \"description\" field: %s\n", $jsonPath);
        continue;
    }

    if ( ! isset($suite['test_key'])) {
        printf("Skipping test file without \"test_key\" field: %s\n", $jsonPath);
        continue;
    }

    // Nothing to generate for a file without test cases
    if (empty($suite['tests'])) {
        continue;
    }

    // Output files are named after the input file plus a 1-based case number
    $baseName = pathinfo($jsonPath, PATHINFO_FILENAME);

    foreach ($suite['tests'] as $index => $case) {
        $phptName = sprintf('%s-%03d.phpt', $baseName, $index + 1);

        try {
            $phpt = renderPhpt(getParamsForTestCase($suite, $case), $expectedFailures);
        } catch (Exception $e) {
            printf("Error processing tests[%d] in %s: %s\n", $index, $jsonPath, $e->getMessage());
            continue;
        }

        $written = file_put_contents($outputPath . '/' . $phptName, $phpt);

        if ($written === false) {
            printf("Error writing tests[%d] in %s\n", $index, $jsonPath);
        }
    }
}
| 64 | + |
/**
 * Converts the "vector" value of a test case into the array used by the
 * generated test code.
 *
 * Vectors of any type other than PackedBit are returned unchanged (and must not
 * specify padding). A PackedBit vector is given as a list of unsigned bytes; it
 * is expanded into a list of 0/1 integers, most significant bit first, and the
 * last $padding bits are then dropped.
 *
 * @param array      $input      List of vector elements
 * @param VectorType $vectorType Vector data type
 * @param int        $padding    Number of trailing bits to drop (PackedBit only)
 *
 * @return array The converted vector
 *
 * @throws InvalidArgumentException if $input is not a list, if padding is given
 *                                  for a type that does not support it or is
 *                                  outside [0..7], if padding is non-zero for an
 *                                  empty vector, if an element is not an integer
 *                                  in [0..255], or if padding would drop a
 *                                  non-zero bit
 */
function convertVector(array $input, VectorType $vectorType, int $padding): array
{
    if (!array_is_list($input)) {
        throw new InvalidArgumentException('Vector is not a list');
    }

    $isPackedBit = $vectorType === VectorType::PackedBit;

    if (!$isPackedBit && $padding !== 0) {
        throw new InvalidArgumentException(sprintf('Vector type %s does not support padding: %d', $vectorType->name, $padding));
    }

    // Only PackedBit vectors need to be unpacked
    if (!$isPackedBit) {
        return $input;
    }

    if (!($padding >= 0 && $padding <= 7)) {
        throw new InvalidArgumentException(sprintf('Expected padding [0..7], %d given', $padding));
    }

    if ($padding > 0 && $input === []) {
        throw new InvalidArgumentException(sprintf('Non-zero padding (%d) given for empty vector', $padding));
    }

    $bits = [];

    foreach ($input as $index => $byte) {
        if (!is_int($byte)) {
            throw new InvalidArgumentException(sprintf('Expected vector[%d] to be an int, %s given', $index, get_debug_type($byte)));
        }

        if ($byte < 0 || $byte > 255) {
            throw new InvalidArgumentException(sprintf('Expected vector[%d] to be an unsigned int8, %d given', $index, $byte));
        }

        // Emit all eight bits of the byte, from the most to the least significant
        for ($shift = 7; $shift >= 0; $shift--) {
            $bits[] = ($byte >> $shift) & 1;
        }
    }

    if ($padding === 0) {
        return $bits;
    }

    // Padding trims bits from the end of the final byte
    $removed = array_splice($bits, -$padding);

    // The trimmed bits are required to all be zero
    if ($removed !== array_fill(0, $padding, 0)) {
        throw new InvalidArgumentException(sprintf('Application of padding %d would remove non-zero digits: %s', $padding, json_encode($removed)));
    }

    return $bits;
}
| 118 | + |
/**
 * Builds the template parameters (test name, PHP code and expected output) for
 * a single test case.
 *
 * For a valid case the generated code encodes the vector into a document and
 * prints its hex form, then decodes "canonical_bson" and compares the result
 * with the encoded binary. For an invalid case the generated code asserts that
 * encoding the vector (when "vector" is present) and decoding "canonical_bson"
 * (when present) each throw; if the vector is already rejected by
 * convertVector(), the generated code simply echoes that rejection message.
 *
 * @param array $test Decoded test file; "description" and "test_key" are read
 * @param array $case One entry of the test file's "tests" array
 *
 * @return array Map of the "%NAME%", "%CODE%" and "%EXPECT%" placeholders to
 *               their replacement strings
 *
 * @throws InvalidArgumentException if a required field is missing or
 *                                  "dtype_hex" is not a recognized value
 */
function getParamsForTestCase(array $test, array $case): array
{
    $requiredFields = ['description', 'valid', 'dtype_hex'];

    /* Valid cases read both of these fields unconditionally below, so report a
     * missing one as a regular processing error instead of failing later. */
    if (!empty($case['valid'])) {
        array_push($requiredFields, 'vector', 'canonical_bson');
    }

    foreach ($requiredFields as $field) {
        if (!isset($case[$field])) {
            throw new InvalidArgumentException(sprintf('Missing "%s" field', $field));
        }
    }

    $code = '';
    $expect = '';

    /* The default arm turns an unknown dtype into an InvalidArgumentException.
     * Without it, match would raise UnhandledMatchError, which is an Error and
     * therefore not caught by callers that catch Exception. */
    $vectorType = match ($case['dtype_hex']) {
        '0x27' => VectorType::Float32,
        '0x03' => VectorType::Int8,
        '0x10' => VectorType::PackedBit,
        default => throw new InvalidArgumentException(sprintf('Unsupported "dtype_hex" value: %s', json_encode($case['dtype_hex']))),
    };

    $padding = $case['padding'] ?? 0;

    if ($case['valid']) {
        $code .= sprintf('// Vector %s with padding %d', json_encode($case['vector']), $padding) . "\n";

        /* encode a document from the numeric values, dtype, and padding, along
         * with the "test_key", and assert this matches the canonical_bson
         * string. */
        $vector = convertVector($case['vector'], $vectorType, $padding);
        $code .= sprintf('$vector = %s;', var_export($vector, true)) . "\n\n";
        $code .= sprintf('$binary = MongoDB\BSON\Binary::fromVector($vector, %s);', var_export($vectorType, true)) . "\n";
        $code .= sprintf('echo bin2hex((string) MongoDB\BSON\Document::fromPHP([%s => $binary])), "\n";', var_export($test['test_key'], true)) . "\n\n";
        // bin2hex() emits lowercase digits, so normalize the expected string
        $expect .= strtolower($case['canonical_bson']) . "\n";

        /* decode the canonical_bson into its binary form, and then assert that
         * the numeric values, dtype, and padding all match those provided in
         * the JSON. */
        $code .= sprintf('$bson = MongoDB\BSON\Document::fromBSON(hex2bin(%s));', var_export($case['canonical_bson'], true)) . "\n";
        $code .= sprintf('var_dump($binary == $bson[%s]);', var_export($test['test_key'], true)) . "\n";
        $expect .= 'bool(true)' . "\n";
    } else /* not valid */ {
        /* if the vector field is present, raise an exception when attempting
         * to encode a document from the numeric values, dtype, and padding. */
        if (isset($case['vector'])) {
            $code .= sprintf('// Vector %s with padding %d', json_encode($case['vector']), $padding) . "\n";

            try {
                $vector = convertVector($case['vector'], $vectorType, $padding);

                $code .= sprintf('$vector = %s;', var_export($vector, true)) . "\n\n";
                $code .= "throws(function() use (\$vector) {\n";
                $code .= sprintf(" var_dump(MongoDB\BSON\Binary::fromVector(\$vector, %s));\n", var_export($vectorType, true));
                $code .= "}, 'MongoDB\Driver\Exception\InvalidArgumentException');" . "\n\n";
                $expect .= 'OK: Got MongoDB\Driver\Exception\InvalidArgumentException' . "\n";
            } catch (InvalidArgumentException $e) {
                /* The vector could not even be converted for the generated
                 * code, so the generated test just echoes the reason. */
                $code .= sprintf('echo %s, "\n";', var_export($e->getMessage(), true)) . "\n\n";
                $expect .= $e->getMessage() . "\n";
            }
        }

        /* if the canonical_bson field is present, raise an exception when
         * attempting to deserialize it into the corresponding numeric values,
         * as the field contains corrupted data. */
        if (isset($case['canonical_bson'])) {
            $code .= "throws(function() {\n";
            $code .= sprintf(' var_dump(MongoDB\BSON\Document::fromBSON(hex2bin(%s)));', var_export($case['canonical_bson'], true)) . "\n";
            $code .= "}, 'MongoDB\Driver\Exception\InvalidArgumentException');";
            $expect .= 'OK: Got MongoDB\Driver\Exception\InvalidArgumentException' . "\n";
        }
    }

    return [
        '%NAME%' => sprintf('%s: %s', trim($test['description']), trim($case['description'])),
        '%CODE%' => trim($code),
        '%EXPECT%' => trim($expect),
    ];
}
| 193 | + |
/**
 * Renders the contents of a .phpt file from a set of placeholder values.
 *
 * An --XFAIL-- section is added when the test name has an entry in
 * $expectedFailures; the --SKIPIF-- placeholder is always rendered empty.
 *
 * NOTE(review): the template's "Generated by" line names
 * scripts/convert-bson-corpus-tests.php; confirm that this is the intended
 * script name for this generator.
 *
 * @param array $params           Map with the "%NAME%", "%CODE%" and "%EXPECT%"
 *                                placeholders as keys
 * @param array $expectedFailures Map of test name to expected-failure reason
 *
 * @return string The rendered .phpt file contents
 */
function renderPhpt(array $params, array $expectedFailures): string
{
    $name = $params['%NAME%'];

    $params['%XFAIL%'] = isset($expectedFailures[$name])
        ? "--XFAIL--\n" . $expectedFailures[$name] . "\n"
        : '';

    $params['%SKIPIF%'] = '';

    $template = <<< 'TEMPLATE'
--TEST--
%NAME%
%XFAIL%%SKIPIF%--DESCRIPTION--
Generated by scripts/convert-bson-corpus-tests.php

DO NOT EDIT THIS FILE
--FILE--
<?php

require_once __DIR__ . '/../utils/basic.inc';

%CODE%

?>
===DONE===
<?php exit(0); ?>
--EXPECT--
%EXPECT%
===DONE===
TEMPLATE;

    /* strtr() substitutes all placeholders in a single pass over the template.
     * Sequential replacement would rescan text that was already inserted, so a
     * test name or code snippet containing a placeholder token such as
     * "%EXPECT%" would be corrupted. */
    return strtr($template, $params);
}
0 commit comments