1616
1717package com .mongodb .internal .connection ;
1818
19+ import org .bson .BsonSerializationException ;
1920import org .bson .ByteBuf ;
2021import org .bson .io .OutputBuffer ;
2122
2526import java .util .ArrayList ;
2627import java .util .List ;
2728
29+ import static com .mongodb .assertions .Assertions .assertFalse ;
2830import static com .mongodb .assertions .Assertions .assertTrue ;
2931import static com .mongodb .assertions .Assertions .notNull ;
32+ import static java .lang .String .format ;
3033
3134/**
3235 * <p>This class is not part of the public API and may be removed or changed at any time</p>
@@ -182,11 +185,17 @@ private ByteBuf getCurrentByteBuffer() {
182185 return currentByteBuffer ;
183186 }
184187
188+ private ByteBuf getNextByteBuffer () {
189+ assertFalse (bufferList .get (curBufferIndex ).hasRemaining ());
190+ return getByteBufferAtIndex (++curBufferIndex );
191+ }
192+
185193 private ByteBuf getByteBufferAtIndex (final int index ) {
186194 if (bufferList .size () < index + 1 ) {
187- bufferList .add (bufferProvider .getBuffer (index >= (MAX_SHIFT - INITIAL_SHIFT )
188- ? MAX_BUFFER_SIZE
189- : Math .min (INITIAL_BUFFER_SIZE << index , MAX_BUFFER_SIZE )));
195+ ByteBuf buffer = bufferProvider .getBuffer (index >= (MAX_SHIFT - INITIAL_SHIFT )
196+ ? MAX_BUFFER_SIZE
197+ : Math .min (INITIAL_BUFFER_SIZE << index , MAX_BUFFER_SIZE ));
198+ bufferList .add (buffer );
190199 }
191200 return bufferList .get (index );
192201 }
@@ -229,6 +238,16 @@ public List<ByteBuf> getByteBuffers() {
229238 return buffers ;
230239 }
231240
241+ public List <ByteBuf > getDuplicateByteBuffers () {
242+ ensureOpen ();
243+
244+ List <ByteBuf > buffers = new ArrayList <>(bufferList .size ());
245+ for (final ByteBuf cur : bufferList ) {
246+ buffers .add (cur .duplicate ().order (ByteOrder .LITTLE_ENDIAN ));
247+ }
248+ return buffers ;
249+ }
250+
232251
233252 @ Override
234253 public int pipe (final OutputStream out ) throws IOException {
@@ -237,14 +256,18 @@ public int pipe(final OutputStream out) throws IOException {
237256 byte [] tmp = new byte [INITIAL_BUFFER_SIZE ];
238257
239258 int total = 0 ;
240- for (final ByteBuf cur : getByteBuffers ()) {
241- ByteBuf dup = cur .duplicate ();
242- while (dup .hasRemaining ()) {
243- int numBytesToCopy = Math .min (dup .remaining (), tmp .length );
244- dup .get (tmp , 0 , numBytesToCopy );
245- out .write (tmp , 0 , numBytesToCopy );
259+ List <ByteBuf > byteBuffers = getByteBuffers ();
260+ try {
261+ for (final ByteBuf cur : byteBuffers ) {
262+ while (cur .hasRemaining ()) {
263+ int numBytesToCopy = Math .min (cur .remaining (), tmp .length );
264+ cur .get (tmp , 0 , numBytesToCopy );
265+ out .write (tmp , 0 , numBytesToCopy );
266+ }
267+ total += cur .limit ();
246268 }
247- total += dup .limit ();
269+ } finally {
270+ byteBuffers .forEach (ByteBuf ::release );
248271 }
249272 return total ;
250273 }
@@ -370,4 +393,165 @@ private static final class BufferPositionPair {
370393 this .position = position ;
371394 }
372395 }
396+
397+ protected int writeCharacters (final String str , final boolean checkNullTermination ) {
398+ int stringLength = str .length ();
399+ int sp = 0 ;
400+ int prevPos = position ;
401+
402+ ByteBuf curBuffer = getCurrentByteBuffer ();
403+ int curBufferPos = curBuffer .position ();
404+ int curBufferLimit = curBuffer .limit ();
405+ int remaining = curBufferLimit - curBufferPos ;
406+
407+ if (curBuffer .hasArray ()) {
408+ byte [] dst = curBuffer .array ();
409+ int arrayOffset = curBuffer .arrayOffset ();
410+ if (remaining >= str .length () + 1 ) {
411+ // Write ASCII characters directly to the array until we hit a non-ASCII character.
412+ sp = writeOnArrayAscii (str , dst , arrayOffset + curBufferPos , checkNullTermination );
413+ curBufferPos += sp ;
414+ // If the whole string was written as ASCII, append the null terminator.
415+ if (sp == stringLength ) {
416+ dst [arrayOffset + curBufferPos ++] = 0 ;
417+ position += sp + 1 ;
418+ curBuffer .position (curBufferPos );
419+ return sp + 1 ;
420+ }
421+ // Otherwise, update the position to reflect the partial write.
422+ position += sp ;
423+ curBuffer .position (curBufferPos );
424+ }
425+ }
426+
427+ // We get here, when the buffer is not backed by an array, or when the string contains at least one non-ASCII characters.
428+ return writeOnBuffers (str ,
429+ checkNullTermination ,
430+ sp ,
431+ stringLength ,
432+ curBufferLimit ,
433+ curBufferPos ,
434+ curBuffer ,
435+ prevPos );
436+ }
437+
438+ private int writeOnBuffers (final String str ,
439+ final boolean checkNullTermination ,
440+ final int stringPointer ,
441+ final int stringLength ,
442+ final int bufferLimit ,
443+ final int bufferPos ,
444+ final ByteBuf buffer ,
445+ final int prevPos ) {
446+ int remaining ;
447+ int sp = stringPointer ;
448+ int curBufferPos = bufferPos ;
449+ int curBufferLimit = bufferLimit ;
450+ ByteBuf curBuffer = buffer ;
451+ while (sp < stringLength ) {
452+ remaining = curBufferLimit - curBufferPos ;
453+ int c = str .charAt (sp );
454+
455+ if (checkNullTermination && c == 0x0 ) {
456+ throw new BsonSerializationException (
457+ format ("BSON cstring '%s' is not valid because it contains a null character " + "at index %d" , str , sp ));
458+ }
459+
460+ if (c < 0x80 ) {
461+ if (remaining == 0 ) {
462+ curBuffer = getNextByteBuffer ();
463+ curBufferPos = 0 ;
464+ curBufferLimit = curBuffer .limit ();
465+ }
466+ curBuffer .put ((byte ) c );
467+ curBufferPos ++;
468+ position ++;
469+ } else if (c < 0x800 ) {
470+ if (remaining < 2 ) {
471+ // Not enough space: use write() to handle buffer boundary
472+ write ((byte ) (0xc0 + (c >> 6 )));
473+ write ((byte ) (0x80 + (c & 0x3f )));
474+
475+ curBuffer = getCurrentByteBuffer ();
476+ curBufferPos = curBuffer .position ();
477+ curBufferLimit = curBuffer .limit ();
478+ } else {
479+ curBuffer .put ((byte ) (0xc0 + (c >> 6 )));
480+ curBuffer .put ((byte ) (0x80 + (c & 0x3f )));
481+ curBufferPos += 2 ;
482+ position += 2 ;
483+ }
484+ } else {
485+ // Handle multibyte characters (may involve surrogate pairs).
486+ c = Character .codePointAt (str , sp );
487+ /*
488+ Malformed surrogate pairs are encoded as-is (3 byte code unit) without substituting any code point.
489+ This known deviation from the spec and current functionality remains for backward compatibility.
490+ Ticket: JAVA-5575
491+ */
492+ if (c < 0x10000 ) {
493+ if (remaining < 3 ) {
494+ write ((byte ) (0xe0 + (c >> 12 )));
495+ write ((byte ) (0x80 + ((c >> 6 ) & 0x3f )));
496+ write ((byte ) (0x80 + (c & 0x3f )));
497+
498+ curBuffer = getCurrentByteBuffer ();
499+ curBufferPos = curBuffer .position ();
500+ curBufferLimit = curBuffer .limit ();
501+ } else {
502+ curBuffer .put ((byte ) (0xe0 + (c >> 12 )));
503+ curBuffer .put ((byte ) (0x80 + ((c >> 6 ) & 0x3f )));
504+ curBuffer .put ((byte ) (0x80 + (c & 0x3f )));
505+ curBufferPos += 3 ;
506+ position += 3 ;
507+ }
508+ } else {
509+ if (remaining < 4 ) {
510+ write ((byte ) (0xf0 + (c >> 18 )));
511+ write ((byte ) (0x80 + ((c >> 12 ) & 0x3f )));
512+ write ((byte ) (0x80 + ((c >> 6 ) & 0x3f )));
513+ write ((byte ) (0x80 + (c & 0x3f )));
514+
515+ curBuffer = getCurrentByteBuffer ();
516+ curBufferPos = curBuffer .position ();
517+ curBufferLimit = curBuffer .limit ();
518+ } else {
519+ curBuffer .put ((byte ) (0xf0 + (c >> 18 )));
520+ curBuffer .put ((byte ) (0x80 + ((c >> 12 ) & 0x3f )));
521+ curBuffer .put ((byte ) (0x80 + ((c >> 6 ) & 0x3f )));
522+ curBuffer .put ((byte ) (0x80 + (c & 0x3f )));
523+ curBufferPos += 4 ;
524+ position += 4 ;
525+ }
526+ }
527+ }
528+ sp += Character .charCount (c );
529+ }
530+
531+ getCurrentByteBuffer ().put ((byte ) 0 );
532+ position ++;
533+ return position - prevPos ;
534+ }
535+
536+ private static int writeOnArrayAscii (final String str ,
537+ final byte [] dst ,
538+ final int arrayPosition ,
539+ final boolean checkNullTermination ) {
540+ int pos = arrayPosition ;
541+ int sp = 0 ;
542+ // Fast common path: This tight loop is JIT-friendly (simple, no calls, few branches),
543+ // It might be unrolled for performance.
544+ for (; sp < str .length (); sp ++, pos ++) {
545+ char c = str .charAt (sp );
546+ if (checkNullTermination && c == 0 ) {
547+ throw new BsonSerializationException (
548+ format ("BSON cstring '%s' is not valid because it contains a null character " + "at index %d" , str , sp ));
549+ }
550+ if (c >= 0x80 ) {
551+ break ;
552+ }
553+ dst [pos ] = (byte ) c ;
554+ }
555+ return sp ;
556+ }
373557}
0 commit comments