@@ -81,16 +81,18 @@ public struct AsyncBufferSequence: AsyncSequence, Sendable {
8181 return Iterator ( diskIO: self . diskIO)
8282 }
8383
84+ // [New API: 0.0.1]
8485 public func lines< Encoding: _UnicodeEncoding > (
8586 encoding: Encoding . Type = UTF8 . self,
86- bufferingPolicy: LineSequence < Encoding > . BufferingPolicy = . unbounded
87+ bufferingPolicy: LineSequence < Encoding > . BufferingPolicy = . maxLineLength ( 128 * 1024 )
8788 ) -> LineSequence < Encoding > {
8889 return LineSequence ( underlying: self , encoding: encoding, bufferingPolicy: bufferingPolicy)
8990 }
9091}
9192
9293// MARK: - LineSequence
9394extension AsyncBufferSequence {
95+ // [New API: 0.0.1]
9496 public struct LineSequence < Encoding: _UnicodeEncoding > : AsyncSequence , Sendable {
9597 public typealias Element = String
9698
@@ -140,25 +142,20 @@ extension AsyncBufferSequence {
140142 }
141143 #else
142144 // Unfortunately here we _have to_ copy the bytes out because
143- // DisptachIO (rightfully) reuses buffer, which means `buffer.data`
145+ // DispatchIO (rightfully) reuses buffer, which means `buffer.data`
144146 // has the same address on all iterations, therefore we can't directly
145147 // create the result array from buffer.data
146- let temporary = UnsafeMutableBufferPointer< Encoding . CodeUnit> . allocate(
147- capacity: buffer. data. count
148- )
149- defer { temporary. deallocate ( ) }
150- let actualBytesCopied = buffer. data. copyBytes (
151- to: temporary,
152- count: buffer. data. count
153- )
154148
155149 // Calculate how many CodeUnit elements we have
156- let elementCount = actualBytesCopied / MemoryLayout< Encoding . CodeUnit> . stride
150+ let elementCount = buffer . data . count / MemoryLayout< Encoding . CodeUnit> . stride
157151
158152 // Create array by copying from the buffer reinterpreted as CodeUnit
159- let result : Array < Encoding . CodeUnit > = Array (
160- UnsafeBufferPointer ( start: temporary. baseAddress, count: elementCount)
161- )
153+ let result : Array < Encoding . CodeUnit > = buffer. data. withUnsafeBytes { ptr -> Array < Encoding . CodeUnit > in
154+ return Array (
155+ UnsafeBufferPointer ( start: ptr. baseAddress? . assumingMemoryBound ( to: Encoding . CodeUnit. self) , count: elementCount)
156+ )
157+ }
158+
162159 #endif
163160 return result. isEmpty ? nil : result
164161 }
@@ -180,18 +177,38 @@ extension AsyncBufferSequence {
180177 /// let formFeed = Encoding.CodeUnit(0x0C)
181178 let carriageReturn = Encoding . CodeUnit ( 0x0D )
182179 // carriageReturn + lineFeed
183- let newLine : Encoding . CodeUnit
184- let lineSeparator : Encoding . CodeUnit
185- let paragraphSeparator : Encoding . CodeUnit
180+ let newLine1 : Encoding . CodeUnit
181+ let newLine2 : Encoding . CodeUnit
182+ let lineSeparator1 : Encoding . CodeUnit
183+ let lineSeparator2 : Encoding . CodeUnit
184+ let lineSeparator3 : Encoding . CodeUnit
185+ let paragraphSeparator1 : Encoding . CodeUnit
186+ let paragraphSeparator2 : Encoding . CodeUnit
187+ let paragraphSeparator3 : Encoding . CodeUnit
186188 switch Encoding . CodeUnit. self {
187189 case is UInt8 . Type :
188- newLine = Encoding . CodeUnit ( 0xC2 ) // 0xC2 0x85
189- lineSeparator = Encoding . CodeUnit ( 0xE2 ) // 0xE2 0x80 0xA8
190- paragraphSeparator = Encoding . CodeUnit ( 0xE2 ) // 0xE2 0x80 0xA9
190+ newLine1 = Encoding . CodeUnit ( 0xC2 )
191+ newLine2 = Encoding . CodeUnit ( 0x85 )
192+
193+ lineSeparator1 = Encoding . CodeUnit ( 0xE2 )
194+ lineSeparator2 = Encoding . CodeUnit ( 0x80 )
195+ lineSeparator3 = Encoding . CodeUnit ( 0xA8 )
196+
197+ paragraphSeparator1 = Encoding . CodeUnit ( 0xE2 )
198+ paragraphSeparator2 = Encoding . CodeUnit ( 0x80 )
199+ paragraphSeparator3 = Encoding . CodeUnit ( 0xA9 )
191200 case is UInt16 . Type , is UInt32 . Type :
192- newLine = Encoding . CodeUnit ( 0x0085 )
193- lineSeparator = Encoding . CodeUnit ( 0x2028 )
194- paragraphSeparator = Encoding . CodeUnit ( 0x2029 )
201+ // UTF16 and UTF32 encode each of these separators as a single code unit
202+ newLine1 = Encoding . CodeUnit ( 0x0085 )
203+ newLine2 = Encoding . CodeUnit ( 0x0085 )
204+
205+ lineSeparator1 = Encoding . CodeUnit ( 0x2028 )
206+ lineSeparator2 = Encoding . CodeUnit ( 0x2028 )
207+ lineSeparator3 = Encoding . CodeUnit ( 0x2028 )
208+
209+ paragraphSeparator1 = Encoding . CodeUnit ( 0x2029 )
210+ paragraphSeparator2 = Encoding . CodeUnit ( 0x2029 )
211+ paragraphSeparator3 = Encoding . CodeUnit ( 0x2029 )
195212 default :
196213 fatalError ( " Unknown encoding type \( Encoding . self) " )
197214 }
@@ -210,10 +227,13 @@ extension AsyncBufferSequence {
210227 var currentIndex : Int = self . startIndex
211228 for index in self . startIndex ..< self . buffer. count {
212229 currentIndex = index
213- // Early return if we exceed max line length
230+ // Throw if we exceed max line length
214231 if case . maxLineLength( let maxLength) = self . bufferingPolicy,
215232 currentIndex >= maxLength {
216- return yield ( at: currentIndex)
233+ throw SubprocessError (
234+ code: . init( . streamOutputExceedsLimit( maxLength) ) ,
235+ underlyingError: nil
236+ )
217237 }
218238 let byte = self . buffer [ currentIndex]
219239 switch byte {
@@ -232,12 +252,12 @@ extension AsyncBufferSequence {
232252 continue
233253 }
234254 return result
235- case newLine :
255+ case newLine1 :
236256 var targetIndex = currentIndex
237257 if Encoding . CodeUnit. self is UInt8 . Type {
238258 // For UTF8, look for the next 0x85 byte
239259 guard ( targetIndex + 1 ) < self . buffer. count,
240- self . buffer [ targetIndex + 1 ] == Encoding . CodeUnit ( 0x85 ) else {
260+ self . buffer [ targetIndex + 1 ] == newLine2 else {
241261 // Not a valid new line. Keep looking
242262 continue
243263 }
@@ -248,21 +268,22 @@ extension AsyncBufferSequence {
248268 continue
249269 }
250270 return result
251- case lineSeparator , paragraphSeparator :
271+ case lineSeparator1 , paragraphSeparator1 :
252272 var targetIndex = currentIndex
253273 if Encoding . CodeUnit. self is UInt8 . Type {
254- // For UTF8, look for the next 0x80 byte
274+ // For UTF8, look for the next byte
255275 guard ( targetIndex + 1 ) < self . buffer. count,
256- self . buffer [ targetIndex + 1 ] == Encoding . CodeUnit ( 0x80 ) else {
276+ self . buffer [ targetIndex + 1 ] == lineSeparator2 ||
277+ self . buffer [ targetIndex + 1 ] == paragraphSeparator2 else {
257278 // Not a valid new line. Keep looking
258279 continue
259280 }
260- // Swallow 0x80 byte
281+ // Swallow next byte
261282 targetIndex += 1
262- // Look for the final 0xA8 (lineSeparator) or 0xA9 (paragraphSeparator)
283+ // Look for the final byte
263284 guard ( targetIndex + 1 ) < self . buffer. count,
264- ( self . buffer [ targetIndex + 1 ] == Encoding . CodeUnit ( 0xA8 ) ||
265- self . buffer [ targetIndex + 1 ] == Encoding . CodeUnit ( 0xA9 ) ) else {
285+ ( self . buffer [ targetIndex + 1 ] == lineSeparator3 ||
286+ self . buffer [ targetIndex + 1 ] == paragraphSeparator3 ) else {
266287 // Not a valid new line. Keep looking
267288 continue
268289 }
@@ -308,9 +329,8 @@ extension AsyncBufferSequence.LineSequence {
308329 /// on the number of buffered elements (line length).
309330 case unbounded
310331 /// Impose a max buffer size (line length) limit.
311- /// When using this policy, `LineSequence` will return a line if:
312- /// - A newline character is encountered (standard behavior)
313- /// - The current line in the buffer reaches or exceeds the specified maximum length
332+ /// Subprocess **will throw an error** if the number of buffered
333+ /// elements (line length) exceeds the limit
314334 case maxLineLength( Int )
315335 }
316336}
0 commit comments