@@ -26,6 +26,28 @@ impl ContextSize {
2626 }
2727}
2828
/// The kind of a single line inside a unified diff hunk.
#[doc(alias = "git2")]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DiffLineType {
    /// A context line, i.e. a line present in both the old and the new version.
    Context,
    /// A line that only exists in the new version.
    Add,
    /// A line that only exists in the old version.
    Remove,
}

impl DiffLineType {
    /// Return the character that marks a line of this type in unified diff output:
    /// a space for context, `+` for additions and `-` for removals.
    const fn to_prefix(self) -> char {
        match self {
            Self::Context => ' ',
            Self::Add => '+',
            Self::Remove => '-',
        }
    }
}
50+
2951/// Specify where to put a newline.
3052#[ derive( Debug , Copy , Clone ) ]
3153pub enum NewlineSeparator < ' a > {
@@ -39,31 +61,45 @@ pub enum NewlineSeparator<'a> {
3961 AfterHeaderAndWhenNeeded ( & ' a str ) ,
4062}
4163
/// Holds information about a unified diff hunk, specifically with respect to line numbers.
///
/// Its [`Display`](std::fmt::Display) implementation renders the hunk header line,
/// e.g. `@@ -1,2 +3,4 @@`, without a trailing newline.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct HunkHeader {
    /// The 1-based start position in the 'before' lines.
    pub before_hunk_start: u32,
    /// The size of the 'before' hunk in lines.
    pub before_hunk_len: u32,
    /// The 1-based start position in the 'after' lines.
    pub after_hunk_start: u32,
    /// The size of the 'after' hunk in lines.
    pub after_hunk_len: u32,
}

impl std::fmt::Display for HunkHeader {
    /// Write the header as `@@ -<before_start>,<before_len> +<after_start>,<after_len> @@`.
    ///
    /// No newline is emitted; callers append the separator of their choice.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(
            f,
            "@@ -{},{} +{},{} @@",
            self.before_hunk_start, self.before_hunk_len, self.after_hunk_start, self.after_hunk_len
        )
    }
}
85+
4286/// A utility trait for use in [`UnifiedDiff`](super::UnifiedDiff).
4387pub trait ConsumeHunk {
4488 /// The item this instance produces after consuming all hunks.
4589 type Out ;
4690
47- /// Consume a single ` hunk` in unified diff format, that would be prefixed with `header`.
48- /// Note that all newlines are added .
91+ /// Consume a single hunk. Note that it is the implementation's responsibility to add newlines
92+ /// where requested by `newline` .
4993 ///
5094 /// Note that the [`UnifiedDiff`](super::UnifiedDiff) sink will wrap its output in an [`std::io::Result`].
5195 /// After this method returned its first error, it will not be called anymore.
52- ///
53- /// The following is hunk-related information and the same that is used in the `header`.
54- /// * `before_hunk_start` is the 1-based first line of this hunk in the old file.
55- /// * `before_hunk_len` the amount of lines of this hunk in the old file.
56- /// * `after_hunk_start` is the 1-based first line of this hunk in the new file.
57- /// * `after_hunk_len` the amount of lines of this hunk in the new file.
5896 fn consume_hunk (
5997 & mut self ,
60- before_hunk_start : u32 ,
61- before_hunk_len : u32 ,
62- after_hunk_start : u32 ,
63- after_hunk_len : u32 ,
64- header : & str ,
65- hunk : & [ u8 ] ,
98+ header : HunkHeader ,
99+ lines : & [ ( DiffLineType , & [ u8 ] ) ] ,
100+ newline : NewlineSeparator < ' _ > ,
66101 ) -> std:: io:: Result < ( ) > ;
102+
67103 /// Called after the last hunk is consumed to produce an output.
68104 fn finish ( self ) -> Self :: Out ;
69105}
@@ -75,14 +111,10 @@ pub(super) mod _impl {
75111 use imara_diff:: { intern, Sink } ;
76112 use intern:: { InternedInput , Interner , Token } ;
77113
78- use super :: { ConsumeHunk , ContextSize , NewlineSeparator } ;
79-
80- const CONTEXT : char = ' ' ;
81- const ADDITION : char = '+' ;
82- const REMOVAL : char = '-' ;
114+ use super :: { ConsumeHunk , ContextSize , DiffLineType , HunkHeader , NewlineSeparator } ;
83115
84- /// A [`Sink`] that creates a textual diff in the format typically output by git or `gnu- diff` if the `-u` option is used,
85- /// and passes it in full to a consumer .
116+ /// A [`Sink`] that creates a unified diff. It can be used to create a textual diff in the
117+ /// format typically output by `git` or `gnu-diff` if the `-u` option is used .
86118 pub struct UnifiedDiff < ' a , T , D >
87119 where
88120 T : Hash + Eq + AsRef < [ u8 ] > ,
@@ -108,8 +140,8 @@ pub(super) mod _impl {
108140 ctx_size : u32 ,
109141 newline : NewlineSeparator < ' a > ,
110142
111- buffer : Vec < u8 > ,
112- header_buf : String ,
143+ buffer : Vec < ( DiffLineType , Vec < u8 > ) > ,
144+
113145 delegate : D ,
114146
115147 err : Option < std:: io:: Error > ,
@@ -120,12 +152,13 @@ pub(super) mod _impl {
120152 T : Hash + Eq + AsRef < [ u8 ] > ,
121153 D : ConsumeHunk ,
122154 {
123- /// Create a new instance to create unified diff using the lines in `input`,
155+ /// Create a new instance to create a unified diff using the lines in `input`,
124156 /// which also must be used when running the diff algorithm.
125157 /// `context_size` is the amount of lines around each hunk which will be passed
126- ///to `consume_hunk`.
158+ /// to `consume_hunk`.
127159 ///
128- /// `consume_hunk` is called for each hunk in unified-diff format, as created from each line separated by `newline_separator`.
160+ /// `consume_hunk` is called for each hunk with all the information required to create a
161+ /// unified diff.
129162 pub fn new (
130163 input : & ' a InternedInput < T > ,
131164 consume_hunk : D ,
@@ -147,28 +180,16 @@ pub(super) mod _impl {
147180 newline : newline_separator,
148181
149182 buffer : Vec :: with_capacity ( 8 ) ,
150- header_buf : String :: new ( ) ,
151183 delegate : consume_hunk,
152184
153185 err : None ,
154186 }
155187 }
156188
157- fn print_tokens ( & mut self , tokens : & [ Token ] , prefix : char ) {
189+ fn print_tokens ( & mut self , tokens : & [ Token ] , line_type : DiffLineType ) {
158190 for & token in tokens {
159- self . buffer . push_char ( prefix) ;
160- let line = & self . interner [ token] ;
161- self . buffer . push_str ( line) ;
162- match self . newline {
163- NewlineSeparator :: AfterHeaderAndLine ( nl) => {
164- self . buffer . push_str ( nl) ;
165- }
166- NewlineSeparator :: AfterHeaderAndWhenNeeded ( nl) => {
167- if !line. as_ref ( ) . ends_with_str ( nl) {
168- self . buffer . push_str ( nl) ;
169- }
170- }
171- }
191+ let content = self . interner [ token] . as_ref ( ) . to_vec ( ) ;
192+ self . buffer . push ( ( line_type, content) ) ;
172193 }
173194 }
174195
@@ -183,38 +204,35 @@ pub(super) mod _impl {
183204
184205 let hunk_start = self . before_hunk_start + 1 ;
185206 let hunk_end = self . after_hunk_start + 1 ;
186- self . header_buf . clear ( ) ;
187- std:: fmt:: Write :: write_fmt (
188- & mut self . header_buf ,
189- format_args ! (
190- "@@ -{},{} +{},{} @@{nl}" ,
191- hunk_start,
192- self . before_hunk_len,
193- hunk_end,
194- self . after_hunk_len,
195- nl = match self . newline {
196- NewlineSeparator :: AfterHeaderAndLine ( nl) | NewlineSeparator :: AfterHeaderAndWhenNeeded ( nl) => {
197- nl
198- }
199- }
200- ) ,
201- )
202- . map_err ( |err| std:: io:: Error :: new ( ErrorKind :: Other , err) ) ?;
203- self . delegate . consume_hunk (
204- hunk_start,
205- self . before_hunk_len ,
206- hunk_end,
207- self . after_hunk_len ,
208- & self . header_buf ,
209- & self . buffer ,
210- ) ?;
207+
208+ let header = HunkHeader {
209+ before_hunk_start : hunk_start,
210+ before_hunk_len : self . before_hunk_len ,
211+ after_hunk_start : hunk_end,
212+ after_hunk_len : self . after_hunk_len ,
213+ } ;
214+
215+ // TODO:
216+ // If I remember correctly, we wanted this buffer to be reused between calls, but I ran
217+ // into lifetime issues when I tried to make it a field on `UnifiedDiff`.
218+ let buffer2: Vec < ( DiffLineType , & [ u8 ] ) > = self
219+ . buffer
220+ . iter ( )
221+ . map ( |( line_type, content) | ( * line_type, content. as_slice ( ) ) )
222+ . collect ( ) ;
223+
224+ self . delegate . consume_hunk ( header, & buffer2, self . newline ) ?;
211225
212226 self . reset_hunks ( ) ;
213227 Ok ( ( ) )
214228 }
215229
216230 fn print_context_and_update_pos ( & mut self , print : Range < u32 > , move_to : u32 ) {
217- self . print_tokens ( & self . before [ print. start as usize ..print. end as usize ] , CONTEXT ) ;
231+ self . print_tokens (
232+ & self . before [ print. start as usize ..print. end as usize ] ,
233+ DiffLineType :: Context ,
234+ ) ;
235+
218236 let len = print. end - print. start ;
219237 self . ctx_pos = Some ( move_to) ;
220238 self . before_hunk_len += len;
@@ -270,8 +288,11 @@ pub(super) mod _impl {
270288 self . before_hunk_len += before. end - before. start ;
271289 self . after_hunk_len += after. end - after. start ;
272290
273- self . print_tokens ( & self . before [ before. start as usize ..before. end as usize ] , REMOVAL ) ;
274- self . print_tokens ( & self . after [ after. start as usize ..after. end as usize ] , ADDITION ) ;
291+ self . print_tokens (
292+ & self . before [ before. start as usize ..before. end as usize ] ,
293+ DiffLineType :: Remove ,
294+ ) ;
295+ self . print_tokens ( & self . after [ after. start as usize ..after. end as usize ] , DiffLineType :: Add ) ;
275296 }
276297
277298 fn finish ( mut self ) -> Self :: Out {
@@ -289,12 +310,32 @@ pub(super) mod _impl {
289310 impl ConsumeHunk for String {
290311 type Out = Self ;
291312
292- fn consume_hunk ( & mut self , _: u32 , _: u32 , _: u32 , _: u32 , header : & str , hunk : & [ u8 ] ) -> std:: io:: Result < ( ) > {
293- self . push_str ( header) ;
294- self . push_str (
295- hunk. to_str ( )
296- . map_err ( |err| std:: io:: Error :: new ( ErrorKind :: Other , err) ) ?,
297- ) ;
313+ fn consume_hunk (
314+ & mut self ,
315+ header : HunkHeader ,
316+ lines : & [ ( DiffLineType , & [ u8 ] ) ] ,
317+ newline : NewlineSeparator < ' _ > ,
318+ ) -> std:: io:: Result < ( ) > {
319+ self . push_str ( & header. to_string ( ) ) ;
320+ self . push_str ( match newline {
321+ NewlineSeparator :: AfterHeaderAndLine ( nl) | NewlineSeparator :: AfterHeaderAndWhenNeeded ( nl) => nl,
322+ } ) ;
323+
324+ for & ( line_type, content) in lines {
325+ self . push ( line_type. to_prefix ( ) ) ;
326+ self . push_str ( std:: str:: from_utf8 ( content) . map_err ( |e| std:: io:: Error :: new ( ErrorKind :: Other , e) ) ?) ;
327+
328+ match newline {
329+ NewlineSeparator :: AfterHeaderAndLine ( nl) => {
330+ self . push_str ( nl) ;
331+ }
332+ NewlineSeparator :: AfterHeaderAndWhenNeeded ( nl) => {
333+ if !content. ends_with_str ( nl) {
334+ self . push_str ( nl) ;
335+ }
336+ }
337+ }
338+ }
298339 Ok ( ( ) )
299340 }
300341
@@ -307,9 +348,32 @@ pub(super) mod _impl {
307348 impl ConsumeHunk for Vec < u8 > {
308349 type Out = Self ;
309350
310- fn consume_hunk ( & mut self , _: u32 , _: u32 , _: u32 , _: u32 , header : & str , hunk : & [ u8 ] ) -> std:: io:: Result < ( ) > {
311- self . push_str ( header) ;
312- self . push_str ( hunk) ;
351+ fn consume_hunk (
352+ & mut self ,
353+ header : HunkHeader ,
354+ lines : & [ ( DiffLineType , & [ u8 ] ) ] ,
355+ newline : NewlineSeparator < ' _ > ,
356+ ) -> std:: io:: Result < ( ) > {
357+ self . push_str ( header. to_string ( ) ) ;
358+ self . push_str ( match newline {
359+ NewlineSeparator :: AfterHeaderAndLine ( nl) | NewlineSeparator :: AfterHeaderAndWhenNeeded ( nl) => nl,
360+ } ) ;
361+
362+ for & ( line_type, content) in lines {
363+ self . push ( line_type. to_prefix ( ) as u8 ) ;
364+ self . extend_from_slice ( content) ;
365+
366+ match newline {
367+ NewlineSeparator :: AfterHeaderAndLine ( nl) => {
368+ self . push_str ( nl) ;
369+ }
370+ NewlineSeparator :: AfterHeaderAndWhenNeeded ( nl) => {
371+ if !content. ends_with_str ( nl) {
372+ self . push_str ( nl) ;
373+ }
374+ }
375+ }
376+ }
313377 Ok ( ( ) )
314378 }
315379
0 commit comments