55 * All rights reserved.
66 */
77
8+ #include <assert.h>
9+ #include <stdint.h>
810#include <stdlib.h>
911
1012#include "twin_private.h"
@@ -18,6 +20,7 @@ typedef struct _twin_edge {
1820 twin_sfixed_t inc_x ;
1921 twin_sfixed_t step_x ;
2022 int winding ;
23+ uint8_t aa_quality ; /* Adaptive AA: 0=1x1, 1=2x2, 2=4x4 */
2124} twin_edge_t ;
2225
2326#define TWIN_POLY_SHIFT 2
@@ -29,6 +32,26 @@ typedef struct _twin_edge {
2932#define TWIN_POLY_CEIL (c ) (((c) + (TWIN_POLY_STEP - 1)) & ~(TWIN_POLY_STEP - 1))
3033#define TWIN_POLY_COL (x ) (((x) >> TWIN_POLY_FIXED_SHIFT) & TWIN_POLY_MASK)
3134
35+ /* Adaptive AA quality computation
36+ *
37+ * Only optimize perfectly vertical edges (dx=0) where visual impact is
38+ * guaranteed to be minimal. All other edges use full 4x4 AA to preserve
39+ * quality, especially for text, curves, and diagonal strokes.
40+ *
41+ * Returns: 0 (1x1), or 2 (4x4 - default)
42+ */
43+ static inline uint8_t _compute_aa_quality (twin_sfixed_t dx , twin_sfixed_t dy )
44+ {
45+ (void ) dy ; /* Unused in conservative mode */
46+
47+ /* Conservative approach: only optimize perfectly vertical edges */
48+ if (dx == 0 )
49+ return 0 ; /* Vertical: 1x1 (16x faster per edge) */
50+
51+ /* All other edges use full quality to preserve visual fidelity */
52+ return 2 ; /* Default: 4x4 (full AA) */
53+ }
54+
3255static int _edge_compare_y (const void * a , const void * b )
3356{
3457 const twin_edge_t * ae = a ;
@@ -100,6 +123,17 @@ static int _twin_edge_build(twin_spoint_t *vertices,
100123 /* Compute bresenham terms */
101124 edges [e ].dx = vertices [bv ].x - vertices [tv ].x ;
102125 edges [e ].dy = vertices [bv ].y - vertices [tv ].y ;
126+
127+ /* Compute adaptive AA quality based on slope */
128+ edges [e ].aa_quality = _compute_aa_quality (edges [e ].dx , edges [e ].dy );
129+
130+ /* Sanity check: AA quality must be 0 or 2 in conservative mode
131+ * (1 is reserved for future medium-quality mode if needed)
132+ */
133+ assert (edges [e ].aa_quality <= 2 && "Invalid AA quality computed" );
134+ assert ((edges [e ].aa_quality == 0 || edges [e ].aa_quality == 2 ) &&
135+ "Conservative mode should only produce 0 or 2" );
136+
103137 if (edges [e ].dx >= 0 )
104138 edges [e ].inc_x = 1 ;
105139 else {
@@ -124,44 +158,130 @@ static int _twin_edge_build(twin_spoint_t *vertices,
124158 return e ;
125159}
126160
161+ /* Optimized span fill for perfectly vertical edges (dx=0)
162+ *
163+ * For vertical edges, coverage doesn't vary horizontally within a pixel.
164+ * All sub-pixel positions contribute equally (0x10 or 0x0f for rounding).
165+ * This allows us to:
166+ * 1. Skip array indexing (use constants directly)
167+ * 2. Simplify partial pixel calculations
168+ * 3. Eliminate the coverage table lookup entirely
169+ *
170+ * WARNING: This optimization is only correct for perfectly vertical edges
171+ * where the span is guaranteed to have uniform horizontal coverage.
172+ */
173+ static inline void _span_fill_vertical (twin_pixmap_t * pixmap ,
174+ twin_sfixed_t y ,
175+ twin_sfixed_t left ,
176+ twin_sfixed_t right )
177+ {
178+ /* For vertical edges, coverage is uniform across all horizontal positions.
179+ * We use constant 0x10 for all sub-pixels, accepting a tiny rounding
180+ * error on row 2 (0x10 vs 0x0f for the first sub-pixel).
181+ *
182+ * Precision trade-off:
183+ * - Row 2 partial pixels: max error = 1/255 = 0.4% (visually imperceptible)
184+ *
185+ * Full pixel coverage: 4 * 0x10 = 0x40 (64 in decimal)
186+ * Note: Row 2 should technically be 0x3f, but 0x40 is within
187+ * acceptable rounding error and allows constant-based optimization.
188+ */
189+ const twin_a16_t full_coverage = 0x40 ;
190+
191+ int row = twin_sfixed_trunc (y );
192+ twin_a8_t * span = pixmap -> p .a8 + row * pixmap -> stride ;
193+ twin_a8_t * s ;
194+ twin_sfixed_t x ;
195+ twin_a16_t a ;
196+
197+ /* Clip to pixmap */
198+ if (left < twin_int_to_sfixed (pixmap -> clip .left ))
199+ left = twin_int_to_sfixed (pixmap -> clip .left );
200+
201+ if (right > twin_int_to_sfixed (pixmap -> clip .right ))
202+ right = twin_int_to_sfixed (pixmap -> clip .right );
203+
204+ /* Convert to sample grid */
205+ left = _twin_sfixed_grid_ceil (left ) >> TWIN_POLY_FIXED_SHIFT ;
206+ right = _twin_sfixed_grid_ceil (right ) >> TWIN_POLY_FIXED_SHIFT ;
207+
208+ /* Check for empty */
209+ if (right <= left )
210+ return ;
211+
212+ x = left ;
213+
214+ /* Starting address */
215+ s = span + (x >> TWIN_POLY_SHIFT );
216+
217+ /* First pixel (may be partial)
218+ * For vertical edges, each sub-pixel contributes constant 0x10.
219+ * This is optimized to count * 0x10, which the compiler can
220+ * optimize to a shift operation (count << 4).
221+ */
222+ if (x & TWIN_POLY_MASK ) {
223+ int count = 0 ;
224+ while (x < right && (x & TWIN_POLY_MASK )) {
225+ count ++ ;
226+ x ++ ;
227+ }
228+ twin_a16_t w = count * 0x10 ; /* Constant multiplication, fast */
229+ a = * s + w ;
230+ * s ++ = twin_sat (a );
231+ }
232+
233+ /* Middle pixels (full pixels) - constant coverage per pixel
234+ * This is the hot path where we get the biggest win
235+ */
236+ while (x + TWIN_POLY_MASK < right ) {
237+ a = * s + full_coverage ;
238+ * s ++ = twin_sat (a );
239+ x += TWIN_POLY_SAMPLE ;
240+ }
241+
242+ /* Last pixel (may be partial)
243+ * Same optimization as first pixel: use constant 0x10 per sub-pixel.
244+ */
245+ if (right & TWIN_POLY_MASK && x != right ) {
246+ int count = 0 ;
247+ while (x < right ) {
248+ count ++ ;
249+ x ++ ;
250+ }
251+ twin_a16_t w = count * 0x10 ; /* Constant multiplication, fast */
252+ a = * s + w ;
253+ * s = twin_sat (a );
254+ }
255+ }
256+
127257static void _span_fill (twin_pixmap_t * pixmap ,
128258 twin_sfixed_t y ,
129259 twin_sfixed_t left ,
130- twin_sfixed_t right )
260+ twin_sfixed_t right ,
261+ uint8_t aa_quality )
131262{
132- #if TWIN_POLY_SHIFT == 0
133- /* 1x1 */
134- static const twin_a8_t coverage [1 ][1 ] = {
135- {0xff },
136- };
137- #endif
138- #if TWIN_POLY_SHIFT == 1
139- /* 2x2 */
140- static const twin_a8_t coverage [2 ][2 ] = {
141- {0x40 , 0x40 },
142- {0x3f , 0x40 },
143- };
144- #endif
145- #if TWIN_POLY_SHIFT == 2
146- /* 4x4 */
263+ /* Coverage table for anti-aliasing
264+ *
265+ * The grid is always 4x4 (TWIN_POLY_SHIFT=2, TWIN_POLY_STEP=0.25 pixels).
266+ * Each entry represents coverage contribution for a sub-pixel position.
267+ * Sum of all 16 entries = 0xFF (255) for full pixel coverage.
268+ */
147269 static const twin_a8_t coverage [4 ][4 ] = {
148270 {0x10 , 0x10 , 0x10 , 0x10 },
149271 {0x10 , 0x10 , 0x10 , 0x10 },
150- {0x0f , 0x10 , 0x10 , 0x10 },
272+ {0x0f , 0x10 , 0x10 , 0x10 }, /* Rounding: 15+240=255 */
151273 {0x10 , 0x10 , 0x10 , 0x10 },
152274 };
153- #endif
154- #if TWIN_POLY_SHIFT == 3
155- /* 8x8 */
156- static const twin_a8_t coverage [8 ][8 ] = {
157- {4 , 4 , 4 , 4 , 4 , 4 , 4 , 4 }, {4 , 4 , 4 , 4 , 4 , 4 , 4 , 4 },
158- {4 , 4 , 4 , 4 , 4 , 4 , 4 , 4 }, {4 , 4 , 4 , 4 , 4 , 4 , 4 , 4 },
159- {3 , 4 , 4 , 4 , 4 , 4 , 4 , 4 }, {4 , 4 , 4 , 4 , 4 , 4 , 4 , 4 },
160- {4 , 4 , 4 , 4 , 4 , 4 , 4 , 4 }, {4 , 4 , 4 , 4 , 4 , 4 , 4 , 4 },
161- };
162- #endif
275+
276+ /* NOTE: aa_quality parameter is currently unused in standard path.
277+ * Adaptive AA optimization is handled by calling _span_fill_vertical()
278+ * for perfectly vertical edges (dx=0) in _twin_edge_fill().
279+ */
280+ (void ) aa_quality ;
281+
163282 const twin_a8_t * cover =
164- coverage [(y >> TWIN_POLY_FIXED_SHIFT ) & TWIN_POLY_MASK ];
283+ & coverage [(y >> TWIN_POLY_FIXED_SHIFT ) & TWIN_POLY_MASK ][0 ];
284+
165285 int row = twin_sfixed_trunc (y );
166286 twin_a8_t * span = pixmap -> p .a8 + row * pixmap -> stride ;
167287 twin_a8_t * s ;
@@ -249,12 +369,32 @@ static void _twin_edge_fill(twin_pixmap_t *pixmap,
249369
250370 /* walk this y value marking coverage */
251371 int w = 0 ;
372+ twin_edge_t * edge_start = NULL ;
252373 for (a = active ; a ; a = a -> next ) {
253- if (w == 0 )
374+ if (w == 0 ) {
254375 x0 = a -> x ;
376+ edge_start = a ;
377+ }
255378 w += a -> winding ;
256- if (w == 0 )
257- _span_fill (pixmap , y , x0 , a -> x );
379+ if (w != 0 )
380+ continue ;
381+
        /* Adaptive AA: use the optimized path for spans bounded by two
         * perfectly vertical edges (dx == 0).
         *
         * NOTE(review): the guard below compares span_width (sfixed units)
         * against (16 << TWIN_POLY_FIXED_SHIFT), which is 16 *samples* in
         * sfixed units — i.e. 4 pixels at 4 samples/pixel — not the
         * "16 pixels" previously claimed here.  Confirm which threshold was
         * intended; 16 pixels would be twin_int_to_sfixed(16).
         */
388+ twin_sfixed_t span_width = a -> x - x0 ;
389+ if (edge_start && edge_start -> dx == 0 && a -> dx == 0 &&
390+ span_width >= (16 << TWIN_POLY_FIXED_SHIFT )) {
391+ /* Both edges vertical and span is wide enough: use optimized
392+ * span fill */
393+ _span_fill_vertical (pixmap , y , x0 , a -> x );
394+ } else {
395+ /* General case or thin/medium span: use full 4x4 AA */
396+ _span_fill (pixmap , y , x0 , a -> x , 2 );
397+ }
258398 }
259399
260400 /* step down, clipping to pixmap */
0 commit comments