From 72a592031c0d26d194bf4da64e22d29e21acb33f Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Mon, 3 Nov 2025 11:29:50 -0700 Subject: [PATCH 1/4] grok_bin_oct_hex: Move declarations close to first use --- numeric.c | 45 ++++++++++++++++++++++----------------------- 1 file changed, 22 insertions(+), 23 deletions(-) diff --git a/numeric.c b/numeric.c index 60ee37c8ead2..d90454f16730 100644 --- a/numeric.c +++ b/numeric.c @@ -376,23 +376,8 @@ Perl_grok_bin_oct_hex(pTHX_ const char *start, ) { - const char *s0 = start; - const char *s; - STRLEN len = *len_p; - STRLEN bytes_so_far; /* How many real digits have been processed */ - UV value = 0; - NV value_nv = 0; - const PERL_UINT_FAST8_T base = 1 << shift; /* 2, 8, or 16 */ - const UV max_div= UV_MAX / base; /* Value above which, the next digit - processed would overflow */ - const I32 input_flags = *flags; - const bool allow_underscores = - cBOOL(input_flags & PERL_SCAN_ALLOW_UNDERSCORES); - bool overflowed = FALSE; - - /* In overflows, this keeps track of how much to multiply the overflowed NV - * by as we continue to parse the remaining digits */ - NV factor = 0; + PERL_ARGS_ASSERT_GROK_BIN_OCT_HEX; + ASSUME(inRANGE(shift, 1, 4) && shift != 2); /* This function unifies the core of grok_bin, grok_oct, and grok_hex. It * is optimized for hex conversion. For example, it uses XDIGIT_VALUE to @@ -410,13 +395,16 @@ Perl_grok_bin_oct_hex(pTHX_ const char *start, * ... */ - PERL_ARGS_ASSERT_GROK_BIN_OCT_HEX; - - ASSUME(inRANGE(shift, 1, 4) && shift != 2); - + const I32 input_flags = *flags; /* Clear output flags; unlikely to find a problem that sets them */ *flags = 0; + const bool allow_underscores = + cBOOL(input_flags & PERL_SCAN_ALLOW_UNDERSCORES); + const char * s = start; + const char * s0 = s; /* Where the significant digits start */ + STRLEN len = *len_p; + if (!(input_flags & PERL_SCAN_DISALLOW_PREFIX)) { /* strip off leading b or 0b; x or 0x. @@ -435,6 +423,7 @@ Perl_grok_bin_oct_hex(pTHX_ const char *start, } s = s0; /* s0 potentially advanced from 'start' */ + UV value = 0; /* Unroll the loop so that the first 8 digits are branchless except for the * switch. A ninth hex one overflows a 32 bit word. */ @@ -488,10 +477,20 @@ Perl_grok_bin_oct_hex(pTHX_ const char *start, break; } - bytes_so_far = s - s0; - factor = shift << bytes_so_far; + /* How many real digits have been processed */ + STRLEN bytes_so_far = s - s0; + + /* In overflows, this keeps track of how much to multiply the overflowed NV + * by as we continue to parse the remaining digits */ + NV factor = shift << bytes_so_far; len -= bytes_so_far; + bool overflowed = FALSE; + NV value_nv = 0; + const PERL_UINT_FAST8_T base = 1 << shift; /* 2, 8, or 16 */ + const UV max_div= UV_MAX / base; /* Value above which, the next digit + processed would overflow */ + for (; len--; s++) { if (generic_isCC_(*s, class_bit)) { /* Write it in this wonky order with a goto to attempt to get the From 68a142d81e7480390ae932aaf4b4995265d285e1 Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Tue, 4 Nov 2025 10:21:33 -0700 Subject: [PATCH 2/4] grok_bin_oct_hex: Use upper bound, not length remaining Creating an upper limit to parse allows us to write while (s < e) for example, and that limit is constant, requiring fewer operations than the other way, where the remaining length keeps getting changed. It also allows this commit to move an 's++' a couple of lines to get rid of comparing against the number '8' which could get out of sync. --- numeric.c | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/numeric.c b/numeric.c index d90454f16730..27671fd1facd 100644 --- a/numeric.c +++ b/numeric.c @@ -403,21 +403,22 @@ Perl_grok_bin_oct_hex(pTHX_ const char *start, cBOOL(input_flags & PERL_SCAN_ALLOW_UNDERSCORES); const char * s = start; const char * s0 = s; /* Where the significant digits start */ - STRLEN len = *len_p; + const char * e = start + *len_p; if (!(input_flags & PERL_SCAN_DISALLOW_PREFIX)) { /* strip off leading b or 0b; x or 0x. for compatibility silently suffer "b" and "0b" as valid binary; "x" and "0x" as valid hex numbers. */ - if (len >= 1) { + if (e - s > 1) { if (isALPHA_FOLD_EQ(s0[0], prefix)) { s0++; - len--; } - else if (len >= 2 && s0[0] == '0' && (isALPHA_FOLD_EQ(s0[1], prefix))) { + else if ( e - s > 2 + && s0[0] == '0' + && (isALPHA_FOLD_EQ(s0[1], prefix))) + { s0+=2; - len-=2; } } } @@ -427,7 +428,7 @@ Perl_grok_bin_oct_hex(pTHX_ const char *start, /* Unroll the loop so that the first 8 digits are branchless except for the * switch. A ninth hex one overflows a 32 bit word. */ - switch (len) { + switch (e - s) { case 0: return 0; default: @@ -468,12 +469,12 @@ Perl_grok_bin_oct_hex(pTHX_ const char *start, case 1: if (UNLIKELY(! generic_isCC_(*s, class_bit))) break; value = (value << shift) | XDIGIT_VALUE(*s); + s++; - if (LIKELY(len <= 8)) { + if (LIKELY(s >= e)) { return value; } - s++; break; } @@ -483,7 +484,6 @@ Perl_grok_bin_oct_hex(pTHX_ const char *start, /* In overflows, this keeps track of how much to multiply the overflowed NV * by as we continue to parse the remaining digits */ NV factor = shift << bytes_so_far; - len -= bytes_so_far; bool overflowed = FALSE; NV value_nv = 0; @@ -491,7 +491,7 @@ Perl_grok_bin_oct_hex(pTHX_ const char *start, const UV max_div= UV_MAX / base; /* Value above which, the next digit processed would overflow */ - for (; len--; s++) { + for (; s < e; s++) { if (generic_isCC_(*s, class_bit)) { /* Write it in this wonky order with a goto to attempt to get the compiler to make the common case integer-only loop pretty tight. @@ -540,7 +540,7 @@ Perl_grok_bin_oct_hex(pTHX_ const char *start, } if ( *s == '_' - && len + && s < e - 1 && allow_underscores && generic_isCC_(s[1], class_bit) @@ -550,7 +550,6 @@ Perl_grok_bin_oct_hex(pTHX_ const char *start, || UNLIKELY((input_flags & PERL_SCAN_ALLOW_MEDIAL_UNDERSCORES) != PERL_SCAN_ALLOW_MEDIAL_UNDERSCORES))) { - --len; ++s; goto redo; } From d4b3580e9898bdf8d678e0fc80d188f18a8c6898 Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Tue, 4 Nov 2025 10:31:14 -0700 Subject: [PATCH 3/4] grok_bin_oct_hex: Move case: in switch() This removes a special case, paving the way for potential future generalizations. --- numeric.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/numeric.c b/numeric.c index 27671fd1facd..e67302f2730b 100644 --- a/numeric.c +++ b/numeric.c @@ -429,8 +429,6 @@ Perl_grok_bin_oct_hex(pTHX_ const char *start, /* Unroll the loop so that the first 8 digits are branchless except for the * switch. A ninth hex one overflows a 32 bit word. */ switch (e - s) { - case 0: - return 0; default: if (UNLIKELY(! generic_isCC_(*s, class_bit))) break; value = (value << shift) | XDIGIT_VALUE(*s); @@ -470,7 +468,8 @@ Perl_grok_bin_oct_hex(pTHX_ const char *start, if (UNLIKELY(! generic_isCC_(*s, class_bit))) break; value = (value << shift) | XDIGIT_VALUE(*s); s++; - + /* FALLTHROUGH */ + case 0: if (LIKELY(s >= e)) { return value; } From b89e80f130914a80bc47fa72abd81cbaf9b7b52b Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Tue, 4 Nov 2025 10:34:02 -0700 Subject: [PATCH 4/4] grok_bin_oct_hex: Move declaration of variable to first need This caused an extra assignment that was then discarded --- numeric.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/numeric.c b/numeric.c index e67302f2730b..351435cbc627 100644 --- a/numeric.c +++ b/numeric.c @@ -402,7 +402,6 @@ Perl_grok_bin_oct_hex(pTHX_ const char *start, const bool allow_underscores = cBOOL(input_flags & PERL_SCAN_ALLOW_UNDERSCORES); const char * s = start; - const char * s0 = s; /* Where the significant digits start */ const char * e = start + *len_p; if (!(input_flags & PERL_SCAN_DISALLOW_PREFIX)) { @@ -411,19 +410,19 @@ Perl_grok_bin_oct_hex(pTHX_ const char *start, for compatibility silently suffer "b" and "0b" as valid binary; "x" and "0x" as valid hex numbers. */ if (e - s > 1) { - if (isALPHA_FOLD_EQ(s0[0], prefix)) { - s0++; + if (isALPHA_FOLD_EQ(s[0], prefix)) { + s++; } else if ( e - s > 2 - && s0[0] == '0' - && (isALPHA_FOLD_EQ(s0[1], prefix))) + && s[0] == '0' + && (isALPHA_FOLD_EQ(s[1], prefix))) { - s0+=2; + s += 2; } } } - s = s0; /* s0 potentially advanced from 'start' */ + const char * const s0 = s; /* Where the significant digits start */ UV value = 0; /* Unroll the loop so that the first 8 digits are branchless except for the