diff --git a/numeric.c b/numeric.c index 60ee37c8ead2..351435cbc627 100644 --- a/numeric.c +++ b/numeric.c @@ -376,23 +376,8 @@ Perl_grok_bin_oct_hex(pTHX_ const char *start, ) { - const char *s0 = start; - const char *s; - STRLEN len = *len_p; - STRLEN bytes_so_far; /* How many real digits have been processed */ - UV value = 0; - NV value_nv = 0; - const PERL_UINT_FAST8_T base = 1 << shift; /* 2, 8, or 16 */ - const UV max_div= UV_MAX / base; /* Value above which, the next digit - processed would overflow */ - const I32 input_flags = *flags; - const bool allow_underscores = - cBOOL(input_flags & PERL_SCAN_ALLOW_UNDERSCORES); - bool overflowed = FALSE; - - /* In overflows, this keeps track of how much to multiply the overflowed NV - * by as we continue to parse the remaining digits */ - NV factor = 0; + PERL_ARGS_ASSERT_GROK_BIN_OCT_HEX; + ASSUME(inRANGE(shift, 1, 4) && shift != 2); /* This function unifies the core of grok_bin, grok_oct, and grok_hex. It * is optimized for hex conversion. For example, it uses XDIGIT_VALUE to @@ -410,37 +395,39 @@ Perl_grok_bin_oct_hex(pTHX_ const char *start, * ... */ - PERL_ARGS_ASSERT_GROK_BIN_OCT_HEX; - - ASSUME(inRANGE(shift, 1, 4) && shift != 2); - + const I32 input_flags = *flags; /* Clear output flags; unlikely to find a problem that sets them */ *flags = 0; + const bool allow_underscores = + cBOOL(input_flags & PERL_SCAN_ALLOW_UNDERSCORES); + const char * s = start; + const char * e = start + *len_p; + if (!(input_flags & PERL_SCAN_DISALLOW_PREFIX)) { /* strip off leading b or 0b; x or 0x. for compatibility silently suffer "b" and "0b" as valid binary; "x" and "0x" as valid hex numbers. */ - if (len >= 1) { - if (isALPHA_FOLD_EQ(s0[0], prefix)) { - s0++; - len--; + if (e - s > 1) { + if (isALPHA_FOLD_EQ(s[0], prefix)) { + s++; } - else if (len >= 2 && s0[0] == '0' && (isALPHA_FOLD_EQ(s0[1], prefix))) { - s0+=2; - len-=2; + else if ( e - s > 2 + && s[0] == '0' + && (isALPHA_FOLD_EQ(s[1], prefix))) + { + s += 2; } } } - s = s0; /* s0 potentially advanced from 'start' */ + const char * const s0 = s; /* Where the significant digits start */ + UV value = 0; /* Unroll the loop so that the first 8 digits are branchless except for the * switch. A ninth hex one overflows a 32 bit word. */ - switch (len) { - case 0: - return 0; + switch (e - s) { default: if (UNLIKELY(! generic_isCC_(*s, class_bit))) break; value = (value << shift) | XDIGIT_VALUE(*s); @@ -479,20 +466,30 @@ Perl_grok_bin_oct_hex(pTHX_ const char *start, case 1: if (UNLIKELY(! generic_isCC_(*s, class_bit))) break; value = (value << shift) | XDIGIT_VALUE(*s); - - if (LIKELY(len <= 8)) { + s++; + /* FALLTHROUGH */ + case 0: + if (LIKELY(s >= e)) { return value; } - s++; break; } - bytes_so_far = s - s0; - factor = shift << bytes_so_far; - len -= bytes_so_far; + /* How many real digits have been processed */ + STRLEN bytes_so_far = s - s0; + + /* In overflows, this keeps track of how much to multiply the overflowed NV + * by as we continue to parse the remaining digits */ + NV factor = shift << bytes_so_far; + + bool overflowed = FALSE; + NV value_nv = 0; + const PERL_UINT_FAST8_T base = 1 << shift; /* 2, 8, or 16 */ + const UV max_div= UV_MAX / base; /* Value above which, the next digit + processed would overflow */ - for (; len--; s++) { + for (; s < e; s++) { if (generic_isCC_(*s, class_bit)) { /* Write it in this wonky order with a goto to attempt to get the compiler to make the common case integer-only loop pretty tight. @@ -541,7 +538,7 @@ Perl_grok_bin_oct_hex(pTHX_ const char *start, } if ( *s == '_' - && len + && s < e - 1 && allow_underscores && generic_isCC_(s[1], class_bit) @@ -551,7 +548,6 @@ Perl_grok_bin_oct_hex(pTHX_ const char *start, || UNLIKELY((input_flags & PERL_SCAN_ALLOW_MEDIAL_UNDERSCORES) != PERL_SCAN_ALLOW_MEDIAL_UNDERSCORES))) { - --len; ++s; goto redo; }