Skip to content
Open
80 changes: 67 additions & 13 deletions include/fast_float/ascii_number.h
Original file line number Diff line number Diff line change
Expand Up @@ -351,52 +351,79 @@ parse_number_string(UC const *p, UC const *pend,
}
}
UC const *const start_digits = p;
const UC separator = options.digit_separator;
const bool has_separator = (separator != UC('\0'));

uint64_t i = 0; // an unsigned int avoids signed overflows (which are bad)
int64_t digit_count = 0;
UC const *first_digit_ptr = nullptr;

while ((p != pend) && is_integer(*p)) {
while (p != pend) {
if (has_separator && *p == separator) {
++p;
continue;
}
if (!is_integer(*p)) {
break;
}
if (digit_count == 0) {
first_digit_ptr = p;
}
// a multiplication by 10 is cheaper than an arbitrary integer
// multiplication
i = 10 * i +
uint64_t(*p -
UC('0')); // might overflow, we will handle the overflow later
++p;
++digit_count;
}
UC const *const end_of_integer_part = p;
int64_t digit_count = int64_t(end_of_integer_part - start_digits);
answer.integer = span<UC const>(start_digits, size_t(digit_count));
answer.integer =
span<UC const>(start_digits, size_t(end_of_integer_part - start_digits));
FASTFLOAT_IF_CONSTEXPR17(basic_json_fmt) {
// at least 1 digit in integer part, without leading zeros
if (digit_count == 0) {
return report_parse_error<UC>(p, parse_error::no_digits_in_integer_part);
}
if ((start_digits[0] == UC('0') && digit_count > 1)) {
if (digit_count > 1 && *first_digit_ptr == UC('0')) {
return report_parse_error<UC>(start_digits,
parse_error::leading_zeros_in_integer_part);
}
}

int64_t exponent = 0;
int64_t fractional_digit_count = 0;
bool const has_decimal_point = (p != pend) && (*p == decimal_point);
if (has_decimal_point) {
++p;
UC const *before = p;
// can occur at most twice without overflowing, but let it occur more, since
// for integers with many digits, digit parsing is the primary bottleneck.
loop_parse_if_eight_digits(p, pend, i);
if (!has_separator) {
loop_parse_if_eight_digits(p, pend, i);
fractional_digit_count += int64_t(p - before);
}

while ((p != pend) && is_integer(*p)) {
while (p != pend) {
if (has_separator && *p == separator) {
++p;
continue;
}
if (!is_integer(*p)) {
break;
}
uint8_t digit = uint8_t(*p - UC('0'));
++p;
i = i * 10 + digit; // in rare cases, this will overflow, but that's ok
++fractional_digit_count;
}
exponent = before - p;
exponent = -fractional_digit_count;
answer.fraction = span<UC const>(before, size_t(p - before));
digit_count -= exponent;
digit_count += fractional_digit_count;
}
FASTFLOAT_IF_CONSTEXPR17(basic_json_fmt) {
// at least 1 digit in fractional part
if (has_decimal_point && exponent == 0) {
if (has_decimal_point && fractional_digit_count == 0) {
return report_parse_error<UC>(p,
parse_error::no_digits_in_fractional_part);
}
Expand Down Expand Up @@ -434,7 +461,14 @@ parse_number_string(UC const *p, UC const *pend,
// Otherwise, we will be ignoring the 'e'.
p = location_of_e;
} else {
while ((p != pend) && is_integer(*p)) {
while (p != pend) {
if (has_separator && *p == separator) {
++p;
continue;
}
if (!is_integer(*p)) {
break;
}
uint8_t digit = uint8_t(*p - UC('0'));
if (exp_number < 0x10000000) {
exp_number = 10 * exp_number + digit;
Expand Down Expand Up @@ -467,7 +501,8 @@ parse_number_string(UC const *p, UC const *pend,
// We need to be mindful of the case where we only have zeroes...
// E.g., 0.000000000...000.
UC const *start = start_digits;
while ((start != pend) && (*start == UC('0') || *start == decimal_point)) {
while ((start != pend) && (*start == UC('0') || *start == decimal_point ||
(has_separator && *start == separator))) {
if (*start == UC('0')) {
digit_count--;
}
Expand All @@ -484,19 +519,38 @@ parse_number_string(UC const *p, UC const *pend,
UC const *int_end = p + answer.integer.len();
uint64_t const minimal_nineteen_digit_integer{1000000000000000000};
while ((i < minimal_nineteen_digit_integer) && (p != int_end)) {
if (has_separator && *p == separator) {
++p;
continue;
}
i = i * 10 + uint64_t(*p - UC('0'));
++p;
}
if (i >= minimal_nineteen_digit_integer) { // We have a big integer
exponent = end_of_integer_part - p + exp_number;
int64_t remaining_integer_digits = 0;
while (p != int_end) {
if (has_separator && *p == separator) {
++p;
continue;
}
++p;
++remaining_integer_digits;
}
exponent = remaining_integer_digits + exp_number;
} else { // We have a value with a fractional component.
p = answer.fraction.ptr;
UC const *frac_end = p + answer.fraction.len();
int64_t fraction_digits_consumed = 0;
while ((i < minimal_nineteen_digit_integer) && (p != frac_end)) {
if (has_separator && *p == separator) {
++p;
continue;
}
i = i * 10 + uint64_t(*p - UC('0'));
++p;
++fraction_digits_consumed;
}
exponent = answer.fraction.ptr - p + exp_number;
exponent = exp_number - fraction_digits_consumed;
}
// We have now corrected both exponent and i, to a truncated value
}
Expand Down
14 changes: 12 additions & 2 deletions include/fast_float/float_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -70,15 +70,25 @@ using from_chars_result = from_chars_result_t<char>;

/**
 * Options passed to from_chars_advanced to control how the input is parsed.
 * UC is the character type of the input. Every member is initialised from
 * the constructor argument of the same position.
 */
template <typename UC> struct parse_options_t {
// NOTE(review): this view shows two parameter lists and two initializer lists
// for the one constructor (without and with `sep`/`opts`); it looks like the
// pre-change and post-change text rendered together -- confirm against the
// actual header. Only the second list initialises digit_separator and
// format_options.
constexpr explicit parse_options_t(chars_format fmt = chars_format::general,
UC dot = UC('.'), int b = 10)
: format(fmt), decimal_point(dot), base(b) {}
UC dot = UC('.'), int b = 10,
UC sep = UC('\0'), uint8_t opts = 0)
: format(fmt), decimal_point(dot), base(b), digit_separator(sep),
format_options(opts) {}

/** Which number formats are accepted (defaults to chars_format::general) */
chars_format format;
/** The character used as decimal point (defaults to '.') */
UC decimal_point;
/** The base used for integers (defaults to 10) */
int base;
/** The character used as digit separator. Use '\0' (the default) to
* disable */
UC digit_separator;
/** Additional format options: a bitmask built from the flag constants
* declared below. Defaults to 0 (no option set). */
uint8_t format_options;

/** Option to skip prefixes like 0x, 0b. When this bit is set in
* format_options, from_chars_advanced steps over a leading '0' followed by
* 'x'/'X' or 'b'/'B' before parsing the rest of the input. */
static constexpr uint8_t skip_prefix = 1;
};

using parse_options = parse_options_t<char>;
Expand Down
7 changes: 7 additions & 0 deletions include/fast_float/parse_number.h
Original file line number Diff line number Diff line change
Expand Up @@ -476,6 +476,13 @@ template <typename T, typename UC>
FASTFLOAT_CONSTEXPR20 from_chars_result_t<UC>
from_chars_advanced(UC const *first, UC const *last, T &value,
parse_options_t<UC> options) noexcept {
if (((options.format_options & parse_options_t<UC>::skip_prefix) != 0) &&
(last - first >= 2) && (*first == UC('0'))) {
const UC c_low = UC(first[1] | UC(0x20));
if (c_low == UC('x') || c_low == UC('b')) {
first += 2;
}
}
return from_chars_advanced_caller<
size_t(is_supported_float_type<T>::value) +
2 * size_t(is_supported_integer_type<T>::value)>::call(first, last, value,
Expand Down
53 changes: 52 additions & 1 deletion tests/basictest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -681,6 +681,57 @@ TEST_CASE("decimal_point_parsing") {
}
}

// Verifies that, with '_' configured as the digit separator, separators are
// ignored in the integer part, the fractional part and the exponent, and that
// the whole input is consumed in every case.
TEST_CASE("digit_separator") {
  // Textual input paired with the value it must parse to.
  struct separator_case {
    char const *text;
    double expected;
  };
  separator_case const cases[] = {
      {"1_000", 1000.0},          // separator inside the integer part
      {"1.00_5", 1.005},          // separator inside the fraction
      {"1e1_0", 1e10},            // separator inside the exponent
      {"1_5e1_2", 15e12},         // integer part and exponent
      {"1_5.0_5e1_2", 15.05e12},  // all three parts at once
  };

  fast_float::parse_options options{};
  options.digit_separator = '_';

  double result;
  for (separator_case const &current : cases) {
    std::string const input = current.text;
    char const *const begin = input.data();
    char const *const end = begin + input.size();
    auto answer = fast_float::from_chars_advanced(begin, end, result, options);
    CHECK_MESSAGE(answer.ec == std::errc(), "expected parse success");
    CHECK_MESSAGE(answer.ptr == end, "Parsing should have stopped at end");
    CHECK_EQ(result, current.expected);
  }
}

TEST_CASE("issue19") {
std::string const input = "234532.3426362,7869234.9823,324562.645";
double result;
Expand Down Expand Up @@ -2452,4 +2503,4 @@ TEST_CASE("integer_times_pow10") {
all::verify_integer_times_pow10(std::numeric_limits<uint64_t>::max(), 42);
all::verify_integer_times_pow10(std::numeric_limits<uint64_t>::max(), -42);
}
}
}