wsutil: Add David Gay's dtoa for shortest sufficient double representation
For filtering with equality on FT_DOUBLEs, we need to produce a string value which, when converted back to a double, will compare as equal to the original value. That can be achieved by using a printf %g conversion with DBL_DECIMAL_DIG (as g_ascii_dtostr does), and that's what the FTREPR_DFILTER string representation currently does. Note that DBL_DECIMAL_DIG is not the same as DBL_DIG, which is the number of significant digits guaranteed not to change when converting a string to a double and back to a string. Using only DBL_DIG significant digits is not sufficient; for example, the IEEE 754 double precision binary floating point number closest to 1/7 (0x1.2492492492492p-3 in hexadecimal floating point notation) converted with DBL_DIG significant digits yields the string "0.142857142857143". That string, if converted back to a double, produces 0x1.2492492492497p-3, which does *NOT* compare as equal. A longer string, such as "0.14285714285714285", is necessary for round-trip conversion back to a double to work. However, there are many strings with between DBL_DIG and DBL_DECIMAL_DIG significant digits that convert to the same double; the one with the largest number of trailing zeros (that thus becomes shortest after removing the trailing zeros) may not be the numerically closest to the exact floating point number and thus not the one derived via the direct algorithm. E.g., the doubles precisely expressed as 0x1.999999999999ap-3 and 0x1.3333333333333p-2 in hexadecimal floating point notation are closer to 0.20000000000000001 than to 0.2 and closer to 0.29999999999999999 than to 0.3, respectively, even though both decimals in each pair convert to the same IEEE 754 binary double. David M. Gay's dtoa implementation (https://www.netlib.org/fp/) is a long-standing implementation used in a variety of projects (such as Python - https://github.com/python/cpython/blob/main/Python/dtoa.c ). Remove the strtod implementation, as we don't need it (it was added in C99 and C++11).
Change a few things to deal with compiler warnings (such as the comments for FALLTHROUGH in case statements and casts.) There's probably some more ifdef'ed code that could be removed as well, as Python did. There are a number of C++ libraries that implement apparently faster algorithms, but few other C implementations. Also note that C++17 added std::to_chars that produces the same result on floating point numbers (though the difficulty in implementation meant that this was one of the last C++17 features added to most C++ standard library implementations.) Add its MIT-like license (see https://spdx.org/licenses/dtoa.html) to licensecheck. Ping #16483 This achieves the goal of presenting the same shortest result in the protocol tree for FT_DOUBLEs in the tree as well as in the filter and serializing. Note this only produces the shortest result for *doubles*; extending this to IEEE 754 single precision floating point requires using the generalized version of the library (which may be necessary.)
This commit is contained in:
parent
4d8fea7f32
commit
1120beb52e
@ -11,6 +11,7 @@
|
||||
#include <ftypes-int.h>
|
||||
#include <float.h>
|
||||
#include <wsutil/array.h>
|
||||
#include <wsutil/dtoa.h>
|
||||
|
||||
static void
|
||||
double_fvalue_new(fvalue_t *fv)
|
||||
@ -56,20 +57,26 @@ float_val_to_repr(wmem_allocator_t *scope, const fvalue_t *fv, ftrepr_t rtype, i
|
||||
{
|
||||
char *buf = wmem_alloc(scope, G_ASCII_DTOSTR_BUF_SIZE);
|
||||
if (rtype == FTREPR_DFILTER)
|
||||
g_ascii_dtostr(buf, G_ASCII_DTOSTR_BUF_SIZE, fv->value.floating);
|
||||
dtoa_g_fmt(buf, fv->value.floating);
|
||||
else
|
||||
g_ascii_formatd(buf, G_ASCII_DTOSTR_BUF_SIZE, "%." G_STRINGIFY(FLT_DIG) "g", fv->value.floating);
|
||||
g_ascii_formatd(buf, G_ASCII_DTOSTR_BUF_SIZE, "%." G_STRINGIFY(FLT_DECIMAL_DIG) "g", fv->value.floating);
|
||||
return buf;
|
||||
}
|
||||
|
||||
static char *
|
||||
double_val_to_repr(wmem_allocator_t *scope, const fvalue_t *fv, ftrepr_t rtype, int field_display _U_)
|
||||
double_val_to_repr(wmem_allocator_t *scope, const fvalue_t *fv, ftrepr_t rtype _U_, int field_display _U_)
|
||||
{
|
||||
/* XXX - We prefer the g fmt here because it's always exact enough for
|
||||
* serialization and equality testing. We could also use dtoa to write
|
||||
* an acceptable for serialization and testing BASE_EXP format. We
|
||||
* could output in hex floating point if field_display is BASE_HEX as
|
||||
* it's always exact too, but less widely supported (JSON, XML, others
|
||||
* don't handle it.) BASE_DEC is just always a bad idea for equality
|
||||
* testing and serialization, unless you want to allow for strings up
|
||||
* to 308 characters.
|
||||
*/
|
||||
char *buf = wmem_alloc(scope, G_ASCII_DTOSTR_BUF_SIZE);
|
||||
if (rtype == FTREPR_DFILTER)
|
||||
g_ascii_dtostr(buf, G_ASCII_DTOSTR_BUF_SIZE, fv->value.floating);
|
||||
else
|
||||
g_ascii_formatd(buf, G_ASCII_DTOSTR_BUF_SIZE, "%." G_STRINGIFY(DBL_DIG) "g", fv->value.floating);
|
||||
dtoa_g_fmt(buf, fv->value.floating);
|
||||
return buf;
|
||||
}
|
||||
|
||||
|
13
epan/proto.c
13
epan/proto.c
@ -31,6 +31,7 @@
|
||||
#include <wsutil/wslog.h>
|
||||
#include <wsutil/ws_assert.h>
|
||||
#include <wsutil/unicode-utils.h>
|
||||
#include <wsutil/dtoa.h>
|
||||
|
||||
#include <ftypes/ftypes.h>
|
||||
|
||||
@ -10689,7 +10690,6 @@ static size_t
|
||||
fill_display_label_float(const field_info *fi, char *label_str)
|
||||
{
|
||||
int display;
|
||||
int digits;
|
||||
int n;
|
||||
double value;
|
||||
|
||||
@ -10705,12 +10705,11 @@ fill_display_label_float(const field_info *fi, char *label_str)
|
||||
|
||||
switch (display) {
|
||||
case BASE_NONE:
|
||||
if (fi->hfinfo->type == FT_FLOAT)
|
||||
digits = FLT_DIG;
|
||||
else
|
||||
digits = DBL_DIG;
|
||||
|
||||
n = snprintf(label_str, ITEM_LABEL_LENGTH, "%.*g", digits, value);
|
||||
if (fi->hfinfo->type == FT_FLOAT) {
|
||||
n = snprintf(label_str, ITEM_LABEL_LENGTH, "%.*g", FLT_DIG, value);
|
||||
} else {
|
||||
n = (int)strlen(dtoa_g_fmt(label_str, value));
|
||||
}
|
||||
break;
|
||||
case BASE_DEC:
|
||||
n = snprintf(label_str, ITEM_LABEL_LENGTH, "%f", value);
|
||||
|
@ -23,3 +23,5 @@ We use the overflow-safe math functions from the [portable snippets](https://git
|
||||
We use the [Lrexlib](https://github.com/rrthomas/lrexlib) Lua library, specifically the PCRE2 flavour, to provide a regular expression API for Lua.
|
||||
|
||||
The code for our `strptime()` implementation is from [NetBSD](https://www.netbsd.org/).
|
||||
|
||||
We use the [dtoa](https://www.netlib.org/fp/) implementation written by David M. Gay.
|
||||
|
@ -696,7 +696,7 @@ class TestCategory019:
|
||||
[0x01, 0x80, 0x7f, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00],
|
||||
"asterix.019_600",
|
||||
{
|
||||
"asterix.019_600_LAT": "359.999999832362",
|
||||
"asterix.019_600_LAT": "359.99999983236194",
|
||||
"asterix.019_600_LON": "0"
|
||||
}
|
||||
)
|
||||
@ -713,7 +713,7 @@ class TestCategory019:
|
||||
"asterix.019_600",
|
||||
{
|
||||
"asterix.019_600_LAT": "0",
|
||||
"asterix.019_600_LON": "359.999999832362"
|
||||
"asterix.019_600_LON": "359.99999983236194"
|
||||
}
|
||||
)
|
||||
validator.add_dissection(
|
||||
@ -1516,7 +1516,7 @@ class TestCategory048:
|
||||
validator.add_dissection(
|
||||
[0x10, 0x00, 0x00, 0xff, 0xff],
|
||||
"asterix.048_040",
|
||||
dict_local(x_040, "048_040", "THETA", "359.994506835938")
|
||||
dict_local(x_040, "048_040", "THETA", "359.9945068359375")
|
||||
)
|
||||
x_070 = {
|
||||
"asterix.048_070_V": "0",
|
||||
@ -1766,7 +1766,7 @@ class TestCategory048:
|
||||
validator.add_dissection(
|
||||
[0x01, 0x04, 0x00, 0x00, 0xff, 0xff],
|
||||
"asterix.048_200",
|
||||
dict_local(x_200, "048_200", "HDG", "359.994506835938")
|
||||
dict_local(x_200, "048_200", "HDG", "359.9945068359375")
|
||||
)
|
||||
x_170 = {
|
||||
"asterix.048_170_CNF": "0",
|
||||
@ -3393,7 +3393,7 @@ class TestCategory063:
|
||||
[0x01, 0x80, 0x7f, 0xff],
|
||||
"asterix.063_081",
|
||||
{
|
||||
"asterix.063_081_VALUE": "179.994506835938"
|
||||
"asterix.063_081_VALUE": "179.9945068359375"
|
||||
}
|
||||
)
|
||||
validator.add_dissection(
|
||||
@ -3439,7 +3439,7 @@ class TestCategory063:
|
||||
[0x01, 0x20, 0x7f, 0xff],
|
||||
"asterix.063_091",
|
||||
{
|
||||
"asterix.063_091_VALUE": "179.994506835938"
|
||||
"asterix.063_091_VALUE": "179.9945068359375"
|
||||
}
|
||||
)
|
||||
validator.add_dissection(
|
||||
@ -3453,7 +3453,7 @@ class TestCategory063:
|
||||
[0x01, 0x10, 0x7f, 0xff],
|
||||
"asterix.063_092",
|
||||
{
|
||||
"asterix.063_092_VALUE": "179.994506835938"
|
||||
"asterix.063_092_VALUE": "179.9945068359375"
|
||||
}
|
||||
)
|
||||
validator.add_dissection(
|
||||
|
@ -133,6 +133,12 @@ PATH_SPECIFIC_ALLOWED_LICENSES = {
|
||||
'.gitlab/': [
|
||||
'UNKNOWN',
|
||||
],
|
||||
'wsutil/dtoa.c': [
|
||||
'dtoa',
|
||||
],
|
||||
'wsutil/dtoa.h': [
|
||||
'dtoa',
|
||||
],
|
||||
'wsutil/safe-math.h': [ # Public domain (CC0)
|
||||
'UNKNOWN',
|
||||
],
|
||||
|
@ -713,6 +713,10 @@ sub parselicense {
|
||||
$license = 'ISC';
|
||||
}
|
||||
|
||||
if ($licensetext =~ /SPDX-License-Identifier:\s+dtoa/i) {
|
||||
$license = 'dtoa';
|
||||
}
|
||||
|
||||
if ($licensetext =~ /(?:is|may be)\s(?:(?:distributed|used).*?terms|being\s+released).*?\b(L?GPL)\b/) {
|
||||
my $v = $gplver || ' (unversioned/unknown version)';
|
||||
$license = "$1$v $license";
|
||||
@ -740,6 +744,10 @@ sub parselicense {
|
||||
$license = "ISC $license";
|
||||
}
|
||||
|
||||
if ($licensetext =~ /Permission to use, copy, modify, and distribute this software for any purpose without fee is hereby granted, provided that this entire notice is included in all copies of any software which is or includes a copy or modification of this software and in all copies of the supporting documentation for such software./) {
|
||||
$license = "dtoa $license";
|
||||
}
|
||||
|
||||
if ($licensetext =~ /THIS SOFTWARE IS PROVIDED .*AS IS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY/) {
|
||||
if ($licensetext =~ /All advertising materials mentioning features or use of this software must display the following acknowledge?ment.*This product includes software developed by/i) {
|
||||
$license = "BSD (4 clause) $license";
|
||||
|
@ -178,6 +178,7 @@ set(WSUTIL_COMMON_FILES
|
||||
crc11.c
|
||||
curve25519.c
|
||||
dot11decrypt_wep.c
|
||||
dtoa.c
|
||||
eax.c
|
||||
failure_message_simple.c
|
||||
feature_list.c
|
||||
|
3889
wsutil/dtoa.c
Normal file
3889
wsutil/dtoa.c
Normal file
File diff suppressed because it is too large
Load Diff
46
wsutil/dtoa.h
Normal file
46
wsutil/dtoa.h
Normal file
@ -0,0 +1,46 @@
|
||||
/** @file
|
||||
* David M. Gay dtoa (double to ASCII string) implementation header file
|
||||
*
|
||||
* Wireshark - Network traffic analyzer
|
||||
* By Gerald Combs <gerald@wireshark.org>
|
||||
* Copyright 1998 Gerald Combs
|
||||
*
|
||||
* SPDX-License-Identifier: dtoa
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <wireshark.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif /* __cplusplus */
|
||||
|
||||
/* Stores the closest decimal approximation to value in buf;
|
||||
* it suffices to declare buf
|
||||
* char buf[32];
|
||||
*
|
||||
* Specifically, this finds the shortest possible string that when converted
|
||||
* back to a double will be equal to the original value. There is no single
|
||||
* value that can be passed to snprintf("%.*g") that will work for all cases.
|
||||
*
|
||||
* E.g., for the IEEE 754 double closest to 1/7th (0x1.2492492492492p-3) 17
|
||||
* (DBL_DECIMAL_DIG) digits are required; neither "0.1428571428571428" nor
|
||||
* "0.1428571428571429" suffice, converting to 0x1.249249249249p-3 and
|
||||
* 0x1.2492492492494p-3, respectively. However, for the double closest to
|
||||
* 0.2 (0x1.999999999999ap-3), the closest string with 17 significant digits
|
||||
* is "0.20000000000000001", not "0.2", even though both convert *to* the
|
||||
* same double and would test as equal. So DBL_DECIMAL_DIG is *sufficient*
|
||||
* for serialization but not necessary in all cases, and the result can look
|
||||
* noticeably worse in formats where trailing zeros are removed.
|
||||
*
|
||||
* Note C++17 provides std::to_chars to provide the same result, though the
|
||||
* difficulty in implementation caused this to be one of the last widely
|
||||
* supported features across C++ standard libraries. It is not part of the
|
||||
* C standard library functions.
|
||||
*/
|
||||
WS_DLL_PUBLIC char *dtoa_g_fmt(char *buf, double value);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif /* __cplusplus */
|
Loading…
x
Reference in New Issue
Block a user