Fix inconsistent return type for statistics median_grouped() gh-92531 (#92533)
This commit is contained in:
parent
5bc2390229
commit
e01eeb7b4b
@ -611,7 +611,7 @@ def median_high(data):
|
|||||||
return data[n // 2]
|
return data[n // 2]
|
||||||
|
|
||||||
|
|
||||||
def median_grouped(data, interval=1):
|
def median_grouped(data, interval=1.0):
|
||||||
"""Estimates the median for numeric data binned around the midpoints
|
"""Estimates the median for numeric data binned around the midpoints
|
||||||
of consecutive, fixed-width intervals.
|
of consecutive, fixed-width intervals.
|
||||||
|
|
||||||
@ -650,35 +650,34 @@ def median_grouped(data, interval=1):
|
|||||||
by exact multiples of *interval*. This is essential for getting a
|
by exact multiples of *interval*. This is essential for getting a
|
||||||
correct result. The function does not check this precondition.
|
correct result. The function does not check this precondition.
|
||||||
|
|
||||||
|
Inputs may be any numeric type that can be coerced to a float during
|
||||||
|
the interpolation step.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
data = sorted(data)
|
data = sorted(data)
|
||||||
n = len(data)
|
n = len(data)
|
||||||
if n == 0:
|
if not n:
|
||||||
raise StatisticsError("no median for empty data")
|
raise StatisticsError("no median for empty data")
|
||||||
elif n == 1:
|
|
||||||
return data[0]
|
|
||||||
|
|
||||||
# Find the value at the midpoint. Remember this corresponds to the
|
# Find the value at the midpoint. Remember this corresponds to the
|
||||||
# midpoint of the class interval.
|
# midpoint of the class interval.
|
||||||
x = data[n // 2]
|
x = data[n // 2]
|
||||||
|
|
||||||
# Generate a clear error message for non-numeric data
|
|
||||||
for obj in (x, interval):
|
|
||||||
if isinstance(obj, (str, bytes)):
|
|
||||||
raise TypeError(f'expected a number but got {obj!r}')
|
|
||||||
|
|
||||||
# Using O(log n) bisection, find where all the x values occur in the data.
|
# Using O(log n) bisection, find where all the x values occur in the data.
|
||||||
# All x will lie within data[i:j].
|
# All x will lie within data[i:j].
|
||||||
i = bisect_left(data, x)
|
i = bisect_left(data, x)
|
||||||
j = bisect_right(data, x, lo=i)
|
j = bisect_right(data, x, lo=i)
|
||||||
|
|
||||||
|
# Coerce to floats, raising a TypeError if not possible
|
||||||
|
try:
|
||||||
|
interval = float(interval)
|
||||||
|
x = float(x)
|
||||||
|
except ValueError:
|
||||||
|
raise TypeError(f'Value cannot be converted to a float')
|
||||||
|
|
||||||
# Interpolate the median using the formula found at:
|
# Interpolate the median using the formula found at:
|
||||||
# https://www.cuemath.com/data/median-of-grouped-data/
|
# https://www.cuemath.com/data/median-of-grouped-data/
|
||||||
try:
|
L = x - interval / 2.0 # Lower limit of the median interval
|
||||||
L = x - interval / 2 # The lower limit of the median interval.
|
|
||||||
except TypeError:
|
|
||||||
# Coerce mixed types to float.
|
|
||||||
L = float(x) - float(interval) / 2
|
|
||||||
cf = i # Cumulative frequency of the preceding interval
|
cf = i # Cumulative frequency of the preceding interval
|
||||||
f = j - i # Number of elements in the median internal
|
f = j - i # Number of elements in the median internal
|
||||||
return L + interval * (n / 2 - cf) / f
|
return L + interval * (n / 2 - cf) / f
|
||||||
|
@ -1742,6 +1742,12 @@ class TestMedianGrouped(TestMedian):
|
|||||||
data = [x]*count
|
data = [x]*count
|
||||||
self.assertEqual(self.func(data), float(x))
|
self.assertEqual(self.func(data), float(x))
|
||||||
|
|
||||||
|
def test_single_value(self):
|
||||||
|
# Override method from AverageMixin.
|
||||||
|
# Average of a single value is the value as a float.
|
||||||
|
for x in (23, 42.5, 1.3e15, Fraction(15, 19), Decimal('0.28')):
|
||||||
|
self.assertEqual(self.func([x]), float(x))
|
||||||
|
|
||||||
def test_odd_fractions(self):
|
def test_odd_fractions(self):
|
||||||
# Test median_grouped works with an odd number of Fractions.
|
# Test median_grouped works with an odd number of Fractions.
|
||||||
F = Fraction
|
F = Fraction
|
||||||
|
@ -0,0 +1,3 @@
|
|||||||
|
The statistics.median_grouped() function now always returns a float.
|
||||||
|
Formerly, it did not convert the input type for sequences of length
|
||||||
|
one.
|
Loading…
x
Reference in New Issue
Block a user