gh-114087: Speed up dataclasses._asdict_inner (#114088)

This commit is contained in:
keithasaurus 2024-01-18 08:03:20 -08:00 committed by GitHub
parent 339fc3c224
commit 2d3f6b56c5
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 55 additions and 44 deletions

View File

@ -1332,58 +1332,69 @@ def asdict(obj, *, dict_factory=dict):
def _asdict_inner(obj, dict_factory): def _asdict_inner(obj, dict_factory):
if type(obj) in _ATOMIC_TYPES: obj_type = type(obj)
if obj_type in _ATOMIC_TYPES:
return obj return obj
elif _is_dataclass_instance(obj): elif hasattr(obj_type, _FIELDS):
# fast path for the common case # dataclass instance: fast path for the common case
if dict_factory is dict: if dict_factory is dict:
return { return {
f.name: _asdict_inner(getattr(obj, f.name), dict) f.name: _asdict_inner(getattr(obj, f.name), dict)
for f in fields(obj) for f in fields(obj)
} }
else: else:
result = [] return dict_factory([
for f in fields(obj): (f.name, _asdict_inner(getattr(obj, f.name), dict_factory))
value = _asdict_inner(getattr(obj, f.name), dict_factory) for f in fields(obj)
result.append((f.name, value)) ])
return dict_factory(result) # handle the builtin types first for speed; subclasses handled below
elif isinstance(obj, tuple) and hasattr(obj, '_fields'): elif obj_type is list:
# obj is a namedtuple. Recurse into it, but the returned return [_asdict_inner(v, dict_factory) for v in obj]
# object is another namedtuple of the same type. This is elif obj_type is dict:
# similar to how other list- or tuple-derived classes are return {
# treated (see below), but we just need to create them _asdict_inner(k, dict_factory): _asdict_inner(v, dict_factory)
# differently because a namedtuple's __init__ needs to be for k, v in obj.items()
# called differently (see bpo-34363). }
elif obj_type is tuple:
return tuple([_asdict_inner(v, dict_factory) for v in obj])
elif issubclass(obj_type, tuple):
if hasattr(obj, '_fields'):
# obj is a namedtuple. Recurse into it, but the returned
# object is another namedtuple of the same type. This is
# similar to how other list- or tuple-derived classes are
# treated (see below), but we just need to create them
# differently because a namedtuple's __init__ needs to be
# called differently (see bpo-34363).
# I'm not using namedtuple's _asdict() # I'm not using namedtuple's _asdict()
# method, because: # method, because:
# - it does not recurse into the namedtuple fields and # - it does not recurse into the namedtuple fields and
# convert them to dicts (using dict_factory). # convert them to dicts (using dict_factory).
# - I don't actually want to return a dict here. The main # - I don't actually want to return a dict here. The main
# use case here is json.dumps, and it handles converting # use case here is json.dumps, and it handles converting
# namedtuples to lists. Admittedly we're losing some # namedtuples to lists. Admittedly we're losing some
# information here when we produce a json list instead of a # information here when we produce a json list instead of a
# dict. Note that if we returned dicts here instead of # dict. Note that if we returned dicts here instead of
# namedtuples, we could no longer call asdict() on a data # namedtuples, we could no longer call asdict() on a data
# structure where a namedtuple was used as a dict key. # structure where a namedtuple was used as a dict key.
return obj_type(*[_asdict_inner(v, dict_factory) for v in obj])
return type(obj)(*[_asdict_inner(v, dict_factory) for v in obj]) else:
elif isinstance(obj, (list, tuple)): return obj_type(_asdict_inner(v, dict_factory) for v in obj)
# Assume we can create an object of this type by passing in a elif issubclass(obj_type, dict):
# generator (which is not true for namedtuples, handled if hasattr(obj_type, 'default_factory'):
# above).
return type(obj)(_asdict_inner(v, dict_factory) for v in obj)
elif isinstance(obj, dict):
if hasattr(type(obj), 'default_factory'):
# obj is a defaultdict, which has a different constructor from # obj is a defaultdict, which has a different constructor from
# dict as it requires the default_factory as its first arg. # dict as it requires the default_factory as its first arg.
result = type(obj)(getattr(obj, 'default_factory')) result = obj_type(obj.default_factory)
for k, v in obj.items(): for k, v in obj.items():
result[_asdict_inner(k, dict_factory)] = _asdict_inner(v, dict_factory) result[_asdict_inner(k, dict_factory)] = _asdict_inner(v, dict_factory)
return result return result
return type(obj)((_asdict_inner(k, dict_factory), return obj_type((_asdict_inner(k, dict_factory),
_asdict_inner(v, dict_factory)) _asdict_inner(v, dict_factory))
for k, v in obj.items()) for k, v in obj.items())
elif issubclass(obj_type, list):
# Assume we can create an object of this type by passing in a
# generator
return obj_type(_asdict_inner(v, dict_factory) for v in obj)
else: else:
return copy.deepcopy(obj) return copy.deepcopy(obj)
@ -1416,11 +1427,10 @@ def _astuple_inner(obj, tuple_factory):
if type(obj) in _ATOMIC_TYPES: if type(obj) in _ATOMIC_TYPES:
return obj return obj
elif _is_dataclass_instance(obj): elif _is_dataclass_instance(obj):
result = [] return tuple_factory([
for f in fields(obj): _astuple_inner(getattr(obj, f.name), tuple_factory)
value = _astuple_inner(getattr(obj, f.name), tuple_factory) for f in fields(obj)
result.append(value) ])
return tuple_factory(result)
elif isinstance(obj, tuple) and hasattr(obj, '_fields'): elif isinstance(obj, tuple) and hasattr(obj, '_fields'):
# obj is a namedtuple. Recurse into it, but the returned # obj is a namedtuple. Recurse into it, but the returned
# object is another namedtuple of the same type. This is # object is another namedtuple of the same type. This is

View File

@ -0,0 +1 @@
Speed up ``dataclasses.asdict`` by up to 1.35x.