gh-114087: Speed up dataclasses._asdict_inner (#114088)

2024-01-18 08:03:20 -08:00 · 2024-01-18 08:03:20 -08:00 · 2d3f6b56c5
commit 2d3f6b56c5
parent 339fc3c224
2 changed files with 55 additions and 44 deletions
--- a/Lib/dataclasses.py
+++ b/Lib/dataclasses.py
@@ -1332,58 +1332,69 @@ def asdict(obj, *, dict_factory=dict):
 def _asdict_inner(obj, dict_factory):
-    if type(obj) in _ATOMIC_TYPES:
+    obj_type = type(obj)
+    if obj_type in _ATOMIC_TYPES:
        return obj
-    elif _is_dataclass_instance(obj):
+    elif hasattr(obj_type, _FIELDS):
-        # fast path for the common case
+        # dataclass instance: fast path for the common case
        if dict_factory is dict:
            return {
                f.name: _asdict_inner(getattr(obj, f.name), dict)
                for f in fields(obj)
            }
        else:
-            result = []
+            return dict_factory([
-            for f in fields(obj):
+                (f.name, _asdict_inner(getattr(obj, f.name), dict_factory))
-                value = _asdict_inner(getattr(obj, f.name), dict_factory)
+                for f in fields(obj)
-                result.append((f.name, value))
+            ])
-            return dict_factory(result)
+    # handle the builtin types first for speed; subclasses handled below
-    elif isinstance(obj, tuple) and hasattr(obj, '_fields'):
+    elif obj_type is list:
-        # obj is a namedtuple.  Recurse into it, but the returned
+        return [_asdict_inner(v, dict_factory) for v in obj]
-        # object is another namedtuple of the same type.  This is
+    elif obj_type is dict:
-        # similar to how other list- or tuple-derived classes are
+        return {
-        # treated (see below), but we just need to create them
+            _asdict_inner(k, dict_factory): _asdict_inner(v, dict_factory)
-        # differently because a namedtuple's __init__ needs to be
+            for k, v in obj.items()
-        # called differently (see bpo-34363).
+        }
+    elif obj_type is tuple:
+        return tuple([_asdict_inner(v, dict_factory) for v in obj])
+    elif issubclass(obj_type, tuple):
+        if hasattr(obj, '_fields'):
+            # obj is a namedtuple.  Recurse into it, but the returned
+            # object is another namedtuple of the same type.  This is
+            # similar to how other list- or tuple-derived classes are
+            # treated (see below), but we just need to create them
+            # differently because a namedtuple's __init__ needs to be
+            # called differently (see bpo-34363).
-        # I'm not using namedtuple's _asdict()
+            # I'm not using namedtuple's _asdict()
-        # method, because:
+            # method, because:
-        # - it does not recurse in to the namedtuple fields and
+            # - it does not recurse in to the namedtuple fields and
-        #   convert them to dicts (using dict_factory).
+            #   convert them to dicts (using dict_factory).
-        # - I don't actually want to return a dict here.  The main
+            # - I don't actually want to return a dict here.  The main
-        #   use case here is json.dumps, and it handles converting
+            #   use case here is json.dumps, and it handles converting
-        #   namedtuples to lists.  Admittedly we're losing some
+            #   namedtuples to lists.  Admittedly we're losing some
-        #   information here when we produce a json list instead of a
+            #   information here when we produce a json list instead of a
-        #   dict.  Note that if we returned dicts here instead of
+            #   dict.  Note that if we returned dicts here instead of
-        #   namedtuples, we could no longer call asdict() on a data
+            #   namedtuples, we could no longer call asdict() on a data
-        #   structure where a namedtuple was used as a dict key.
+            #   structure where a namedtuple was used as a dict key.
-
+            return obj_type(*[_asdict_inner(v, dict_factory) for v in obj])
-        return type(obj)(*[_asdict_inner(v, dict_factory) for v in obj])
+        else:
-    elif isinstance(obj, (list, tuple)):
+            return obj_type(_asdict_inner(v, dict_factory) for v in obj)
-        # Assume we can create an object of this type by passing in a
+    elif issubclass(obj_type, dict):
-        # generator (which is not true for namedtuples, handled
+        if hasattr(obj_type, 'default_factory'):
-        # above).
-        return type(obj)(_asdict_inner(v, dict_factory) for v in obj)
-    elif isinstance(obj, dict):
-        if hasattr(type(obj), 'default_factory'):
            # obj is a defaultdict, which has a different constructor from
            # dict as it requires the default_factory as its first arg.
-            result = type(obj)(getattr(obj, 'default_factory'))
+            result = obj_type(obj.default_factory)
            for k, v in obj.items():
                result[_asdict_inner(k, dict_factory)] = _asdict_inner(v, dict_factory)
            return result
-        return type(obj)((_asdict_inner(k, dict_factory),
+        return obj_type((_asdict_inner(k, dict_factory),
-                          _asdict_inner(v, dict_factory))
+                         _asdict_inner(v, dict_factory))
-                         for k, v in obj.items())
+                        for k, v in obj.items())
+    elif issubclass(obj_type, list):
+        # Assume we can create an object of this type by passing in a
+        # generator
+        return obj_type(_asdict_inner(v, dict_factory) for v in obj)
    else:
        return copy.deepcopy(obj)
@@ -1416,11 +1427,10 @@ def _astuple_inner(obj, tuple_factory):
    if type(obj) in _ATOMIC_TYPES:
        return obj
    elif _is_dataclass_instance(obj):
-        result = []
+        return tuple_factory([
-        for f in fields(obj):
+            _astuple_inner(getattr(obj, f.name), tuple_factory)
-            value = _astuple_inner(getattr(obj, f.name), tuple_factory)
+            for f in fields(obj)
-            result.append(value)
+        ])
-        return tuple_factory(result)
    elif isinstance(obj, tuple) and hasattr(obj, '_fields'):
        # obj is a namedtuple.  Recurse into it, but the returned
        # object is another namedtuple of the same type.  This is
--- a/Misc/NEWS.d/next/Library/2024-01-15-19-54-41.gh-issue-114087.Xic5vY.rst
+++ b/Misc/NEWS.d/next/Library/2024-01-15-19-54-41.gh-issue-114087.Xic5vY.rst
@@ -0,0 +1 @@
+Speed up ``dataclasses.asdict`` up to 1.35x.