OK, I couldn't stand it <0.5 wink>: removed all uncertainty about what's
in gc_refs, even at the cost of putting back a test+branch in visit_decref. The good news: since gc_refs became utterly tame then, it became clear that another special value could be useful. The move_roots() and move_root_reachable() passes have now been replaced by a single move_unreachable() pass. Besides saving a pass over the generation, this has a better effect: most of the time everything turns out to be reachable, so we were breaking the generation list apart and moving it into into the reachable list, one element at a time. Now the reachable stuff stays in the generation list, and the unreachable stuff is moved instead. This isn't quite as good as it sounds, since sometimes we guess wrongly that a thing is unreachable, and have to move it back again. Still, overall, it yields a significant (but not dramatic) boost in collection speed.
This commit is contained in:
parent
93cd83e4ae
commit
19b74c7868
@ -74,17 +74,20 @@ static int debug;
|
|||||||
|
|
||||||
/* When a collection begins, gc_refs is set to ob_refcnt for, and only for,
|
/* When a collection begins, gc_refs is set to ob_refcnt for, and only for,
|
||||||
* the objects in the generation being collected, called the "young"
|
* the objects in the generation being collected, called the "young"
|
||||||
* generation at that point. As collection proceeds, when it's determined
|
* generation at that point. As collection proceeds, the gc_refs members
|
||||||
* that one of these can't be collected (e.g., because it's reachable from
|
* of young objects are set to GC_REACHABLE when it becomes known that they're
|
||||||
* outside, or has a __del__ method), the object is moved out of young, and
|
* uncollectable, and to GC_TENTATIVELY_UNREACHABLE when the evidence
|
||||||
* gc_refs is set to a negative value. The latter is so we can distinguish
|
* suggests they are collectable (this can't be known for certain until all
|
||||||
* collection candidates from non-candidates just by looking at the object.
|
* of the young generation is scanned).
|
||||||
*/
|
*/
|
||||||
/* Special gc_refs value, although any negative value means "moved". */
|
|
||||||
#define GC_MOVED -123
|
|
||||||
|
|
||||||
/* True iff an object is still a candidate for collection. */
|
/* Special gc_refs values. */
|
||||||
#define STILL_A_CANDIDATE(o) ((AS_GC(o))->gc.gc_refs >= 0)
|
#define GC_REACHABLE -123
|
||||||
|
#define GC_TENTATIVELY_UNREACHABLE -42
|
||||||
|
|
||||||
|
#define IS_REACHABLE(o) ((AS_GC(o))->gc.gc_refs == GC_REACHABLE)
|
||||||
|
#define IS_TENTATIVELY_UNREACHABLE(o) ( \
|
||||||
|
(AS_GC(o))->gc.gc_refs == GC_TENTATIVELY_UNREACHABLE)
|
||||||
|
|
||||||
/* list of uncollectable objects */
|
/* list of uncollectable objects */
|
||||||
static PyObject *garbage;
|
static PyObject *garbage;
|
||||||
@ -168,41 +171,40 @@ gc_list_size(PyGC_Head *list)
|
|||||||
/*** end of list stuff ***/
|
/*** end of list stuff ***/
|
||||||
|
|
||||||
|
|
||||||
|
/* Set all gc_refs = ob_refcnt. After this, gc_refs is > 0 for all objects
|
||||||
/* Set all gc_refs = ob_refcnt. After this, STILL_A_CANDIDATE(o) is true
|
* in containers, and is GC_REACHABLE for all tracked gc objects not in
|
||||||
* for all objects in containers, and false for all tracked gc objects not
|
* containers.
|
||||||
* in containers (although see the comment in visit_decref).
|
|
||||||
*/
|
*/
|
||||||
static void
|
static void
|
||||||
update_refs(PyGC_Head *containers)
|
update_refs(PyGC_Head *containers)
|
||||||
{
|
{
|
||||||
PyGC_Head *gc = containers->gc.gc_next;
|
PyGC_Head *gc = containers->gc.gc_next;
|
||||||
for (; gc != containers; gc=gc->gc.gc_next) {
|
for (; gc != containers; gc = gc->gc.gc_next)
|
||||||
gc->gc.gc_refs = FROM_GC(gc)->ob_refcnt;
|
gc->gc.gc_refs = FROM_GC(gc)->ob_refcnt;
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* A traversal callback for subtract_refs. */
|
||||||
static int
|
static int
|
||||||
visit_decref(PyObject *op, void *data)
|
visit_decref(PyObject *op, void *data)
|
||||||
{
|
{
|
||||||
/* There's no point to decrementing gc_refs unless
|
|
||||||
* STILL_A_CANDIDATE(op) is true. It would take extra cycles to
|
|
||||||
* check that, though. If STILL_A_CANDIDATE(op) is false,
|
|
||||||
* decrementing gc_refs almost always makes it "even more negative",
|
|
||||||
* so doesn't change that STILL_A_CANDIDATE is false, and no harm is
|
|
||||||
* done. However, it's possible that, after many collections, this
|
|
||||||
* could underflow gc_refs in a long-lived old object. In that case,
|
|
||||||
* visit_move() may move the old object back to the generation
|
|
||||||
* getting collected. That would be a waste of time, but wouldn't
|
|
||||||
* cause an error.
|
|
||||||
*/
|
|
||||||
assert(op != NULL);
|
assert(op != NULL);
|
||||||
if (PyObject_IS_GC(op))
|
if (PyObject_IS_GC(op)) {
|
||||||
AS_GC(op)->gc.gc_refs--;
|
PyGC_Head *gc = AS_GC(op);
|
||||||
|
/* We're only interested in gc_refs for objects in the
|
||||||
|
* generation being collected, which can be recognized
|
||||||
|
* because only they have positive gc_refs.
|
||||||
|
*/
|
||||||
|
if (gc->gc.gc_refs > 0)
|
||||||
|
gc->gc.gc_refs--;
|
||||||
|
}
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Subtract internal references from gc_refs */
|
/* Subtract internal references from gc_refs. After this, gc_refs is >= 0
|
||||||
|
* for all objects in containers, and is GC_REACHABLE for all tracked gc
|
||||||
|
* objects not in containers. The ones with gc_refs > 0 are directly
|
||||||
|
* reachable from outside containers, and so can't be collected.
|
||||||
|
*/
|
||||||
static void
|
static void
|
||||||
subtract_refs(PyGC_Head *containers)
|
subtract_refs(PyGC_Head *containers)
|
||||||
{
|
{
|
||||||
@ -216,52 +218,100 @@ subtract_refs(PyGC_Head *containers)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Move objects with gc_refs > 0 to roots list. They can't be collected. */
|
/* A traversal callback for move_unreachable. */
|
||||||
static void
|
|
||||||
move_roots(PyGC_Head *containers, PyGC_Head *roots)
|
|
||||||
{
|
|
||||||
PyGC_Head *next;
|
|
||||||
PyGC_Head *gc = containers->gc.gc_next;
|
|
||||||
while (gc != containers) {
|
|
||||||
next = gc->gc.gc_next;
|
|
||||||
if (gc->gc.gc_refs > 0) {
|
|
||||||
gc_list_remove(gc);
|
|
||||||
gc_list_append(gc, roots);
|
|
||||||
gc->gc.gc_refs = GC_MOVED;
|
|
||||||
}
|
|
||||||
gc = next;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static int
|
static int
|
||||||
visit_move(PyObject *op, PyGC_Head *tolist)
|
visit_reachable(PyObject *op, PyGC_Head *reachable)
|
||||||
{
|
{
|
||||||
if (PyObject_IS_GC(op)) {
|
if (PyObject_IS_GC(op) && IS_TRACKED(op)) {
|
||||||
if (IS_TRACKED(op) && STILL_A_CANDIDATE(op)) {
|
|
||||||
PyGC_Head *gc = AS_GC(op);
|
PyGC_Head *gc = AS_GC(op);
|
||||||
gc_list_remove(gc);
|
const int gc_refs = gc->gc.gc_refs;
|
||||||
gc_list_append(gc, tolist);
|
|
||||||
gc->gc.gc_refs = GC_MOVED;
|
if (gc_refs == 0) {
|
||||||
|
/* This is in move_unreachable's 'young' list, but
|
||||||
|
* the traversal hasn't yet gotten to it. All
|
||||||
|
* we need to do is tell move_unreachable that it's
|
||||||
|
* reachable.
|
||||||
|
*/
|
||||||
|
gc->gc.gc_refs = 1;
|
||||||
}
|
}
|
||||||
|
else if (gc_refs == GC_TENTATIVELY_UNREACHABLE) {
|
||||||
|
/* This had gc_refs = 0 when move_unreachable got
|
||||||
|
* to it, but turns out it's reachable after all.
|
||||||
|
* Move it back to move_unreachable's 'young' list,
|
||||||
|
* and move_unreachable will eventually get to it
|
||||||
|
* again.
|
||||||
|
*/
|
||||||
|
gc_list_remove(gc);
|
||||||
|
gc_list_append(gc, reachable);
|
||||||
|
gc->gc.gc_refs = 1;
|
||||||
|
}
|
||||||
|
/* Else there's nothing to do.
|
||||||
|
* If gc_refs > 0, it must be in move_unreachable's 'young'
|
||||||
|
* list, and move_unreachable will eventually get to it.
|
||||||
|
* If gc_refs == GC_REACHABLE, it's either in some other
|
||||||
|
* generation so we don't care about it, or move_unreachable
|
||||||
|
* already dealt with it.
|
||||||
|
*/
|
||||||
}
|
}
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Move candidates referenced from reachable to reachable set (they're no
|
/* Move the unreachable objects from young to unreachable. After this,
|
||||||
* longer candidates).
|
* all objects in young have gc_refs = GC_REACHABLE, and all objects in
|
||||||
|
* unreachable have gc_refs = GC_TENTATIVELY_UNREACHABLE. All tracked
|
||||||
|
* gc objects not in young or unreachable still have gc_refs = GC_REACHABLE.
|
||||||
|
* All objects in young after this are directly or indirectly reachable
|
||||||
|
* from outside the original young; and all objects in unreachable are
|
||||||
|
* not.
|
||||||
*/
|
*/
|
||||||
static void
|
static void
|
||||||
move_root_reachable(PyGC_Head *reachable)
|
move_unreachable(PyGC_Head *young, PyGC_Head *unreachable)
|
||||||
{
|
{
|
||||||
traverseproc traverse;
|
PyGC_Head *gc = young->gc.gc_next;
|
||||||
PyGC_Head *gc = reachable->gc.gc_next;
|
|
||||||
for (; gc != reachable; gc=gc->gc.gc_next) {
|
/* Invariants: all objects "to the left" of us in young have gc_refs
|
||||||
/* careful, reachable list is growing here */
|
* = GC_REACHABLE, and are indeed reachable (directly or indirectly)
|
||||||
|
* from outside the young list as it was at entry. All other objects
|
||||||
|
* from the original young "to the left" of us are in unreachable now,
|
||||||
|
* and have gc_refs = GC_TENTATIVELY_UNREACHABLE. All objects to the
|
||||||
|
* left of us in 'young' now have been scanned, and no objects here
|
||||||
|
* or to the right have been scanned yet.
|
||||||
|
*/
|
||||||
|
|
||||||
|
while (gc != young) {
|
||||||
|
PyGC_Head *next;
|
||||||
|
|
||||||
|
if (gc->gc.gc_refs == 0) {
|
||||||
|
/* This *may* be unreachable. To make progress,
|
||||||
|
* assume it is. gc isn't directly reachable from
|
||||||
|
* any object we've already traversed, but may be
|
||||||
|
* reachable from an object we haven't gotten to yet.
|
||||||
|
* visit_reachable will eventually move gc back into
|
||||||
|
* young if that's so, and we'll see it again.
|
||||||
|
*/
|
||||||
|
next = gc->gc.gc_next;
|
||||||
|
gc_list_remove(gc);
|
||||||
|
gc_list_append(gc, unreachable);
|
||||||
|
gc->gc.gc_refs = GC_TENTATIVELY_UNREACHABLE;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
/* gc is definitely reachable from outside the
|
||||||
|
* original 'young'. Mark it as such, and traverse
|
||||||
|
* its pointers to find any other objects that may
|
||||||
|
* be directly reachable from it. Note that the
|
||||||
|
* call to tp_traverse may append objects to young,
|
||||||
|
* so we have to wait until it returns to determine
|
||||||
|
* the next object to visit.
|
||||||
|
*/
|
||||||
PyObject *op = FROM_GC(gc);
|
PyObject *op = FROM_GC(gc);
|
||||||
traverse = op->ob_type->tp_traverse;
|
traverseproc traverse = op->ob_type->tp_traverse;
|
||||||
|
gc->gc.gc_refs = GC_REACHABLE;
|
||||||
(void) traverse(op,
|
(void) traverse(op,
|
||||||
(visitproc)visit_move,
|
(visitproc)visit_reachable,
|
||||||
(void *)reachable);
|
(void *)young);
|
||||||
|
next = gc->gc.gc_next;
|
||||||
|
}
|
||||||
|
gc = next;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -292,12 +342,29 @@ move_finalizers(PyGC_Head *unreachable, PyGC_Head *finalizers)
|
|||||||
if (has_finalizer(op)) {
|
if (has_finalizer(op)) {
|
||||||
gc_list_remove(gc);
|
gc_list_remove(gc);
|
||||||
gc_list_append(gc, finalizers);
|
gc_list_append(gc, finalizers);
|
||||||
gc->gc.gc_refs = GC_MOVED;
|
gc->gc.gc_refs = GC_REACHABLE;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Move objects referenced from roots to roots */
|
/* A traversal callback for move_finalizer_reachable. */
|
||||||
|
static int
|
||||||
|
visit_move(PyObject *op, PyGC_Head *tolist)
|
||||||
|
{
|
||||||
|
if (PyObject_IS_GC(op)) {
|
||||||
|
if (IS_TRACKED(op) && IS_TENTATIVELY_UNREACHABLE(op)) {
|
||||||
|
PyGC_Head *gc = AS_GC(op);
|
||||||
|
gc_list_remove(gc);
|
||||||
|
gc_list_append(gc, tolist);
|
||||||
|
gc->gc.gc_refs = GC_REACHABLE;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Move objects that are reachable from finalizers, from the unreachable set
|
||||||
|
* into the finalizers set.
|
||||||
|
*/
|
||||||
static void
|
static void
|
||||||
move_finalizer_reachable(PyGC_Head *finalizers)
|
move_finalizer_reachable(PyGC_Head *finalizers)
|
||||||
{
|
{
|
||||||
@ -353,11 +420,12 @@ handle_finalizers(PyGC_Head *finalizers, PyGC_Head *old)
|
|||||||
/* If SAVEALL is not set then just append objects with
|
/* If SAVEALL is not set then just append objects with
|
||||||
* finalizers to the list of garbage. All objects in
|
* finalizers to the list of garbage. All objects in
|
||||||
* the finalizers list are reachable from those
|
* the finalizers list are reachable from those
|
||||||
* objects. */
|
* objects.
|
||||||
|
*/
|
||||||
PyList_Append(garbage, op);
|
PyList_Append(garbage, op);
|
||||||
}
|
}
|
||||||
/* object is now reachable again */
|
/* object is now reachable again */
|
||||||
assert(!STILL_A_CANDIDATE(op));
|
assert(IS_REACHABLE(op));
|
||||||
gc_list_remove(gc);
|
gc_list_remove(gc);
|
||||||
gc_list_append(gc, old);
|
gc_list_append(gc, old);
|
||||||
}
|
}
|
||||||
@ -365,7 +433,8 @@ handle_finalizers(PyGC_Head *finalizers, PyGC_Head *old)
|
|||||||
|
|
||||||
/* Break reference cycles by clearing the containers involved. This is
|
/* Break reference cycles by clearing the containers involved. This is
|
||||||
* tricky business as the lists can be changing and we don't know which
|
* tricky business as the lists can be changing and we don't know which
|
||||||
* objects may be freed. It is possible I screwed something up here. */
|
* objects may be freed. It is possible I screwed something up here.
|
||||||
|
*/
|
||||||
static void
|
static void
|
||||||
delete_garbage(PyGC_Head *unreachable, PyGC_Head *old)
|
delete_garbage(PyGC_Head *unreachable, PyGC_Head *old)
|
||||||
{
|
{
|
||||||
@ -375,7 +444,7 @@ delete_garbage(PyGC_Head *unreachable, PyGC_Head *old)
|
|||||||
PyGC_Head *gc = unreachable->gc.gc_next;
|
PyGC_Head *gc = unreachable->gc.gc_next;
|
||||||
PyObject *op = FROM_GC(gc);
|
PyObject *op = FROM_GC(gc);
|
||||||
|
|
||||||
assert(STILL_A_CANDIDATE(op));
|
assert(IS_TENTATIVELY_UNREACHABLE(op));
|
||||||
if (debug & DEBUG_SAVEALL) {
|
if (debug & DEBUG_SAVEALL) {
|
||||||
PyList_Append(garbage, op);
|
PyList_Append(garbage, op);
|
||||||
}
|
}
|
||||||
@ -390,7 +459,7 @@ delete_garbage(PyGC_Head *unreachable, PyGC_Head *old)
|
|||||||
/* object is still alive, move it, it may die later */
|
/* object is still alive, move it, it may die later */
|
||||||
gc_list_remove(gc);
|
gc_list_remove(gc);
|
||||||
gc_list_append(gc, old);
|
gc_list_append(gc, old);
|
||||||
gc->gc.gc_refs = GC_MOVED;
|
gc->gc.gc_refs = GC_REACHABLE;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -401,11 +470,10 @@ static long
|
|||||||
collect(int generation)
|
collect(int generation)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
long n = 0;
|
long m = 0; /* # objects collected */
|
||||||
long m = 0;
|
long n = 0; /* # unreachable objects that couldn't be collected */
|
||||||
PyGC_Head *young; /* the generation we are examining */
|
PyGC_Head *young; /* the generation we are examining */
|
||||||
PyGC_Head *old; /* next older generation */
|
PyGC_Head *old; /* next older generation */
|
||||||
PyGC_Head reachable;
|
|
||||||
PyGC_Head unreachable;
|
PyGC_Head unreachable;
|
||||||
PyGC_Head finalizers;
|
PyGC_Head finalizers;
|
||||||
PyGC_Head *gc;
|
PyGC_Head *gc;
|
||||||
@ -433,38 +501,37 @@ collect(int generation)
|
|||||||
|
|
||||||
/* handy references */
|
/* handy references */
|
||||||
young = GEN_HEAD(generation);
|
young = GEN_HEAD(generation);
|
||||||
if (generation < NUM_GENERATIONS-1) {
|
if (generation < NUM_GENERATIONS-1)
|
||||||
old = GEN_HEAD(generation+1);
|
old = GEN_HEAD(generation+1);
|
||||||
} else {
|
else
|
||||||
old = GEN_HEAD(NUM_GENERATIONS-1);
|
old = young;
|
||||||
}
|
|
||||||
|
|
||||||
/* Using ob_refcnt and gc_refs, calculate which objects in the
|
/* Using ob_refcnt and gc_refs, calculate which objects in the
|
||||||
* container set are reachable from outside the set (ie. have a
|
* container set are reachable from outside the set (ie. have a
|
||||||
* refcount greater than 0 when all the references within the
|
* refcount greater than 0 when all the references within the
|
||||||
* set are taken into account */
|
* set are taken into account
|
||||||
|
*/
|
||||||
update_refs(young);
|
update_refs(young);
|
||||||
subtract_refs(young);
|
subtract_refs(young);
|
||||||
|
|
||||||
/* Move everything reachable from outside the set into the
|
/* Leave everything reachable from outside young in young, and move
|
||||||
* reachable set (ie. gc_refs > 0). Next, move everything
|
* everything else (in young) to unreachable.
|
||||||
* reachable from objects in the reachable set. */
|
* NOTE: This used to move the reachable objects into a reachable
|
||||||
gc_list_init(&reachable);
|
* set instead. But most things usually turn out to be reachable,
|
||||||
move_roots(young, &reachable);
|
* so it's more efficient to move the unreachable things.
|
||||||
move_root_reachable(&reachable);
|
*/
|
||||||
|
|
||||||
/* move unreachable objects to a temporary list, new objects can be
|
|
||||||
* allocated after this point */
|
|
||||||
gc_list_init(&unreachable);
|
gc_list_init(&unreachable);
|
||||||
gc_list_move(young, &unreachable);
|
move_unreachable(young, &unreachable);
|
||||||
|
|
||||||
/* move reachable objects to next generation */
|
/* Move reachable objects to next generation. */
|
||||||
gc_list_merge(&reachable, old);
|
if (young != old)
|
||||||
|
gc_list_merge(young, old);
|
||||||
|
|
||||||
/* Move objects reachable from finalizers, we can't safely delete
|
/* All objects in unreachable are trash, but objects reachable from
|
||||||
* them. Python programmers should take care not to create such
|
* finalizers can't safely be deleted. Python programmers should take
|
||||||
* things. For Python finalizers means instance objects with
|
* care not to create such things. For Python, finalizers means
|
||||||
* __del__ methods. */
|
* instance objects with __del__ methods.
|
||||||
|
*/
|
||||||
gc_list_init(&finalizers);
|
gc_list_init(&finalizers);
|
||||||
move_finalizers(&unreachable, &finalizers);
|
move_finalizers(&unreachable, &finalizers);
|
||||||
move_finalizer_reachable(&finalizers);
|
move_finalizer_reachable(&finalizers);
|
||||||
@ -478,7 +545,7 @@ collect(int generation)
|
|||||||
debug_cycle("collectable", FROM_GC(gc));
|
debug_cycle("collectable", FROM_GC(gc));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
/* call tp_clear on objects in the collectable set. This will cause
|
/* Call tp_clear on objects in the collectable set. This will cause
|
||||||
* the reference cycles to be broken. It may also cause some objects in
|
* the reference cycles to be broken. It may also cause some objects in
|
||||||
* finalizers to be freed */
|
* finalizers to be freed */
|
||||||
delete_garbage(&unreachable, old);
|
delete_garbage(&unreachable, old);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user