AutoFreeAlloc vs ScopeAlloc

If you prefer chinese, see http://cpp.winxgui.com/cn:allocators-performance-comparison.

Summary

In this article we'll talk about performance comparison of:

Because most of allocators focus to improve performance of small objects allocation, we only test to allocate small objects here.

Test Environment

  • CPU: 1.2 G
  • OS: Windows XP
  • Compiler: Visual C++ 6.0
  • Optimization: Maximize speed
  • C Runtime library: Multithreaded DLL
  • Settings: Win32 Release

Condition 1: One allocator instance only allocate a few small objects

Test method: One allocator instance only allocate one "int" object.

Source code (See <stdext/Memory.h>):

template <class LogT>
class TestCompareAllocators : public TestCase
{
    WINX_TEST_SUITE(TestCompareAllocators);
        WINX_TEST(testComparison1);
    WINX_TEST_SUITE_END();
 
public:
    enum { N = 60000 };
 
    void doNewDelete1(LogT& log)
    {
        log.print("===== NewDelete =====\n");
        PerformanceCounter counter;
        for (int i = 0; i < N; ++i)
        {
            int* p = new int;
            delete p;
        }
        counter.trace(log);
    }
 
    void doAutoFreeAlloc1(LogT& log)
    {
        log.print("===== AutoFreeAlloc =====\n");
        PerformanceCounter counter;
        for (int i = 0; i < N; ++i)
        {
            AutoFreeAlloc alloc;
            int* p = STD_NEW(alloc, int);
        }
        counter.trace(log);
    }
 
    void doScopeAlloc1(LogT& log)
    {
        log.print("===== ScopeAlloc =====\n");
        BlockPool recycle;
        PerformanceCounter counter;
        for (int i = 0; i < N; ++i)
        {
            ScopeAlloc alloc(recycle);
            int* p = STD_NEW(alloc, int);
        }
        counter.trace(log);
    }
 
    void testComparison1(LogT& log)
    {
        for (int i = 0; i < 4; ++i)
        {
            log.newline();
            doAutoFreeAlloc1(log);
            doNewDelete1(log);
            doScopeAlloc1(log);
        }
    }
};

Output:

===== AutoFreeAlloc =====
---> Elapse 283773 ticks (79.28 ms) (0.00 min) ...
===== NewDelete =====
---> Elapse 134936 ticks (37.70 ms) (0.00 min) ...
===== ScopeAlloc =====
---> Elapse 8285 ticks (2.31 ms) (0.00 min) ...

===== AutoFreeAlloc =====
---> Elapse 184090 ticks (51.43 ms) (0.00 min) ...
===== NewDelete =====
---> Elapse 116476 ticks (32.54 ms) (0.00 min) ...
===== ScopeAlloc =====
---> Elapse 9831 ticks (2.75 ms) (0.00 min) ...

===== AutoFreeAlloc =====
---> Elapse 179652 ticks (50.19 ms) (0.00 min) ...
===== NewDelete =====
---> Elapse 130553 ticks (36.47 ms) (0.00 min) ...
===== ScopeAlloc =====
---> Elapse 8387 ticks (2.34 ms) (0.00 min) ...

===== AutoFreeAlloc =====
---> Elapse 137255 ticks (38.34 ms) (0.00 min) ...
===== NewDelete =====
---> Elapse 103869 ticks (29.02 ms) (0.00 min) ...
===== ScopeAlloc =====
---> Elapse 8215 ticks (2.29 ms) (0.00 min) ...

Conclusion:

When an allocator instance only allocate a few memory, AutoFreeAlloc has the worst performance, new/delete is worse, and ScopeAlloc has good performance however.

Condition 2: One allocator instance allocate a large amount of small objects

Test method: One allocator instance allocate 6000 "int" objects.

Source code (See <Memory.h>):

template <class LogT>
class TestCompareAllocators : public TestCase
{
    WINX_TEST_SUITE(TestCompareAllocators);
        WINX_TEST(testComparison2);
    WINX_TEST_SUITE_END();
 
public:
    enum { N = 60000 };
 
    void doNewDelete2(LogT& log)
    {
        int i, *p[N];
        log.print("===== NewDelete =====\n");
        PerformanceCounter counter;
        for (i = 0; i < N; ++i)
        {
            p[i] = new int;
        }
        for (i = 0; i < N; ++i)
        {
            delete p[i];
        }
        counter.trace(log);
    }
 
    void doAutoFreeAlloc2(LogT& log)
    {
        log.print("===== AutoFreeAlloc =====\n");
        PerformanceCounter counter;
        {
            AutoFreeAlloc alloc;
            for (int i = 0; i < N; ++i)
            {
                int* p = STD_NEW(alloc, int);
            }
        }
        counter.trace(log);
    }
 
    void doScopeAlloc2(LogT& log)
    {
        log.print("===== ScopeAlloc =====\n");
        BlockPool recycle;
        PerformanceCounter counter;
        {
            ScopeAlloc alloc(recycle);
            for (int i = 0; i < N; ++i)
            {
                int* p = STD_NEW(alloc, int);
            }
        }
        counter.trace(log);
    }
 
    void testComparison2(LogT& log)
    {
        for (int i = 0; i < 4; ++i)
        {
            log.newline();
            doAutoFreeAlloc2(log);
            doNewDelete2(log);
            doScopeAlloc2(log);
        }
    }
};

Output:

===== AutoFreeAlloc =====
---> Elapse 1771 ticks (0.49 ms) (0.00 min) ...
===== NewDelete =====
---> Elapse 117791 ticks (32.91 ms) (0.00 min) ...
===== ScopeAlloc =====
---> Elapse 2657 ticks (0.74 ms) (0.00 min) ...

===== AutoFreeAlloc =====
---> Elapse 1637 ticks (0.46 ms) (0.00 min) ...
===== NewDelete =====
---> Elapse 114280 ticks (31.93 ms) (0.00 min) ...
===== ScopeAlloc =====
---> Elapse 2516 ticks (0.70 ms) (0.00 min) ...

===== AutoFreeAlloc =====
---> Elapse 1668 ticks (0.47 ms) (0.00 min) ...
===== NewDelete =====
---> Elapse 118661 ticks (33.15 ms) (0.00 min) ...
===== ScopeAlloc =====
---> Elapse 2553 ticks (0.71 ms) (0.00 min) ...

===== AutoFreeAlloc =====
---> Elapse 1678 ticks (0.47 ms) (0.00 min) ...
===== NewDelete =====
---> Elapse 113231 ticks (31.63 ms) (0.00 min) ...
===== ScopeAlloc =====
---> Elapse 3287 ticks (0.92 ms) (0.00 min) ...

Conclusion:

When an allocator instance allocate a large amount of objects, AutoFreeAlloc has the best performance, ScopeAlloc is better (and very close to AutoFreeAlloc), and new/delete has the worst performance.

Suggestions

Suggestion 1

AutoFreeAlloc is useful in some special conditions (eg. in an IO process, in a Layout Render process, etc). We use it in "The fastest word file writer".

Suggestion 2

If you don't know you want AutoFreeAlloc or ScopeAlloc, please choose ScopeAlloc. It have good performance in any condition indeed. If you need a timely memory recycle, ScopeAlloc is the only choice.

A complex example of ScopeAlloc

int main()
{
    std::BlockPool recycle;
    std::ScopeAlloc alloc(recycle);
    printf("\n------------------- global: have 3 objs ----------------\n");
    {
        Obj* a1 = STD_NEW(alloc, Obj)(0);
        Obj* a2 = STD_NEW_ARRAY(alloc, Obj, 2);
        printf("------------------- child 1: have 4 objs ----------------\n");
        {
            std::ScopeAlloc child1(alloc);
            Obj* o1 = STD_NEW(child1, Obj)(1);
            Obj* o2 = STD_NEW_ARRAY(child1, Obj, 3);
            printf("------------------- child 11: have 3 objs ----------------\n");
            {
                std::ScopeAlloc* child11 = STD_NEW(child1, std::ScopeAlloc)(child1);
                Obj* o11 = STD_NEW(*child11, Obj)(11);
                Obj* o12 = STD_NEW_ARRAY(*child11, Obj, 2);
            }
            printf("------------------- leave child 11 ----------------\n");
            printf("------------------- child 12: have 3 objs ----------------\n");
            {
                std::ScopeAlloc child12(child1);
                Obj* o11 = STD_NEW(child12, Obj)(12);
                Obj* o12 = STD_NEW_ARRAY(child12, Obj, 2);
            }
            printf("------------------- leave child 12 ----------------\n");
        }
        printf("------------------- leave child 1 ----------------\n");
        printf("------------------- child 2: have 4 objs ----------------\n");
        {
            std::ScopeAlloc child2(alloc);
            Obj* o1 = STD_NEW(child2, Obj)(2);
            Obj* o2 = STD_NEW_ARRAY(child2, Obj, 3);
        }
        printf("------------------- leave child 2 ----------------\n");
    }
    return 0;
}

Output:

------------------- global: have 3 objs ----------------
construct Obj: 0
construct Obj: 0
construct Obj: 0
------------------- child 1: have 4 objs ----------------
construct Obj: 1
construct Obj: 0
construct Obj: 0
construct Obj: 0
------------------- child 11: have 3 objs ----------------
construct Obj: 11
construct Obj: 0
construct Obj: 0
------------------- leave child 11 ----------------
------------------- child 12: have 3 objs ----------------
construct Obj: 12
construct Obj: 0
construct Obj: 0
destruct Obj: 0
destruct Obj: 0
destruct Obj: 12
------------------- leave child 12 ----------------
destruct Obj: 0
destruct Obj: 0
destruct Obj: 11
destruct Obj: 0
destruct Obj: 0
destruct Obj: 0
destruct Obj: 1
------------------- leave child 1 ----------------
------------------- child 2: have 4 objs ----------------
construct Obj: 2
construct Obj: 0
construct Obj: 0
construct Obj: 0
destruct Obj: 0
destruct Obj: 0
destruct Obj: 0
destruct Obj: 2
------------------- leave child 2 ----------------
destruct Obj: 0
destruct Obj: 0
destruct Obj: 0
Unless otherwise stated, the content of this page is licensed under Creative Commons Attribution-ShareAlike 3.0 License