IT博客汇 | [erlang]性能测试与编译期优化

[erlang]性能测试与编译期优化

庆亮发表于 2014-10-10 05:23:03

之前有同学对list_to_atom与list_to_existing_atom 中的结论2表示疑惑并给出了对应的测试代码：

-module(test_atom).

-export([test1/0, test2/0]).

%%
-define(LOOP_TIMES, 1000000).

%% Naive benchmark: evaluates erlang:list_to_atom("one") ?LOOP_TIMES times
%% inside a list comprehension and prints the elapsed time reported by
%% unixtime/0.
%% The argument is deliberately a literal string and the loop is deliberately
%% a list comprehension: this is the exact shape whose compiled form the
%% article inspects.
test1() ->
    Started = unixtime(),
    [erlang:list_to_atom("one") || _ <- lists:seq(1, ?LOOP_TIMES)],
    Elapsed = unixtime() - Started,
    io:format("test1:~pms~n", [Elapsed]).

%% Naive benchmark: evaluates erlang:list_to_existing_atom("one")
%% ?LOOP_TIMES times inside a list comprehension and prints the elapsed time
%% reported by unixtime/0.
test2() ->
    %% Make sure the atom exists before the timed lookups start.
    erlang:list_to_atom("one"),

    Started = unixtime(),
    [erlang:list_to_existing_atom("one") || _ <- lists:seq(1, ?LOOP_TIMES)],
    Elapsed = unixtime() - Started,
    io:format("test2:~pms~n", [Elapsed]).

%% Returns the current wall-clock time as integer milliseconds since the
%% Unix epoch.
%% os:timestamp/0 yields {MegaSecs, Secs, MicroSecs}. One megasecond is
%% 1000000 seconds, so the whole-second count is MegaSecs * 1000000 + Secs;
%% scaling MegaSecs by anything else gives wrong differences whenever two
%% readings straddle a megasecond rollover.
%% os:timestamp/0 is used instead of erlang:now/0 because the latter is
%% deprecated and its uniqueness guarantee is not needed for timing.
unixtime() ->
    {MegaSecs, Secs, MicroSecs} = os:timestamp(),
    (MegaSecs * 1000000 + Secs) * 1000 + MicroSecs div 1000.

测试后的结果如下(不同机器表现会稍有不同)：

test1: 39ms
test2: 750ms

看了这样的结果我也很疑惑：从这两个 to_atom 函数的实现源码来看，这个现象很矛盾。反复查看源码(见上一篇博客链接)确认无误后，尝试使用 -S 选项编译源码：

erlc -S test_atom.erl

查看 test_atom.S文件，得到结果(只截取部分)：

%% Compiler-generated function for the list comprehension in test2/0
%% (arity 1, entry label 13). The call to erlang:list_to_existing_atom/1 is
%% still present, so it is executed at run time for every list element.
{function, '-test2/0-lc$^0/1-0-', 1, 13}.
  {label,12}.
    {line,[{location,"test_atom.erl",17}]}.
    {func_info,{atom,test_atom},{atom,'-test2/0-lc$^0/1-0-'},1}.
  {label,13}.
    %% x0 holds the remaining input list; if it is not a cons cell, go to label 14.
    {test,is_nonempty_list,{f,14},[{x,0}]}.
    {allocate,1,1}.
    %% Split the list: head into x1, tail into y0.
    {get_list,{x,0},{x,1},{y,0}}.
    %% Load the literal "one" as the argument and call the BIF.
    {move,{literal,"one"},{x,0}}.
    {line,[{location,"test_atom.erl",17}]}.
    {call_ext,1,{extfunc,erlang,list_to_existing_atom,1}}.
    %% Save the call result in y0 and put the saved tail back in x0
    %% as the argument of the recursive call.
    {move,{x,0},{x,1}}.
    {move,{y,0},{x,0}}.
    {move,{x,1},{y,0}}.
    {line,[{location,"test_atom.erl",17}]}.
    {call,1,{f,13}}.
    %% Cons the saved result (y0) onto the list returned by the recursion (x0).
    {test_heap,2,1}.
    {put_list,{y,0},{x,0},{x,0}}.
    {deallocate,1}.
    return.
  {label,14}.
    %% An empty list ends the recursion; any other term jumps to label 12 (func_info).
    {test,is_nil,{f,12},[{x,0}]}.
    return.


%% Compiler-generated function for the list comprehension in test1/0
%% (arity 1, entry label 16). There is no call instruction for
%% erlang:list_to_atom/1 here: the loop body only conses an atom literal.
%% NOTE(review): the literal is `onexxxx`, not `one` as in the source shown in
%% the article; presumably this listing was produced from a variant of the
%% source that used "onexxxx" -- confirm.
{function, '-test1/0-lc$^0/1-0-', 1, 16}.
  {label,15}.
    {line,[{location,"test_atom.erl",9}]}.
    {func_info,{atom,test_atom},{atom,'-test1/0-lc$^0/1-0-'},1}.
  {label,16}.
    %% x0 holds the remaining input list; if it is not a cons cell, go to label 17.
    {test,is_nonempty_list,{f,17},[{x,0}]}.
    {allocate,0,1}.
    %% Split the list: head into x1, tail back into x0 for the recursive call.
    {get_list,{x,0},{x,1},{x,0}}.
    {line,[{location,"test_atom.erl",9}]}.
    {call,1,{f,16}}.
    %% Cons the atom literal onto the list returned by the recursion (x0).
    {test_heap,2,1}.
    {put_list,{atom,onexxxx},{x,0},{x,0}}.
    {deallocate,0}.
    return.
  {label,17}.
    %% An empty list ends the recursion; any other term jumps to label 15 (func_info).
    {test,is_nil,{f,15},[{x,0}]}.
    return.

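原来Erlang编译器在编译期就能确定 erlang:list_to_atom 的输出结果（参数是字面量常量），因此直接把这次调用编译成了原子常量，无需运行时调用；而 erlang:list_to_existing_atom 的调用没有被这样处理，仍然保留在循环体中，所以两者的耗时根本不具可比性。这种优化即常量折叠，很多语言都有类似的优化：例如源码中出现 A = 3 + 2，那么编译期就会直接把 5 绑定给 A，而不用等到运行时再计算。OK，下面是调整后的性能测试代码段（注意，尽可能减少代码段本身被干扰的可能性）：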

-module(test_atom).

-export([test1/0, test2/0]).

%%
-define(LOOP_TIMES, 1000000).

%% Benchmark: converts ?LOOP_TIMES distinct strings ("one_1", "one_2", ...)
%% with erlang:list_to_atom/1 and prints the elapsed time reported by
%% unixtime/0. The arguments are run-time values, not literals.
%% NOTE(review): every distinct string becomes a new atom; make sure
%% ?LOOP_TIMES stays below the VM's atom table limit -- confirm for the
%% target release.
test1() ->
    %% Build all input strings before the clock starts so that only the
    %% conversions are timed.
    Names = ["one_" ++ integer_to_list(N) || N <- lists:seq(1, ?LOOP_TIMES)],
    Started = unixtime(),
    [erlang:list_to_atom(Name) || Name <- Names],
    Elapsed = unixtime() - Started,
    io:format("test1:~pms~n", [Elapsed]).

%% Benchmark: looks up ?LOOP_TIMES distinct, already existing atoms with
%% erlang:list_to_existing_atom/1 and prints the elapsed time reported by
%% unixtime/0.
%% NOTE(review): the warm-up below creates one atom per distinct string; make
%% sure ?LOOP_TIMES stays below the VM's atom table limit -- confirm for the
%% target release.
test2() ->
    Names = ["one_" ++ integer_to_list(N) || N <- lists:seq(1, ?LOOP_TIMES)],
    %% Untimed warm-up: create every atom first so the timed lookups succeed.
    [erlang:list_to_atom(Name) || Name <- Names],

    Started = unixtime(),
    [erlang:list_to_existing_atom(Name) || Name <- Names],
    Elapsed = unixtime() - Started,
    io:format("test2:~pms~n", [Elapsed]).

%% Returns the current wall-clock time as integer milliseconds since the
%% Unix epoch.
%% os:timestamp/0 yields {MegaSecs, Secs, MicroSecs}. One megasecond is
%% 1000000 seconds, so the whole-second count is MegaSecs * 1000000 + Secs;
%% scaling MegaSecs by anything else gives wrong differences whenever two
%% readings straddle a megasecond rollover.
%% os:timestamp/0 is used instead of erlang:now/0 because the latter is
%% deprecated and its uniqueness guarantee is not needed for timing.
unixtime() ->
    {MegaSecs, Secs, MicroSecs} = os:timestamp(),
    (MegaSecs * 1000000 + Secs) * 1000 + MicroSecs div 1000.

结果：

erl -s test_atom test1 -s erlang halt -noinput -noshell +h 9999999 -smp disable
test1:354ms
erl -s test_atom test2 -s erlang halt -noinput -noshell +h 9999999 -smp disable
test2:138ms

虽然list_to_existing_atom性能有些优势，但是由于单次调用的耗时都只有几百纳秒，一般情况下这种优势并无大意义。list_to_existing_atom更多的是用于防止拒绝服务等问题：原子不会被垃圾回收，如果对外部输入直接调用list_to_atom，攻击者可以不断制造新原子直至耗尽原子表；而list_to_existing_atom只接受已经存在（预先创建好）的原子。