IT博客汇
  • 首页
  • 精华
  • 技术
  • 设计
  • 资讯
  • 扯淡
  • 权利声明
  • 登录 注册

    [erlang]list_to_atom与list_to_existing_atom

    庆亮发表于 2014-04-24 07:50:01
    love 0

    前段时间有同事对项目中的一段代码表示疑问:

    %% Convert a string to an atom, reusing the atom when it already exists.
    %% The existing-atom lookup is tried first; only when that lookup raises
    %% an error does the call fall back to erlang:list_to_atom/1, which may
    %% create a new atom.
    list_to_atom(List) when is_list(List) ->
        try list_to_existing_atom(List) of
            Atom when is_atom(Atom) -> Atom
        catch
            error:_ -> erlang:list_to_atom(List)
        end.
    

    疑问:为什么这里需要先尝试list_to_existing_atom?这里应该是无法防范atom资源溢出问题的。顺手就翻了源码(R16B02)看看:

    /*
     * BIF list_to_atom/1: copy the argument list into a temporary buffer
     * and hand it to erts_atom_put() as Latin-1 text (with truncation
     * enabled). A negative result from intlist_to_buf() is reported as
     * SYSTEM_LIMIT when the list is longer than MAX_ATOM_CHARACTERS and
     * as BADARG otherwise.
     */
    BIF_RETTYPE list_to_atom_1(BIF_ALIST_1)
    {
        Eterm atom;
        char *chars = (char *) erts_alloc(ERTS_ALC_T_TMP, MAX_ATOM_CHARACTERS);
        int nchars = intlist_to_buf(BIF_ARG_1, chars, MAX_ATOM_CHARACTERS);

        if (nchars >= 0) {
            /* Success path: look up or create the atom, then drop the buffer. */
            atom = erts_atom_put((byte *) chars, nchars, ERTS_ATOM_ENC_LATIN1, 1);
            ASSERT(is_atom(atom));
            erts_free(ERTS_ALC_T_TMP, (void *) chars);
            BIF_RET(atom);
        }

        /* Failure path: the buffer is no longer needed; pick the error reason. */
        erts_free(ERTS_ALC_T_TMP, (void *) chars);
        nchars = list_length(BIF_ARG_1);
        if (nchars > MAX_ATOM_CHARACTERS) {
            BIF_ERROR(BIF_P, SYSTEM_LIMIT);
        }
        BIF_ERROR(BIF_P, BADARG);
    }
    
    /* conditionally convert a list of ascii integers to an atom */
     
    BIF_RETTYPE list_to_existing_atom_1(BIF_ALIST_1)
    {
        int i;
        char *buf = (char *) erts_alloc(ERTS_ALC_T_TMP, MAX_ATOM_CHARACTERS);
    
        if ((i = intlist_to_buf(BIF_ARG_1, buf, MAX_ATOM_CHARACTERS)) < 0) {
        error:
    	erts_free(ERTS_ALC_T_TMP, (void *) buf);
    	BIF_ERROR(BIF_P, BADARG);
        } else {
    	Eterm a;
    	
    	if (erts_atom_get(buf, i, &a;, ERTS_ATOM_ENC_LATIN1)) {
    	    erts_free(ERTS_ALC_T_TMP, (void *) buf);
    	    BIF_RET(a);
    	} else {
    	    goto error;
    	}
        }
    }
    

    list_to_atom和list_to_existing_atom的主要差别是:后者只在atom表(hash表)中查找已有的atom,找不到就返回badarg错误;前者在找不到时会尝试创建新的atom。再看erts_atom_put:

    /*
     * erts_atom_put() returns the atom for the text name[0..len-1], adding
     * it to the atom table when it is not there yet.
     *
     * name, len - the text and its length in bytes
     * enc       - how the text is encoded (7-bit ASCII, Latin-1 or UTF-8)
     * trunc     - when non-zero, a negative length is treated as 0 and
     *             over-long text is cut down to the limit instead of being
     *             rejected
     *
     * erts_atom_put() may fail. If it fails THE_NON_VALUE is returned!
     * That happens for a negative or over-long length when trunc is zero,
     * and for text that is not valid UTF-8.
     */
    Eterm
    erts_atom_put(const byte *name, int len, ErtsAtomEncoding enc, int trunc)
    {
        byte utf8_copy[MAX_ATOM_SZ_FROM_LATIN1];
        const byte *text = name;
        int tlen = len;
        Sint no_latin1_chars;
        Atom a;
        int aix;

    #ifdef ERTS_ATOM_PUT_OPS_STAT
        erts_smp_atomic_inc_nob(&atom_put_ops);
    #endif

        if (tlen < 0) {
            if (trunc)
                tlen = 0;
            else
                return THE_NON_VALUE;
        }

        switch (enc) {
        case ERTS_ATOM_ENC_7BIT_ASCII:
            if (tlen > MAX_ATOM_CHARACTERS) {
                if (trunc)
                    tlen = MAX_ATOM_CHARACTERS;
                else
                    return THE_NON_VALUE;
            }
    #ifdef DEBUG
            for (aix = 0; aix < len; aix++) {
                ASSERT((name[aix] & 0x80) == 0);
            }
    #endif
            no_latin1_chars = tlen;
            break;
        case ERTS_ATOM_ENC_LATIN1:
            if (tlen > MAX_ATOM_CHARACTERS) {
                if (trunc)
                    tlen = MAX_ATOM_CHARACTERS;
                else
                    return THE_NON_VALUE;
            }
            no_latin1_chars = tlen;
            /* text and tlen are passed by address so the helper can update them. */
            latin1_to_utf8(utf8_copy, &text, &tlen);
            break;
        case ERTS_ATOM_ENC_UTF8:
            /* First sanity check; need to verify later */
            if (tlen > MAX_ATOM_SZ_LIMIT && !trunc)
                return THE_NON_VALUE;
            break;
        }

        /* Fast path: look the atom up while holding only the read lock. */
        a.len = tlen;
        a.name = (byte *) text;
        atom_read_lock();
        aix = index_get(&erts_atom_table, (void*) &a);
        atom_read_unlock();
        if (aix >= 0) {
            /* Already in table no need to verify it */
            return make_atom(aix);
        }

        if (enc == ERTS_ATOM_ENC_UTF8) {
            /* Need to verify encoding and length */
            byte *err_pos;
            Uint no_chars;
            switch (erts_analyze_utf8_x((byte *) text,
                                        (Uint) tlen,
                                        &err_pos,
                                        &no_chars, NULL,
                                        &no_latin1_chars,
                                        MAX_ATOM_CHARACTERS)) {
            case ERTS_UTF8_OK:
                ASSERT(no_chars <= MAX_ATOM_CHARACTERS);
                break;
            case ERTS_UTF8_OK_MAX_CHARS:
                /* Truncated... */
                if (!trunc)
                    return THE_NON_VALUE;
                ASSERT(no_chars == MAX_ATOM_CHARACTERS);
                /* Keep only the bytes in front of err_pos. */
                tlen = err_pos - text;
                break;
            default:
                /* Bad utf8... */
                return THE_NON_VALUE;
            }
        }

        ASSERT(tlen <= MAX_ATOM_SZ_LIMIT);
        ASSERT(-1 <= no_latin1_chars && no_latin1_chars <= MAX_ATOM_CHARACTERS);

        /* Slow path: the atom was not found above, so insert it under the write lock. */
        a.len = tlen;
        a.latin1_chars = (Sint16) no_latin1_chars;
        a.name = (byte *) text;
        atom_write_lock();
        aix = index_put(&erts_atom_table, (void*) &a);
        atom_write_unlock();
        return make_atom(aix);
    }
    

    erts_atom_put会先在atom表中查找(index_get)该atom是否已经存在,存在则直接返回;不存在时才会写入atom表(index_put)。

    所以:
    1. list_to_atom 遇到已经存在的atom时会直接返回,不会重复创建新的atom
    2. atom已经存在时(即第二次及以后转换同一个字符串),list_to_existing_atom 的性能会稍快
    3. 先尝试list_to_existing_atom的写法,只有在同一个atom经常重复出现时才划算



沪ICP备19023445号-2号
友情链接