前段时间有同事对项目中的一段代码表示疑问:
%% Convert a string to an atom, preferring an atom that already exists.
%% list_to_existing_atom/1 is tried first; only when it raises an error
%% do we fall back to erlang:list_to_atom/1.
list_to_atom(List) when is_list(List) ->
    try list_to_existing_atom(List) of
        Atom when is_atom(Atom) ->
            Atom
    catch
        error:_ ->
            erlang:list_to_atom(List)
    end.
疑问:为什么这里需要先尝试list_to_existing_atom?这里应该是无法防范atom资源溢出问题的。顺手就翻了源码(R16B02)看看:
BIF_RETTYPE list_to_atom_1(BIF_ALIST_1) { Eterm res; char *buf = (char *) erts_alloc(ERTS_ALC_T_TMP, MAX_ATOM_CHARACTERS); int i = intlist_to_buf(BIF_ARG_1, buf, MAX_ATOM_CHARACTERS); if (i < 0) { erts_free(ERTS_ALC_T_TMP, (void *) buf); i = list_length(BIF_ARG_1); if (i > MAX_ATOM_CHARACTERS) { BIF_ERROR(BIF_P, SYSTEM_LIMIT); } BIF_ERROR(BIF_P, BADARG); } res = erts_atom_put((byte *) buf, i, ERTS_ATOM_ENC_LATIN1, 1); ASSERT(is_atom(res)); erts_free(ERTS_ALC_T_TMP, (void *) buf); BIF_RET(res); } /* conditionally convert a list of ascii integers to an atom */ BIF_RETTYPE list_to_existing_atom_1(BIF_ALIST_1) { int i; char *buf = (char *) erts_alloc(ERTS_ALC_T_TMP, MAX_ATOM_CHARACTERS); if ((i = intlist_to_buf(BIF_ARG_1, buf, MAX_ATOM_CHARACTERS)) < 0) { error: erts_free(ERTS_ALC_T_TMP, (void *) buf); BIF_ERROR(BIF_P, BADARG); } else { Eterm a; if (erts_atom_get(buf, i, &a;, ERTS_ATOM_ENC_LATIN1)) { erts_free(ERTS_ALC_T_TMP, (void *) buf); BIF_RET(a); } else { goto error; } } }
list_to_atom和list_to_existing_atom的主要差别是后者直接拿hash表中的结果,前者是尝试创建。再看erts_atom_put:
/*
 * erts_atom_put() may fail. If it fails THE_NON_VALUE is returned!
 *
 * Returns the atom for the name/len pair, inserting it into the atom
 * table only if a lookup does not find it first.
 *
 *   name  - bytes of the atom name, interpreted according to `enc`.
 *   len   - number of bytes in `name`.
 *   enc   - ERTS_ATOM_ENC_7BIT_ASCII, ERTS_ATOM_ENC_LATIN1 or
 *           ERTS_ATOM_ENC_UTF8.
 *   trunc - non-zero: a negative length is treated as 0 and an over-long
 *           name is cut down to the limit; zero: those cases return
 *           THE_NON_VALUE instead.
 *
 * Invalid UTF-8 input returns THE_NON_VALUE regardless of `trunc`.
 */
Eterm
erts_atom_put(const byte *name, int len, ErtsAtomEncoding enc, int trunc)
{
    byte utf8_copy[MAX_ATOM_SZ_FROM_LATIN1];
    const byte *text = name;
    int tlen = len;
    Sint no_latin1_chars;
    Atom a;
    int aix;

#ifdef ERTS_ATOM_PUT_OPS_STAT
    erts_smp_atomic_inc_nob(&atom_put_ops);
#endif

    if (tlen < 0) {
        if (trunc)
            tlen = 0;
        else
            return THE_NON_VALUE;
    }

    switch (enc) {
    case ERTS_ATOM_ENC_7BIT_ASCII:
        if (tlen > MAX_ATOM_CHARACTERS) {
            if (trunc)
                tlen = MAX_ATOM_CHARACTERS;
            else
                return THE_NON_VALUE;
        }
#ifdef DEBUG
        for (aix = 0; aix < len; aix++) {
            ASSERT((name[aix] & 0x80) == 0);
        }
#endif
        no_latin1_chars = tlen;
        break;
    case ERTS_ATOM_ENC_LATIN1:
        if (tlen > MAX_ATOM_CHARACTERS) {
            if (trunc)
                tlen = MAX_ATOM_CHARACTERS;
            else
                return THE_NON_VALUE;
        }
        no_latin1_chars = tlen;
        /* May redirect `text` to utf8_copy and update `tlen`. */
        latin1_to_utf8(utf8_copy, &text, &tlen);
        break;
    case ERTS_ATOM_ENC_UTF8:
        /* First sanity check; need to verify later */
        if (tlen > MAX_ATOM_SZ_LIMIT && !trunc)
            return THE_NON_VALUE;
        break;
    }

    /* Look the name up under the read lock before attempting any insert. */
    a.len = tlen;
    a.name = (byte *) text;
    atom_read_lock();
    aix = index_get(&erts_atom_table, (void*) &a);
    atom_read_unlock();
    if (aix >= 0) {
        /* Already in table no need to verify it */
        return make_atom(aix);
    }

    if (enc == ERTS_ATOM_ENC_UTF8) {
        /* Need to verify encoding and length */
        byte *err_pos;
        Uint no_chars;

        switch (erts_analyze_utf8_x((byte *) text,
                                    (Uint) tlen,
                                    &err_pos,
                                    &no_chars,
                                    NULL,
                                    &no_latin1_chars,
                                    MAX_ATOM_CHARACTERS)) {
        case ERTS_UTF8_OK:
            ASSERT(no_chars <= MAX_ATOM_CHARACTERS);
            break;
        case ERTS_UTF8_OK_MAX_CHARS:
            /* Truncated... */
            if (!trunc)
                return THE_NON_VALUE;
            ASSERT(no_chars == MAX_ATOM_CHARACTERS);
            tlen = err_pos - text;
            break;
        default:
            /* Bad utf8... */
            return THE_NON_VALUE;
        }
    }

    ASSERT(tlen <= MAX_ATOM_SZ_LIMIT);
    ASSERT(-1 <= no_latin1_chars && no_latin1_chars <= MAX_ATOM_CHARACTERS);

    /* Not found above: insert under the write lock. */
    a.len = tlen;
    a.latin1_chars = (Sint16) no_latin1_chars;
    a.name = (byte *) text;
    atom_write_lock();
    aix = index_put(&erts_atom_table, (void*) &a);
    atom_write_unlock();
    return make_atom(aix);
}
erts_atom_put 会先判断该 atom 是否已经存在,如果存在则直接返回已有的 atom,不会重复创建。
所以:
1. list_to_atom 在对应的 atom 已经存在时,不会再新产生 atom
2. list_to_existing_atom 在 atom 已经存在(即第二次及以后出现)时性能会稍快
3. 先尝试to_existing只是在atom经常重复出现时才会划算