手册中是这样描述array_merge的
array_merge() 将一个或多个数组的单元合并起来,一个数组中的值附加在前一个数组的后面。返回作为结果的数组。 如果输入的数组中有相同的字符串键名,则该键名后面的值将覆盖前一个值。然而,如果数组包含数字键名,后面的值将不会覆盖原来的值,而是附加到后面。 如果只给了一个数组并且该数组是数字索引的,则键名会以连续方式重新索引。
但是手册中并没有详细说明:对两个数组使用加法运算符(+)时,具体的运算方式和结果是什么。
举两个例子
<?php
// Example 1: numeric keys.
// NOTE(review): this first example was lost during extraction; it is
// reconstructed to match the first output shown below. The exact values
// of $arr_2 are an assumption - confirm against the original article.
$arr_1 = array( 'a', 'b', 'c' );
$arr_2 = array( 's', 'c', 'd' );
$arr_3 = $arr_1 + $arr_2;
print_r($arr_3);

// Example 2: string keys.
$arr_1 = array( 'a' => 'a', 'b' => 'b', 'c' => 'c' );
$arr_2 = array( 'b' => 's', 'c' => 'c', 'd' => 'd' );
$arr_3 = $arr_1 + $arr_2;
print_r($arr_3);
输出分别是什么呢,看下面
Array ( [0] => a [1] => b [2] => c ) Array ( [a] => a [b] => b [c] => c [d] => d )
从上面的结果中可以分析得到+的运算方式:如果数组中有相同的key(不管是数字还是字符串),则会把最先出现的值作为最终结果返回,而把后面的数组拥有相同键名的那些值“抛弃”掉(注意:不是覆盖而是保留最先出现的那个值)。
接下来从源码来分析一下看看array_merge和+的具体运算方式,先来说array_merge
array_merge源码分析
/*
 * array_merge: userland entry point.
 *
 * Forwards the call unchanged to the shared merge/replace wrapper with
 * recursive = 0 and replace = 0, i.e. a plain, non-recursive merge.
 */
PHP_FUNCTION(array_merge)
{
	php_array_merge_or_replace_wrapper(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0, 0);
}
/*
 * php_array_merge_or_replace_wrapper: shared implementation behind the
 * array merge/replace family of functions.
 *
 * recursive - non-zero selects the recursive variant of the operation.
 * replace   - zero selects php_array_merge(); non-zero selects the
 *             replace behaviour (php_array_replace_recursive() or
 *             zend_hash_merge() with overwrite = 1).
 *
 * Every argument must be an array; otherwise an E_WARNING is raised and
 * NULL is returned. The merged array is produced in return_value.
 */
static void php_array_merge_or_replace_wrapper(INTERNAL_FUNCTION_PARAMETERS, int recursive, int replace) /* {{{ */
{
	zval ***args = NULL;
	int argc, i, init_size = 0;

	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "+", &args, &argc) == FAILURE) {
		return;
	}

	/* Validate the arguments and remember the element count of the largest array. */
	for (i = 0; i < argc; i++) {
		if (Z_TYPE_PP(args[i]) != IS_ARRAY) {
			php_error_docref(NULL TSRMLS_CC, E_WARNING, "Argument #%d is not an array", i + 1);
			efree(args);
			RETURN_NULL();
		} else {
			int num = zend_hash_num_elements(Z_ARRVAL_PP(args[i]));

			if (num > init_size) {
				init_size = num;
			}
		}
	}

	/*
	 * Initialise the return array with the size of the largest input array;
	 * this is intended to save at least one resize of the result.
	 */
	array_init_size(return_value, init_size);

	/* Process each input array in turn, merging it into the array returned to the caller. */
	for (i = 0; i < argc; i++) {
		SEPARATE_ZVAL(args[i]);
		if (!replace) {
			/* array_merge passes replace = 0, so this is the branch it takes. */
			php_array_merge(Z_ARRVAL_P(return_value), Z_ARRVAL_PP(args[i]), recursive TSRMLS_CC);
		} else if (recursive && i > 0) { /* First array will be copied directly instead */
			php_array_replace_recursive(Z_ARRVAL_P(return_value), Z_ARRVAL_PP(args[i]) TSRMLS_CC);
		} else {
			zend_hash_merge(Z_ARRVAL_P(return_value), Z_ARRVAL_PP(args[i]), (copy_ctor_func_t) zval_add_ref, NULL, sizeof(zval *), 1);
		}
	}

	efree(args);
}
/*
 * php_array_merge: merge every element of `src` into `dest`.
 *
 * - String keys are written with zend_hash_update(), so a value from `src`
 *   replaces an existing entry that has the same key. When `recursive` is
 *   non-zero and the key already exists in `dest`, both values are converted
 *   to arrays and merged recursively instead.
 * - Integer keys are appended with zend_hash_next_index_insert(); the
 *   original numeric key (num_key) is not passed on.
 *
 * Returns 1 on success, or 0 when recursion is detected during a
 * recursive merge (an E_WARNING is raised in that case).
 */
PHPAPI int php_array_merge(HashTable *dest, HashTable *src, int recursive TSRMLS_DC) /* {{{ */
{
	zval **src_entry, **dest_entry;
	char *string_key;
	uint string_key_len;
	ulong num_key;
	HashPosition pos;

	zend_hash_internal_pointer_reset_ex(src, &pos);
	while (zend_hash_get_current_data_ex(src, (void **)&src_entry, &pos) == SUCCESS) {
		switch (zend_hash_get_current_key_ex(src, &string_key, &string_key_len, &num_key, 0, &pos)) {
			case HASH_KEY_IS_STRING:
				/* String key. Note that array_merge passes recursive = 0, so it skips this branch. */
				if (recursive && zend_hash_find(dest, string_key, string_key_len, (void **)&dest_entry) == SUCCESS) {
					HashTable *thash = Z_TYPE_PP(dest_entry) == IS_ARRAY ? Z_ARRVAL_PP(dest_entry) : NULL;

					if ((thash && thash->nApplyCount > 1) || (*src_entry == *dest_entry && Z_ISREF_PP(dest_entry) && (Z_REFCOUNT_PP(dest_entry) % 2))) {
						php_error_docref(NULL TSRMLS_CC, E_WARNING, "recursion detected");
						return 0;
					}
					SEPARATE_ZVAL(dest_entry);
					SEPARATE_ZVAL(src_entry);

					if (Z_TYPE_PP(dest_entry) == IS_NULL) {
						convert_to_array_ex(dest_entry);
						add_next_index_null(*dest_entry);
					} else {
						convert_to_array_ex(dest_entry);
					}
					if (Z_TYPE_PP(src_entry) == IS_NULL) {
						convert_to_array_ex(src_entry);
						add_next_index_null(*src_entry);
					} else {
						convert_to_array_ex(src_entry);
					}
					/* Guard the nested merge with the apply counter of the destination table. */
					if (thash) {
						thash->nApplyCount++;
					}
					if (!php_array_merge(Z_ARRVAL_PP(dest_entry), Z_ARRVAL_PP(src_entry), recursive TSRMLS_CC)) {
						if (thash) {
							thash->nApplyCount--;
						}
						return 0;
					}
					if (thash) {
						thash->nApplyCount--;
					}
				} else {
					/*
					 * Add the value to the result through zend_hash_update():
					 * it inserts when the key is absent and updates (overwrites)
					 * when the key is already present.
					 */
					Z_ADDREF_PP(src_entry);
					zend_hash_update(dest, string_key, string_key_len, src_entry, sizeof(zval *), NULL);
				}
				break;

			case HASH_KEY_IS_LONG:
				/* Integer key: the value is simply appended to the result array. */
				Z_ADDREF_PP(src_entry);
				zend_hash_next_index_insert(dest, src_entry, sizeof(zval *), NULL);
				break;
		}
		zend_hash_move_forward_ex(src, &pos);
	}
	return 1;
}
从上面的操作可以看出,array_merge会遍历传入的所有数组,把其中的值依次放进一个新数组:字符串键名通过zend_hash_update写入,键名重复时后面的值会覆盖前面的值;数字键名通过zend_hash_next_index_insert追加到末尾,相当于重新索引。最后将这个新数组返回给调用者。
接下来分析+运算符的源码实现。通过opdumper可以看到,加法操作对应的opcode是ZEND_ADD,对应的处理函数是add_function,源码如下:
/*
 * add_function: implements the `+` operator for two zvals.
 *
 * result   - receives the sum; it may be the same zval as op1 (`$a += ...`).
 * op1, op2 - the operands.
 *
 * Numeric operands are added as long or double. Two arrays are combined
 * with zend_hash_merge() using overwrite = 0. Any other operands are
 * converted to numbers once and the type dispatch is retried; if that
 * still does not match, E_ERROR "Unsupported operand types" is raised.
 *
 * Returns SUCCESS, or FAILURE for unsupported operand types.
 */
ZEND_API int add_function(zval *result, zval *op1, zval *op2 TSRMLS_DC) /* {{{ */
{
	zval op1_copy, op2_copy;
	int converted = 0;

	while (1) {
		switch (TYPE_PAIR(Z_TYPE_P(op1), Z_TYPE_P(op2))) {
			/* Both operands are long: add the values directly. */
			case TYPE_PAIR(IS_LONG, IS_LONG): {
				long lval = Z_LVAL_P(op1) + Z_LVAL_P(op2);

				/* check for overflow by comparing sign bits */
				if ((Z_LVAL_P(op1) & LONG_SIGN_MASK) == (Z_LVAL_P(op2) & LONG_SIGN_MASK)
					&& (Z_LVAL_P(op1) & LONG_SIGN_MASK) != (lval & LONG_SIGN_MASK)) {
					/* Overflow: produce the result as a double instead. */
					ZVAL_DOUBLE(result, (double) Z_LVAL_P(op1) + (double) Z_LVAL_P(op2));
				} else {
					ZVAL_LONG(result, lval);
				}
				return SUCCESS;
			}

			/* One long and one double: the long is cast to double first. */
			case TYPE_PAIR(IS_LONG, IS_DOUBLE):
				ZVAL_DOUBLE(result, ((double)Z_LVAL_P(op1)) + Z_DVAL_P(op2));
				return SUCCESS;

			/* One double and one long: the long is cast to double first. */
			case TYPE_PAIR(IS_DOUBLE, IS_LONG):
				ZVAL_DOUBLE(result, Z_DVAL_P(op1) + ((double)Z_LVAL_P(op2)));
				return SUCCESS;

			/* Two doubles: add directly. */
			case TYPE_PAIR(IS_DOUBLE, IS_DOUBLE):
				ZVAL_DOUBLE(result, Z_DVAL_P(op1) + Z_DVAL_P(op2));
				return SUCCESS;

			/* Two arrays: this is the case relevant to the array `+` operator. */
			case TYPE_PAIR(IS_ARRAY, IS_ARRAY): {
				zval *tmp;

				if ((result == op1) && (result == op2)) {
					/* $a += $a */
					return SUCCESS;
				}
				if (result != op1) {
					/* Start from a copy of op1, so its entries are in the result first. */
					*result = *op1;
					zval_copy_ctor(result);
				}
				/*
				 * The key step: array `+` is implemented by zend_hash_merge()
				 * with overwrite = 0.
				 */
				zend_hash_merge(Z_ARRVAL_P(result), Z_ARRVAL_P(op2), (void (*)(void *pData)) zval_add_ref, (void *) &tmp, sizeof(zval *), 0);
				return SUCCESS;
			}

			default:
				if (!converted) {
					/* First pass: convert scalars to numbers, then retry the switch. */
					zendi_convert_scalar_to_number(op1, op1_copy, result);
					zendi_convert_scalar_to_number(op2, op2_copy, result);
					converted = 1;
				} else {
					zend_error(E_ERROR, "Unsupported operand types");
					return FAILURE; /* unknown datatype */
				}
		}
	}
}
/*
 * zend_hash_merge: convenience wrapper that forwards all arguments to
 * _zend_hash_merge() and appends ZEND_FILE_LINE_CC.
 */
#define zend_hash_merge(target, source, pCopyConstructor, tmp, size, overwrite) \
	_zend_hash_merge(target, source, pCopyConstructor, tmp, size, overwrite ZEND_FILE_LINE_CC)
/*
 * _zend_hash_merge: copy the entries of `source` into `target`.
 *
 * pCopyConstructor - if non-NULL, called on each element that was stored
 *                    in `target`.
 * tmp              - not used in this function body.
 * size             - size of one data element.
 * overwrite        - non-zero: entries in `source` replace entries with the
 *                    same key in `target` (HASH_UPDATE);
 *                    zero: existing entries in `target` are kept (HASH_ADD).
 *
 * Afterwards the internal pointer of `target` is reset to its list head.
 */
ZEND_API void _zend_hash_merge(HashTable *target, HashTable *source, copy_ctor_func_t pCopyConstructor, void *tmp, uint size, int overwrite ZEND_FILE_LINE_DC)
{
	Bucket *p;
	void *t;
	/* The array `+` operator passes overwrite = 0, so the mode is HASH_ADD there. */
	int mode = (overwrite ? HASH_UPDATE : HASH_ADD);

	IS_CONSISTENT(source);
	IS_CONSISTENT(target);

	p = source->pListHead;
	while (p) {
		if (p->nKeyLength > 0) {
			/*
			 * A key length greater than 0 means a string key. With
			 * mode = HASH_ADD the add fails when the key is already present
			 * in the table instead of updating it, so the earlier value is kept.
			 */
			if (_zend_hash_quick_add_or_update(target, p->arKey, p->nKeyLength, p->h, p->pData, size, &t, mode ZEND_FILE_LINE_RELAY_CC) == SUCCESS && pCopyConstructor) {
				pCopyConstructor(t);
			}
		} else {
			/*
			 * Integer key. With mode = HASH_ADD nothing is done when the index
			 * already exists in the table, so the earlier value is not
			 * overwritten; when it does not exist, zend_hash_index_update()
			 * adds it to the table.
			 */
			if ((mode == HASH_UPDATE || !zend_hash_index_exists(target, p->h)) && zend_hash_index_update(target, p->h, p->pData, size, &t) == SUCCESS && pCopyConstructor) {
				pCopyConstructor(t);
			}
		}
		p = p->pListNext;
	}
	target->pInternalPointer = target->pListHead;
}
ZEND_API int _zend_hash_add_or_update(HashTable *ht, const char *arKey, uint nKeyLength, void *pData, uint nDataSize, void **pDest, int flag ZEND_FILE_LINE_DC) { ulong h; uint nIndex; Bucket *p; #ifdef ZEND_SIGNALS TSRMLS_FETCH(); #endif IS_CONSISTENT(ht); if (nKeyLength <= 0) { #if ZEND_DEBUG ZEND_PUTS("zend_hash_update: Can't put in empty key\n"); #endif return FAILURE; } CHECK_INIT(ht); h = zend_inline_hash_func(arKey, nKeyLength); nIndex = h & ht->nTableMask; p = ht->arBuckets[nIndex]; while (p != NULL) { if (p->arKey == arKey || ((p->h == h) && (p->nKeyLength == nKeyLength) && !memcmp(p->arKey, arKey, nKeyLength))) { if (flag & HASH_ADD) { //key为字符串,并且当前key在hashtable已经存在,并且当前模式为HASH_ADD时候添加失败 return FAILURE; } HANDLE_BLOCK_INTERRUPTIONS(); #if ZEND_DEBUG if (p->pData == pData) { ZEND_PUTS("Fatal error in zend_hash_update: p->pData == pData\n"); HANDLE_UNBLOCK_INTERRUPTIONS(); return FAILURE; } #endif if (ht->pDestructor) { ht->pDestructor(p->pData); } UPDATE_DATA(ht, p, pData, nDataSize); if (pDest) { *pDest = p->pData; } HANDLE_UNBLOCK_INTERRUPTIONS(); return SUCCESS; } p = p->pNext; } if (IS_INTERNED(arKey)) { p = (Bucket *) pemalloc(sizeof(Bucket), ht->persistent); if (!p) { return FAILURE; } p->arKey = arKey; } else { p = (Bucket *) pemalloc(sizeof(Bucket) + nKeyLength, ht->persistent); if (!p) { return FAILURE; } p->arKey = (const char*)(p + 1); memcpy((char*)p->arKey, arKey, nKeyLength); } p->nKeyLength = nKeyLength; INIT_DATA(ht, p, pData, nDataSize); p->h = h; CONNECT_TO_BUCKET_DLLIST(p, ht->arBuckets[nIndex]); if (pDest) { *pDest = p->pData; } HANDLE_BLOCK_INTERRUPTIONS(); CONNECT_TO_GLOBAL_DLLIST(p, ht); ht->arBuckets[nIndex] = p; HANDLE_UNBLOCK_INTERRUPTIONS(); ht->nNumOfElements++; ZEND_HASH_IF_FULL_DO_RESIZE(ht); /* If the Hash table is full, resize it */ return SUCCESS; }
/*
 * zend_hash_index_update: convenience wrapper that forwards to
 * _zend_hash_index_update_or_next_insert() with the HASH_UPDATE flag
 * and appends ZEND_FILE_LINE_CC.
 */
#define zend_hash_index_update(ht, h, pData, nDataSize, pDest) \
	_zend_hash_index_update_or_next_insert(ht, h, pData, nDataSize, pDest, HASH_UPDATE ZEND_FILE_LINE_CC)
有些API就不再继续往下分析了,分析到这里已经能看出array_merge和+操作各自的运算方式了。
总结一下: