redis read error on connection和Redis server went away错误排查

原创
2017/07/12 11:06
阅读数 5.7K

       最近每小时都会收到几条redis报警,虽然不是什么大问题但还是要排查一下原因。通过捕获redis的两条抛错信息redis read error on connection和Redis server went away,我们打开phpredis扩展的源码找到
static zend_always_inline RedisSock *
redis_sock_get_instance(zval *id TSRMLS_DC, int no_throw)
{
    redis_object *redis;

    if (Z_TYPE_P(id) == IS_OBJECT) {
#if (PHP_MAJOR_VERSION < 7)
        redis = (redis_object *)zend_objects_get_address(id TSRMLS_CC);
#else
        redis = (redis_object *)((char *)Z_OBJ_P(id) - XtOffsetOf(redis_object, std));
#endif
        if (redis->sock) {
            return redis->sock;
        }
    }
    // Throw an exception unless we've been requested not to
    if (!no_throw) {
        zend_throw_exception(redis_exception_ce, "Redis server went away", 0 TSRMLS_CC);
    }
    return NULL;
}

这个方法的判断逻辑是 redis结构体里的socket成员变量为空就会抛出这个错误,这个方法被调用的地方很多,需要从头看起。

首先在php中初始化链接,会调用

PHP_METHOD(Redis, connect)
{
    if (redis_connect(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0) == FAILURE) {
        RETURN_FALSE;
    } else {
        RETURN_TRUE;
    }
}

这是connect方法,调用redis_connect并传入全部参数。


PHP_REDIS_API int
redis_connect(INTERNAL_FUNCTION_PARAMETERS, int persistent)
{
    zval *object;
    char *host = NULL, *persistent_id = NULL;
    zend_long port = -1, retry_interval = 0;
    strlen_t host_len, persistent_id_len;
    double timeout = 0.0, read_timeout = 0.0;
    redis_object *redis;

#ifdef ZTS
    /* not sure how in threaded mode this works so disabled persistence at
     * first */
    persistent = 0;
#endif

    if (zend_parse_method_parameters(ZEND_NUM_ARGS() TSRMLS_CC, getThis(),
                                     "Os|ldsld", &object, redis_ce, &host,
                                     &host_len, &port, &timeout, &persistent_id,
                                     &persistent_id_len, &retry_interval,
                                     &read_timeout) == FAILURE)
    {
        return FAILURE;
    } else if (!persistent) {
        persistent_id = NULL;
    }

    if (timeout < 0L || timeout > INT_MAX) {
        zend_throw_exception(redis_exception_ce,
            "Invalid connect timeout", 0 TSRMLS_CC);
        return FAILURE;
    }

    if (read_timeout < 0L || read_timeout > INT_MAX) {
        zend_throw_exception(redis_exception_ce,
            "Invalid read timeout", 0 TSRMLS_CC);
        return FAILURE;
    }

    if (retry_interval < 0L || retry_interval > INT_MAX) {
        zend_throw_exception(redis_exception_ce, "Invalid retry interval",
            0 TSRMLS_CC);
        return FAILURE;
    }

    /* If it's not a unix socket, set to default */
    if(port == -1 && host_len && host[0] != '/') {
        port = 6379;
    }

#if (PHP_MAJOR_VERSION < 7)
    redis = (redis_object *)zend_objects_get_address(object TSRMLS_CC);
#else
    redis = (redis_object *)((char *)Z_OBJ_P(object) - XtOffsetOf(redis_object, std));
#endif
    /* if there is a redis sock already we have to remove it */
    if (redis->sock) {
        redis_sock_disconnect(redis->sock TSRMLS_CC);
        redis_free_socket(redis->sock);
    }

    redis->sock = redis_sock_create(host, host_len, port, timeout, read_timeout, persistent,
        persistent_id, retry_interval, 0);

    if (redis_sock_server_open(redis->sock TSRMLS_CC) < 0) {
        redis_free_socket(redis->sock);
        redis->sock = NULL;
        return FAILURE;
    }

    return SUCCESS;
}

redis创建了一个socket链接并将资源句柄存入redis->socket,下面是socket创建socket过程
/**
 * redis_sock_create
 */
PHP_REDIS_API RedisSock*
redis_sock_create(char *host, int host_len, unsigned short port,
                  double timeout, double read_timeout,
                  int persistent, char *persistent_id,
                  long retry_interval, zend_bool lazy_connect)
{
    RedisSock *redis_sock;

    redis_sock         = ecalloc(1, sizeof(RedisSock));
    redis_sock->host   = estrndup(host, host_len);
    redis_sock->stream = NULL;
    redis_sock->status = REDIS_SOCK_STATUS_DISCONNECTED;
    redis_sock->watching = 0;
    redis_sock->dbNumber = 0;
    redis_sock->retry_interval = retry_interval * 1000;
    redis_sock->persistent = persistent;
    redis_sock->lazy_connect = lazy_connect;
    redis_sock->persistent_id = NULL;

    if(persistent_id) {
        redis_sock->persistent_id = estrdup(persistent_id);
    }

    redis_sock->port    = port;
    redis_sock->timeout = timeout;
    redis_sock->read_timeout = read_timeout;

    redis_sock->serializer = REDIS_SERIALIZER_NONE;
    redis_sock->mode = ATOMIC;
    redis_sock->head = NULL;
    redis_sock->current = NULL;

    redis_sock->pipeline_cmd = NULL;
    redis_sock->pipeline_len = 0;

    redis_sock->err = NULL;
    redis_sock->err_len = 0;

    redis_sock->scan = REDIS_SCAN_NORETRY;

    redis_sock->readonly = 0;

    return redis_sock;
}

 

如果在php中使用mget会调用


/* {{{ proto array Redis::getMultiple(array keys)
 */
PHP_METHOD(Redis, getMultiple)
{
    zval *object, *z_args, *z_ele;
    HashTable *hash;
    RedisSock *redis_sock;
    smart_string cmd = {0};
    int arg_count;

    /* Make sure we have proper arguments */
    if(zend_parse_method_parameters(ZEND_NUM_ARGS() TSRMLS_CC, getThis(), "Oa",
                                    &object, redis_ce, &z_args) == FAILURE) {
        RETURN_FALSE;
    }

    /* We'll need the socket */
    if ((redis_sock = redis_sock_get(object TSRMLS_CC, 0)) == NULL) {
        RETURN_FALSE;
    }

    /* Grab our array */
    hash = Z_ARRVAL_P(z_args);

    /* We don't need to do anything if there aren't any keys */
    if((arg_count = zend_hash_num_elements(hash)) == 0) {
        RETURN_FALSE;
    }

    /* Build our command header */
    redis_cmd_init_sstr(&cmd, arg_count, "MGET", 4);

    /* Iterate through and grab our keys */
    ZEND_HASH_FOREACH_VAL(hash, z_ele) {
        zend_string *zstr = zval_get_string(z_ele);
        redis_cmd_append_sstr_key(&cmd, zstr->val, zstr->len, redis_sock, NULL);
        zend_string_release(zstr);
    } ZEND_HASH_FOREACH_END();

    /* Kick off our command */
    REDIS_PROCESS_REQUEST(redis_sock, cmd.c, cmd.len);
    IF_ATOMIC() {
        if(redis_sock_read_multibulk_reply(INTERNAL_FUNCTION_PARAM_PASSTHRU,
                                           redis_sock, NULL, NULL) < 0) {
            RETURN_FALSE;
        }
    }
    REDIS_PROCESS_RESPONSE(redis_sock_read_multibulk_reply);
}
 

如果redis_sock_get为空会返回false,再看下redis_sock_get


PHP_REDIS_API RedisSock *
redis_sock_get(zval *id TSRMLS_DC, int no_throw)
{
    RedisSock *redis_sock;

    if ((redis_sock = redis_sock_get_instance(id TSRMLS_CC, no_throw)) == NULL) {
        return NULL;
    }

    if (redis_sock->lazy_connect) {
        redis_sock->lazy_connect = 0;
        if (redis_sock_server_open(redis_sock TSRMLS_CC) < 0) {
            return NULL;
        }
    }

    return redis_sock;
}
 

结果发现redis_sock_get_instance就是文章开始抛错的方法,是什么原因导致了redis结构体存储的socket丢失了呢,如果刚存入socket就丢失显然是不符合逻辑的,很可能是在传输过程中丢失的,我们看下PHP_METHOD(Redis, getMultiple)中的redis_sock_read_multibulk_reply方法:


/**
 * redis_sock_read_multibulk_reply
 */
PHP_REDIS_API int redis_sock_read_multibulk_reply(INTERNAL_FUNCTION_PARAMETERS,
                                           RedisSock *redis_sock, zval *z_tab,
                                           void *ctx)
{
    char inbuf[4096];
    int numElems;
    size_t len;

    if (redis_sock_gets(redis_sock, inbuf, sizeof(inbuf) - 1, &len TSRMLS_CC) < 0) {
        return -1;
    }

    if(inbuf[0] != '*') {
        IF_NOT_ATOMIC() {
            add_next_index_bool(z_tab, 0);
        } else {
            if (inbuf[0] == '-') {
                redis_sock_set_err(redis_sock, inbuf+1, len);
            }
            RETVAL_FALSE;
        }
        return -1;
    }
    numElems = atoi(inbuf+1);
    zval zv, *z_multi_result = &zv;
#if (PHP_MAJOR_VERSION < 7)
    MAKE_STD_ZVAL(z_multi_result);
#endif
    array_init(z_multi_result); /* pre-allocate array for multi's results. */

    redis_mbulk_reply_loop(INTERNAL_FUNCTION_PARAM_PASSTHRU, redis_sock,
        z_multi_result, numElems, UNSERIALIZE_ALL);

    IF_NOT_ATOMIC() {
        add_next_index_zval(z_tab, z_multi_result);
    } else {
        RETVAL_ZVAL(z_multi_result, 0, 1);
    }
    /*zval_copy_ctor(return_value); */
    return 0;
}

再看下redis_sock_gets方法:


PHP_REDIS_API int
redis_sock_gets(RedisSock *redis_sock, char *buf, int buf_size,
                size_t *line_size TSRMLS_DC)
{
    // Handle EOF
    if(-1 == redis_check_eof(redis_sock, 0 TSRMLS_CC)) {
        return -1;
    }

    if(php_stream_get_line(redis_sock->stream, buf, buf_size, line_size)
                           == NULL)
    {
        // Close, put our socket state into error
        REDIS_STREAM_CLOSE_MARK_FAILED(redis_sock);

        // Throw a read error exception
        zend_throw_exception(redis_exception_ce, "read error on connection",
            0 TSRMLS_CC);
        return -1;
    }

    /* We don't need \r\n */
    *line_size-=2;
    buf[*line_size]='\0';

    /* Success! */
    return 0;
}
 

如果php_stream_get_line读取stream数据为NUll的时候就会抛出read error on connection这个错误。我们在php源码里找到php_stream_get_line方法:


/* If buf == NULL, the buffer will be allocated automatically and will be of an
 * appropriate length to hold the line, regardless of the line length, memory
 * permitting */
PHPAPI char *_php_stream_get_line(php_stream *stream, char *buf, size_t maxlen,
        size_t *returned_len)
{
    size_t avail = 0;
    size_t current_buf_size = 0;
    size_t total_copied = 0;
    int grow_mode = 0;
    char *bufstart = buf;

    if (buf == NULL) {
        grow_mode = 1;
    } else if (maxlen == 0) {
        return NULL;
    }

    /*
     * If the underlying stream operations block when no new data is readable,
     * we need to take extra precautions.
     *
     * If there is buffered data available, we check for a EOL. If it exists,
     * we pass the data immediately back to the caller. This saves a call
     * to the read implementation and will not block where blocking
     * is not necessary at all.
     *
     * If the stream buffer contains more data than the caller requested,
     * we can also avoid that costly step and simply return that data.
     */

    for (;;) {
        avail = stream->writepos - stream->readpos;

        if (avail > 0) {
            size_t cpysz = 0;
            char *readptr;
            const char *eol;
            int done = 0;

            readptr = (char*)stream->readbuf + stream->readpos;
            eol = php_stream_locate_eol(stream, NULL);

            if (eol) {
                cpysz = eol - readptr + 1;
                done = 1;
            } else {
                cpysz = avail;
            }

            if (grow_mode) {
                /* allow room for a NUL. If this realloc is really a realloc
                 * (ie: second time around), we get an extra byte. In most
                 * cases, with the default chunk size of 8K, we will only
                 * incur that overhead once.  When people have lines longer
                 * than 8K, we waste 1 byte per additional 8K or so.
                 * That seems acceptable to me, to avoid making this code
                 * hard to follow */
                bufstart = erealloc(bufstart, current_buf_size + cpysz + 1);
                current_buf_size += cpysz + 1;
                buf = bufstart + total_copied;
            } else {
                if (cpysz >= maxlen - 1) {
                    cpysz = maxlen - 1;
                    done = 1;
                }
            }

            memcpy(buf, readptr, cpysz);

            stream->position += cpysz;
            stream->readpos += cpysz;
            buf += cpysz;
            maxlen -= cpysz;
            total_copied += cpysz;

            if (done) {
                break;
            }
        } else if (stream->eof) {
            break;
        } else {
            /* XXX: Should be fine to always read chunk_size */
            size_t toread;

            if (grow_mode) {
                toread = stream->chunk_size;
            } else {
                toread = maxlen - 1;
                if (toread > stream->chunk_size) {
                    toread = stream->chunk_size;
                }
            }

            php_stream_fill_read_buffer(stream, toread);

            if (stream->writepos - stream->readpos == 0) {
                break;
            }
        }
    }

    if (total_copied == 0) {
        if (grow_mode) {
            assert(bufstart == NULL);
        }
        return NULL;
    }

    buf[0] = '\0';
    if (returned_len) {
        *returned_len = total_copied;
    }

    return bufstart;
}
只有bufstart=NULL的时候才会返回NULL,bufstart=NULL说明并未在buf缓冲和stream中接收到任何数据,包括终止符,只能推测在接收过程中网络断开或者其他原因导致的数据未收到,但是通过网络监测未发现网络断开问题,最终在redis_sock_create方法中发现有redis_sock->read_timeout = read_timeout;设置,最终定位是接收数据的超时导致php_stream_get_line为空,默认的超时时间是4秒,超时然后会调用 REDIS_STREAM_CLOSE_MARK_FAILED(redis_sock)方法,在头文件找到

这个方法就会关闭socket链接并置空,再执行其他命令调用redis_sock_get_instance时候,判断soket为空就会抛出Redis server went away的错误。

定位到原因后,对php程序排查后发现在某些特定情况下使用mget取出的数据太大,超过500M,网卡带宽的限制下传输超过4秒是很正常的事情,比较好的解决方法就是调整php代码或者在redis链接的时候设置延长read_timeout时间。我们再看下redis_connect接收的参数

在php中redis链接的时候可以这么设置$redis->connect($host, $port, $timeout, $persistent_id, $read_timeout);$read_timeout单位是秒。

转载请注明出处:https://my.oschina.net/u/554660/blog/1358156

展开阅读全文
打赏
0
0 收藏
分享
加载中
更多评论
打赏
0 评论
0 收藏
0
分享
返回顶部
顶部