IT博客汇
  • 首页
  • 精华
  • 技术
  • 设计
  • 资讯
  • 扯淡
  • 权利声明
  • 登录 注册

    sheepdog源码分析之关键数据结构介绍

    admin发表于 2011-03-15 09:29:08
    love 0

    关键数据结构的说明

    1.

    /*
     * Generic request layout.  The six leading fields (16 bytes) recur at the
     * top of every *_req structure in this listing; the trailing 32 bytes are
     * laid out differently by each specialised request.
     */
    struct sd_req {
        uint8_t  proto_ver;
        uint8_t  opcode;              /* operation type */
        uint16_t flags;
        uint32_t epoch;
        uint32_t id;
        uint32_t data_length;
        uint32_t opcode_specific[8];  /* payload area; layout varies per request type */
    };

    /*
     * Generic response layout.  Shares its first 16 bytes with the request
     * layouts and adds a result word; seven spare words keep the total at
     * 48 bytes, the same size as struct sd_req.
     */
    struct sd_rsp {
        uint8_t  proto_ver;
        uint8_t  opcode;
        uint16_t flags;
        uint32_t epoch;
        uint32_t id;
        uint32_t data_length;
        uint32_t result;
        uint32_t opcode_specific[7];  /* payload area; layout varies per response type */
    };

    这两个数据结构相当于所有请求/响应的通用头部(类似面向对象中的抽象基类)。可以看出sizeof(struct sd_req) == sizeof(struct sd_rsp)(均为48字节),这是设计者有意为之,因为在发送请求和接收响应时,客户端使用的是同一片内存区域。

    2.

    /*
     * Object request.  Same 16-byte leading fields as struct sd_req, followed
     * by object-specific parameters that fill the remaining 32 bytes.
     */
    struct sd_obj_req {
        uint8_t  proto_ver;
        uint8_t  opcode;
        uint16_t flags;
        uint32_t epoch;
        uint32_t id;
        uint32_t data_length;
        uint64_t oid;          /* object id */
        uint64_t cow_oid;
        uint32_t copies;       /* number of copies (replicas) */
        uint32_t tgt_epoch;
        uint64_t offset;
    };

    /*
     * Object response.  Same leading fields and result word as struct sd_rsp,
     * then a copies count; pad[] brings the structure to 48 bytes.
     */
    struct sd_obj_rsp {
        uint8_t  proto_ver;
        uint8_t  opcode;
        uint16_t flags;
        uint32_t epoch;
        uint32_t id;
        uint32_t data_length;
        uint32_t result;
        uint32_t copies;
        uint32_t pad[6];
    };

    对object进行请求及响应。这里需要说明一点:object是Sheepdog中的数据存储单元,分为data_object和vdi_object,分别存储数据和vdi的元数据(即后面提到的sheepdog_inode的内容),分片大小为4M。不知作者为何选择这么小的分片?

    /*
     * VDI request.  Same 16-byte leading fields as struct sd_req, followed by
     * VDI parameters; pad[] brings the structure to 48 bytes.
     *
     * NOTE(review): an identical definition of struct sd_vdi_req is printed
     * again under heading 3 of this listing; only one may be kept if the text
     * is ever compiled as a single translation unit.
     */
    struct sd_vdi_req {
        uint8_t  proto_ver;
        uint8_t  opcode;
        uint16_t flags;
        uint32_t epoch;
        uint32_t id;
        uint32_t data_length;
        uint64_t vdi_size;      /* size of the vdi */
        uint32_t base_vdi_id;
        uint32_t copies;
        uint32_t snapid;
        uint32_t pad[3];
    };

    /*
     * VDI response.  Same leading fields and result word as struct sd_rsp,
     * then a reserved word and the vdi id; pad[] brings it to 48 bytes.
     *
     * NOTE(review): an identical definition of struct sd_vdi_rsp is printed
     * again under heading 3 of this listing; only one may be kept if the text
     * is ever compiled as a single translation unit.
     */
    struct sd_vdi_rsp {
        uint8_t  proto_ver;
        uint8_t  opcode;
        uint16_t flags;
        uint32_t epoch;
        uint32_t id;
        uint32_t data_length;
        uint32_t result;
        uint32_t rsvd;
        uint32_t vdi_id;
        uint32_t pad[5];
    };

    对vdi进行有关操作的请求和响应

    3.

    /*
     * VDI request.  Same 16-byte leading fields as struct sd_req, followed by
     * VDI parameters; pad[] brings the structure to 48 bytes.
     *
     * NOTE(review): this repeats the struct sd_vdi_req definition already
     * printed under heading 2 of this listing; only one may be kept if the
     * text is ever compiled as a single translation unit.
     */
    struct sd_vdi_req {
        uint8_t  proto_ver;
        uint8_t  opcode;
        uint16_t flags;
        uint32_t epoch;
        uint32_t id;
        uint32_t data_length;
        uint64_t vdi_size;      /* size of the vdi */
        uint32_t base_vdi_id;
        uint32_t copies;
        uint32_t snapid;
        uint32_t pad[3];
    };

    /*
     * VDI response.  Same leading fields and result word as struct sd_rsp,
     * then a reserved word and the vdi id; pad[] brings it to 48 bytes.
     *
     * NOTE(review): this repeats the struct sd_vdi_rsp definition already
     * printed under heading 2 of this listing; only one may be kept if the
     * text is ever compiled as a single translation unit.
     */
    struct sd_vdi_rsp {
        uint8_t  proto_ver;
        uint8_t  opcode;
        uint16_t flags;
        uint32_t epoch;
        uint32_t id;
        uint32_t data_length;
        uint32_t result;
        uint32_t rsvd;
        uint32_t vdi_id;
        uint32_t pad[5];
    };

    对vdi进行有关操作的请求和响应

    4

    /*
     * "so" request.  Same 16-byte leading fields as struct sd_req, followed by
     * an object id, a creation time, a copies count and a tag; two spare
     * words complete the 48-byte layout.
     */
    struct sd_so_req {
        uint8_t  proto_ver;
        uint8_t  opcode;
        uint16_t flags;
        uint32_t epoch;
        uint32_t id;
        uint32_t data_length;
        uint64_t oid;
        uint64_t ctime;
        uint32_t copies;
        uint32_t tag;
        uint32_t opcode_specific[2];
    };

    /*
     * "so" response.  Same leading fields and result word as struct sd_rsp.
     * Note that ctime precedes oid here, the reverse of struct sd_so_req.
     */
    struct sd_so_rsp {
        uint8_t  proto_ver;
        uint8_t  opcode;
        uint16_t flags;
        uint32_t epoch;
        uint32_t id;
        uint32_t data_length;
        uint32_t result;
        uint32_t copies;
        uint64_t ctime;
        uint64_t oid;
        uint32_t opcode_specific[2];
    };

    这对请求和响应的数据结构,对应的opcode为SD_OP_MAKE_FS,对整个集群进行format,并提供copies参数,指定默认的副本的个数;

    5

    /*
     * List request.  Same 16-byte leading fields as struct sd_req, followed by
     * a [start, end] range and a target epoch; pad[] brings it to 48 bytes.
     */
    struct sd_list_req {
        uint8_t  proto_ver;
        uint8_t  opcode;
        uint16_t flags;
        uint32_t epoch;
        uint32_t id;
        uint32_t data_length;
        uint64_t start;        /* start_hval */
        uint64_t end;          /* end_hval */
        uint32_t tgt_epoch;    /* epoch parameter */
        uint32_t pad[3];
    };

    /*
     * List response.  Same leading fields and result word as struct sd_rsp,
     * then a reserved word and a 64-bit "next" value; pad[] brings it to
     * 48 bytes.
     */
    struct sd_list_rsp {
        uint8_t  proto_ver;
        uint8_t  opcode;
        uint16_t flags;
        uint32_t epoch;
        uint32_t id;
        uint32_t data_length;
        uint32_t result;
        uint32_t rsvd;
        uint64_t next;
        uint32_t pad[4];
    };

    主要用于SD_OP_GET_OBJ_LIST操作,获得对应区间上node节点上的object list

    6

    /*
     * Node request.  Same 16-byte leading fields as struct sd_req plus a
     * single request_ver word; pad[] brings it to 48 bytes.
     */
    struct sd_node_req {
        uint8_t  proto_ver;
        uint8_t  opcode;
        uint16_t flags;
        uint32_t epoch;
        uint32_t id;
        uint32_t data_length;
        uint32_t request_ver;
        uint32_t pad[7];
    };

    /*
     * Node response.  Same leading fields and result word as struct sd_rsp,
     * followed by node counts/indices and two 64-bit store figures; the
     * fields fill all 48 bytes, so no pad[] is needed.
     */
    struct sd_node_rsp {
        uint8_t  proto_ver;
        uint8_t  opcode;
        uint16_t flags;
        uint32_t epoch;
        uint32_t id;
        uint32_t data_length;
        uint32_t result;
        uint32_t nr_nodes;
        uint32_t local_idx;
        uint32_t master_idx;
        uint64_t store_size;
        uint64_t store_free;
    };

    针对SD_OP_STAT_SHEEP和SD_OP_GET_NODE_LIST操作,获得node 详细信息,包括store_size store_free等,或者是获得node list.

    7

    /*
     * Per-VDI metadata record: identification, timestamps, sizes, replication
     * settings and the id tables for child VDIs and data objects.  Array
     * bounds come from macros that are not defined in this listing.
     */
    struct sheepdog_inode {
        char     name[SD_MAX_VDI_LEN];        /* name of the vdi */
        char     tag[SD_MAX_VDI_TAG_LEN];     /* tag */
        uint64_t ctime;                       /* create time */
        uint64_t snap_ctime;                  /* snapshot time */
        uint64_t vm_clock_nsec;
        uint64_t vdi_size;                    /* vdi size */
        uint64_t vm_state_size;
        uint16_t copy_policy;
        uint8_t  nr_copies;                   /* number of copies */
        uint8_t  block_size_shift;            /* data object size */
        uint32_t snap_id;                     /* snapshot of this vdi */
        uint32_t vdi_id;                      /* vdi id */
        uint32_t parent_vdi_id;
        uint32_t child_vdi_id[MAX_CHILDREN];
        uint32_t data_vdi_id[MAX_DATA_OBJS];  /* data object id array */
    };

    sheep中每存储一个镜像文件,都会有一个sheepdog_inode结构与之对应;该结构中保存了数据object的id数组,相当于镜像文件的元数据,同时该结构会持久化保存到vdi object中;

    8

    /*
     * I/O phase of one direction of a connection; struct connection keeps one
     * value for receive (c_rx_state) and one for transmit (c_tx_state).
     * Values are spelled out explicitly and match the implicit numbering.
     */
    enum conn_state {
        C_IO_HEADER    = 0,
        C_IO_DATA_INIT = 1,
        C_IO_DATA      = 2,
        C_IO_END       = 3,
        C_IO_CLOSED    = 4,
    };

    /*
     * State of one socket connection: the descriptor plus, for each direction,
     * a state value, a length, a buffer pointer and an embedded header.
     */
    struct connection {
        int fd;                       /* socket fd */

        /* receive side */
        enum conn_state c_rx_state;   /* current receive state */
        int             rx_length;
        void           *rx_buf;
        struct sd_req   rx_hdr;

        /* transmit side */
        enum conn_state c_tx_state;   /* current transmit state */
        int             tx_length;
        void           *tx_buf;
        struct sd_rsp   tx_hdr;
    };

    struct connection结构存储socket连接的状态信息

    9

    /*
     * Per-client bookkeeping: the embedded connection state, the requests
     * currently being received/sent, and the two request lists.
     */
    struct client_info {
        struct connection conn;       /* connection state */
        struct request   *rx_req;     /* current rx_req */
        struct request   *tx_req;     /* current tx_req */
        struct list_head  reqs;       /* requests already received from the client */
        struct list_head  done_reqs;  /* processed requests waiting for their response to be sent */
        int               refcnt;     /* reference count: number of requests */
    };

    保存client的相关信息:其中conn保存连接状态,reqs代表已经收到的request,done_reqs代表已处理完、待发送响应的request。

    10

    /*
     * Kind tag stored in struct cpg_event.ctype.  Values are spelled out
     * explicitly and match the implicit numbering.
     */
    enum cpg_event_type {
        CPG_EVENT_CONCHG  = 0,
        CPG_EVENT_DELIVER = 1,
        CPG_EVENT_REQUEST = 2,
    };

    /*
     * Event record: a type tag, a list link and a skip flag.  struct request
     * embeds one of these as its first member (cev).
     */
    struct cpg_event {
        enum cpg_event_type ctype;
        struct list_head    cpg_event_list;
        unsigned int        skip;
    };

    /* Callback type stored in struct request.done; takes the request, returns nothing. */
    typedef void (*req_end_t)(struct request *);

    /*
     * Server-side record for one request: the embedded event, the request and
     * response headers, the data pointer, the owning client, list links, the
     * node table and the completion callback.
     */
    struct request {
        struct cpg_event cev;

        struct sd_req rq;
        struct sd_rsp rp;
        void         *data;

        struct client_info *ci;
        struct list_head    r_siblings;    /* linked on client_info->reqs */
        struct list_head    r_wlist;       /* linked on client_info->done_reqs */
        struct list_head    pending_list;  /* linked on sys->pending_list */

        uint64_t local_oid[2];

        struct sheepdog_node_list_entry entry[SD_MAX_NODES];
        int                             nr_nodes;

        int         check_consistency;
        req_end_t   done;
        struct work work;
    };

    Server端 请求的详细信息

    11

    /*
     * Cluster-wide state: the cpg handle, this node's identity, the current
     * epoch and status, node and request lists, the VDI in-use bitmap and
     * event-processing counters.
     */
    struct cluster_info {
        cpg_handle_t handle;

        /* set after finishing the JOIN procedure */
        int join_finished;

        uint32_t                        this_nodeid;
        uint32_t                        this_pid;
        struct sheepdog_node_list_entry this_node;

        uint32_t epoch;
        uint32_t status;

        /*
         * we add a node to cpg_node_list in confchg then move it to
         * sd_node_list when the node joins sheepdog.
         */
        struct list_head cpg_node_list;
        struct list_head sd_node_list;

        struct list_head pending_list;  /* requests that have not yet received a response */

        DECLARE_BITMAP(vdi_inuse, SD_NR_VDIS);

        struct list_head outstanding_req_list;
        struct list_head req_wait_for_obj_list;
        struct list_head consistent_obj_list;

        uint32_t nr_sobjs;              /* number of copies */

        struct list_head  cpg_event_siblings;
        struct cpg_event *cur_cevent;
        unsigned long     cpg_event_work_flags;
        int               nr_outstanding_io;
        int               nr_outstanding_reqs;

        uint32_t recovered_epoch;
    };

    /* Declaration only: pointer to the cluster-wide state; its definition is not part of this listing. */
    extern struct cluster_info *sys;

    整个集群的信息

    您可能对下面文章也感兴趣:

    • sheepdog源码分析之关键模块介绍(一)
    • sheepdog源码学习笔记一
    • KVM分布式共享存储解决方案-sheepdog
    • 字符串的哈希算法hnv的介绍和实现
    • sheepdog源码学习二之代码目录结构介绍


沪ICP备19023445号-2号
友情链接