IT博客汇
  • 首页
  • 精华
  • 技术
  • 设计
  • 资讯
  • 扯淡
  • 权利声明
  • 登录 注册

    libnl从内核获取taskstats信息

    一根稻草发表于 2015-09-28 00:00:00
    love 0

    Taskstats is a netlink-based interface for sending per-task and per-process statistics from the kernel to userspace. --https://goo.gl/aTdgpp

    Taskstats接口提供两种方式

    • 获取指定进程的统计数据。(用户态程序提供pid号)
    • 当进程终止时,获取其统计数据。(用户态程序提供一个cpumask)

    详细的接口说明本文不再赘述,请参考kernel doc。本文目的在于通过Taskstats接口学习libnl编程方法。

    基于libnl的用户态程序

    • 创建一个nl_sock

        struct nl_sock *sock = nl_socket_alloc()
      

      其中并没有调用socket系统调用,主要是分配一个nl_sock结构体,填充一些字段,如nl_family = AF_NETLINK

    • 创建socket并绑定

        genl_connect(sock)
      

      该函数才调用了socket(), bind()

        sk->s_fd = socket(AF_NETLINK, SOCK_RAW | flags, protocol)
      

      从socket类型可以看出这是无连接的,内核怎么知道哪个进程需要监听taskstats信息呢?
      发包告诉内核

    • 构造nl_msg

        struct nl_msg
        {
                int                     nm_protocol;
                int                     nm_flags;
                struct sockaddr_nl      nm_src;
                struct sockaddr_nl      nm_dst;
                struct ucred            nm_creds;
                struct nlmsghdr *       nm_nlh;
                size_t                  nm_size;
                int                     nm_refcnt;
        };
        struct nlmsghdr {
            __u32           nlmsg_len;      /* Length of message including header */
            __u16           nlmsg_type;     /* Message content */
            __u16           nlmsg_flags;    /* Additional flags */
            __u32           nlmsg_seq;      /* Sequence number */
            __u32           nlmsg_pid;      /* Sending process port ID */
        };
      
        struct nlattr {
                __u16           nla_len;
                __u16           nla_type;
        };
      

      nlmsg_alloc分配nl_msg内存和及default_msg_size内存 default_msg_size = getpagesize(); 4KB

    在nlmsg_alloc中为nl_msg->nm_nlh分配了4KB大小 的内存,nlmsghdr在4KB头部,nlmsghdr, nlattr, data的关系可以参考下图

          <------- NLMSG_ALIGN(hlen) ------> <---- NLMSG_ALIGN(len) --->
         +----------------------------+- - -+- - - - - - - - - - -+- - -+
         |           Header           | Pad |       Payload       | Pad |
         |      struct nlmsghdr       |     |                     |     |
         +----------------------------+- - -+- - - - - - - - - - -+- - -+
    
          <-------- GENL_HDRLEN -------> <--- hdrlen -->
                                         <------- genlmsg_len(ghdr) ------>
         +------------------------+- - -+---------------+- - -+------------+
         | Generic Netlink Header | Pad | Family Header | Pad | Attributes |
         |    struct genlmsghdr   |     |               |     |            |
         +------------------------+- - -+---------------+- - -+------------+
         genlmsg_data(ghdr)--------------^                     ^
         genlmsg_attrdata(ghdr, hdrlen)-------------------------
    
    

    link

    • 发送nl_msg

        nl_send_auto_complete(sock, msg);
      

      实际上是调用的nl_sock的callback函数 cb_send_ow,这一步就让内核知道你这个进程想要获取进程数据,下面就等着内核给你数据就OK了。

    • 注册nl_msg处理函数

        nl_socket_modify_cb(sock, NL_CB_MSG_IN, NL_CB_CUSTOM, msg_recv_cb, NULL);
      

      用pcap抓过包的同学都知道pcap_loop注册数据包处理函数然后进入循环抓包分析过程。

    • 接收nl_msg

        nl_recvmsgs_default(sock);
      

      该函数只是接收一个包,对于持续监听终止进程的统计数据,要自己写个死循环。

    下面是完整的代码

    /*
     gcc read_taskstats.c `pkg-config --cflags --libs libnl-3.0 libnl-genl-3.0` -o read_taskstats
    */
    #include <stdlib.h>
    #include <linux/taskstats.h>
    #include <netlink/netlink.h>
    #include <netlink/genl/genl.h>
    #include <netlink/genl/ctrl.h>
    
    void usage(int argc, char *argv[]);
    int msg_recv_cb(struct nl_msg *, void *);
    
    int main(int argc, char *argv[])
    {
        struct nl_sock *sock;
        struct nl_msg *msg;
        int family;
        int pid = -1;
        char *cpumask;
    
        if (argc > 2 && strcmp(argv[1], "--pid") == 0) {
            pid = atoi(argv[2]);
        } else if (argc > 2 && strcmp(argv[1], "--cpumask") == 0) {
            cpumask = argv[2];
        } else {
            usage(argc, argv);
            exit(1);
        }
    
        sock = nl_socket_alloc();
    
        // Connect to generic netlink socket on kernel side
        genl_connect(sock);
    
        // get the id for the TASKSTATS generic family
        family = genl_ctrl_resolve(sock, "TASKSTATS");
    
        // register for task exit events
        msg = nlmsg_alloc();
    
        genlmsg_put(msg, NL_AUTO_PID, NL_AUTO_SEQ, family, 0, NLM_F_REQUEST,
                TASKSTATS_CMD_GET, TASKSTATS_VERSION);
        if (pid > 0) {
            nla_put_u32(msg, TASKSTATS_CMD_ATTR_PID, pid);
        } else {
            nla_put_string(msg, TASKSTATS_CMD_ATTR_REGISTER_CPUMASK,
                       cpumask);
        }
        nl_send_auto_complete(sock, msg);
        nlmsg_free(msg);
    
        // specify a callback for inbound messages
        nl_socket_modify_cb(sock, NL_CB_MSG_IN, NL_CB_CUSTOM, msg_recv_cb,
                    NULL);
        if (pid > 0) {
            nl_recvmsgs_default(sock);
        } else {
            while (1)
                nl_recvmsgs_default(sock);
        }
        return 0;
    }
    
    void usage(int argc, char *argv[])
    {
        printf("USAGE: %s option\nOptions:\n"
               "\t--pid pid : get statistics during a task's lifetime.\n"
               "\t--cpumask mask : obtain statistics for tasks which are exiting. \n"
               "\t\tThe cpumask is specified as an ascii string of comma-separated \n"
               "\t\tcpu ranges e.g. to listen to exit data from cpus 1,2,3,5,7,8\n"
               "\t\tthe cpumask would be '1-3,5,7-8'.\n", argv[0]);
    }
    
    #define printllu(str, value)    printf("%s: %llu\n", str, value)
    
    int msg_recv_cb(struct nl_msg *nlmsg, void *arg)
    {
        struct nlmsghdr *nlhdr;
        struct nlattr *nlattrs[TASKSTATS_TYPE_MAX + 1];
        struct nlattr *nlattr;
        struct taskstats *stats;
        int rem;
    
        nlhdr = nlmsg_hdr(nlmsg);
    
        // validate message and parse attributes
        genlmsg_parse(nlhdr, 0, nlattrs, TASKSTATS_TYPE_MAX, 0);
    
        if (nlattr = nlattrs[TASKSTATS_TYPE_AGGR_PID]) {
            stats = nla_data(nla_next(nla_data(nlattr), &rem));
    
            printf("---\n");
            printf("pid: %u\n", stats->ac_pid);
            printf("command: %s\n", stats->ac_comm);
            printf("status: %u\n", stats->ac_exitcode);
            printf("time:\n");
            printf("  start: %u\n", stats->ac_btime);
    
            printllu("  elapsed", stats->ac_etime);
            printllu("  user", stats->ac_utime);
            printllu("  system", stats->ac_stime);
            printf("memory:\n");
            printf("  bytetime:\n");
            printllu("    rss", stats->coremem);
            printllu("    vsz", stats->virtmem);
            printf("  peak:\n");
            printllu("    rss", stats->hiwater_rss);
            printllu("    vsz", stats->hiwater_vm);
            printf("io:\n");
            printf("  bytes:\n");
            printllu("    read", stats->read_char);
            printllu("    write", stats->write_char);
            printf("  syscalls:\n");
            printllu("    read", stats->read_syscalls);
            printllu("    write", stats->write_syscalls);
        }
        return 0;
    }
    

    Tip 1.

    发现一个带有函数调用图的源码阅读网站http://sourcecodebrowser.com,体验一下,还是cscope+ctags方便。



沪ICP备19023445号-2号
友情链接