IT博客汇
  • 首页
  • 精华
  • 技术
  • 设计
  • 资讯
  • 扯淡
  • 权利声明
  • 登录 注册

    KVM源代码分析5:IO虚拟化之PIO

    OenHan发表于 2016-08-21 12:49:49
    love 0

    源代码版本(git tag):kernel v3.16.37,qemu v2.7。上一篇:KVM源代码分析4:内存虚拟化–OenHan

    1. PIO指令介绍

    80386的I/O指令使得处理器可以访问I/O端口,以便从外设输入数据,或者向外设发送数据。这些指令有一个指定I/O空间端口地址的操作数。有两类的I/O指令:

    1、 在寄存器指定的地址传送一个数据(字节、字、双字)。

    2、 传送指定内存中的一串数据(字节串、字串、双字串)。这些被称作为“串 I/O指令”或者说“块I/O指令”。

    对应的指令分别是 IN/OUT(单个数据传送)和 INS/OUTS(串 I/O)。

    2. PIO运行在KVM

    当guest执行PIO指令时,会触发VM exit并进入vmx_handle_exit,再根据退出原因EXIT_REASON_IO_INSTRUCTION调用handle_io函数。此时exit qualification各位的含义见Intel SDM卷3的表27-5:

    Bit Position(s) Contents
    2:0 Size of access: 0 = 1-byte; 1 = 2-byte; 3 = 4-byte; other values not used
    3 Direction of the attempted access (0 = OUT, 1 = IN)
    4 String instruction (0 = not string; 1 = string)
    5 REP prefixed (0 = not REP; 1 = REP)
    6 Operand encoding (0 = DX, 1 = immediate)
    15:7 Reserved (cleared to 0)
    31:16 Port number (as specified in DX or in an immediate operand)
    63:32 Reserved (cleared to 0). These bits exist only on processors that support Intel 64 architecture.

    handle_io中的主要逻辑如下:

    string = (exit_qualification & 16) != 0;
    
    in = (exit_qualification & 8) != 0;
    
    // string串指令或者IO读的指令进行处理
    
    if (string || in)
    
    return emulate_instruction(vcpu, 0) == EMULATE_DONE;
    
    port = exit_qualification >> 16;
    
    size = (exit_qualification & 7) + 1;
    
    //跳过模拟指令,将非串写指令参数保存
    
    skip_emulated_instruction(vcpu);
    
    return kvm_fast_pio_out(vcpu, size, port);

    先看kvm_fast_pio_out函数

    unsigned long val = kvm_register_read(vcpu, VCPU_REGS_RAX);
    
    int ret = emulator_pio_out_emulated(&vcpu->arch.emulate_ctxt,
    
       size, port, &val, 1);

    OUT指令要写出的数据放在eax里面,读出后传给emulator_pio_out_emulated;该函数先拷贝数据,再进入emulator_pio_in_out,两者合起来的逻辑如下:

    memcpy(vcpu->arch.pio_data, val, size * count);    
    
    vcpu->arch.pio.port = port;
    
    vcpu->arch.pio.in = in;
    
    vcpu->arch.pio.count  = count;
    
    vcpu->arch.pio.size = size;
    
    if (!kernel_pio(vcpu, vcpu->arch.pio_data)) {
    
    vcpu->arch.pio.count = 0;
    
    return 1;
    
    }
    
    vcpu->run->exit_reason = KVM_EXIT_IO;
    
    vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT;
    
    vcpu->run->io.size = size;
    
    vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE;
    
    vcpu->run->io.count = count;
    
    vcpu->run->io.port = port;

    此处就将pio保存到vcpu->arch.pio中了,注意exit_reason赋值为KVM_EXIT_IO

    比较麻烦的是x86_emulate_instruction函数,先看它使用的指令模拟上下文结构体:

    struct x86_emulate_ctxt {
    
            /*操作对象*/
    
    const struct x86_emulate_ops *ops;
    
    /* Register state before/after emulation. */
    
    unsigned long eflags;
    
    unsigned long eip;  /* eip before instruction emulation */
    
    /* Emulated execution mode, represented by an X86EMUL_MODE value. */
    
    enum x86emul_mode mode;
    
    /* interruptibility state, as a result of execution of STI or MOV SS */
    
    int interruptibility;
    
    int emul_flags;
    
    bool perm_ok; /* do not check permissions if true */
    
    bool ud;/* inject an #UD if host doesn't support insn */
    
    bool have_exception;
    
    struct x86_exception exception;
    
    /*
    
    * decode cache
    
    */
    
    /* current opcode length in bytes */
    
    u8 opcode_len;
    
    u8 b;
    
    u8 intercept;
    
    u8 op_bytes;
    
    u8 ad_bytes;
    
    struct operand src;
    
    struct operand src2;
    
    struct operand dst;
    
    int (*execute)(struct x86_emulate_ctxt *ctxt);
    
    int (*check_perm)(struct x86_emulate_ctxt *ctxt);
    
    /*
    
    * The following six fields are cleared together,
    
    * the rest are initialized unconditionally in x86_decode_insn
    
    * or elsewhere
    
    */
    
    bool rip_relative;
    
    u8 rex_prefix;
    
    u8 lock_prefix;
    
    u8 rep_prefix;
    
    /* bitmaps of registers in _regs[] that can be read */
    
    u32 regs_valid;
    
    /* bitmaps of registers in _regs[] that have been written */
    
    u32 regs_dirty;
    
    /* modrm */
    
    u8 modrm;
    
    u8 modrm_mod;
    
    u8 modrm_reg;
    
    u8 modrm_rm;
    
    u8 modrm_seg;
    
    u8 seg_override;
    
    u64 d;
    
    unsigned long _eip;
    
    struct operand memop;
    
    /* Fields above regs are cleared together. */
    
    unsigned long _regs[NR_VCPU_REGS];
    
    struct operand *memopp;
    
    struct fetch_cache fetch;
    
    struct read_cache io_read;
    
    struct read_cache mem_read;
    
    };
    
    //init_emulate_ctxt负责初始化这个结构体,随后在x86_decode_insn中开始取指:
    
    ctxt->fetch.ptr = ctxt->fetch.data;
    ctxt->fetch.end = ctxt->fetch.data + insn_len;
    if (insn_len > 0)
             /*所有指令内容存放在data中*/
    memcpy(ctxt->fetch.data, insn, insn_len);
    else {
            /*如果没有指定指令的内容,就从当前eip读取一个指令*/
    rc = __do_insn_fetch_bytes(ctxt, 1);
    if (rc != X86EMUL_CONTINUE)
    return rc;
    }

    两个分支都只是把指令字节读取出来放到ctxt->fetch.data中,并没有执行。

    看__do_insn_fetch_bytes函数,

    int cur_size = ctxt->fetch.end - ctxt->fetch.data;
    
    /*下面的ea事实上应该说是指令decode时真正的eip,因为指令还没有被执行,eip没有
    
     * 更新,所以每次decode计算eip都要加上已经decode的代码长度,就是cur_size */
    
    struct segmented_address addr = { .seg = VCPU_SREG_CS,
      .ea = ctxt->eip + cur_size };
    
    /*__linearize就是获取线性地址*/
     la = seg_base(ctxt, addr.seg) + addr.ea;

    后面是一些检查处理,例如不满足权限要求时,就要通过emulate_gp模拟#GP错误。

    然后是:

    rc = ctxt->ops->fetch(ctxt, linear, ctxt->fetch.end, size, &ctxt->exception);

    即kvm_fetch_guest_virt

    先通过vcpu->arch.walk_mmu->gva_to_gpa获取gpa的值,然后用kvm_vcpu_read_guest_page获取gpa对应的内存值,这两个函数不再展开。

    最终效果就是将linear对应的内存中大小为size的内容写入到ctxt->fetch.end指针对应的缓存中。因为ctxt->fetch.end = ctxt->fetch.data + insn_len,而此时insn_len为0,所以实际写入的就是ctxt->fetch.data。也就是说,在x86_decode_insn中,__do_insn_fetch_bytes就是从eip处copy了指令的内容到ctxt->fetch.data。后面x86_emulate_insn的指令解释执行部分此处跳过不讲。

    x86_emulate_instruction后面一段代码:

    /*exception在decode中产生*/
    if (ctxt->have_exception) {
    r = EMULATE_DONE;
    if (inject_emulated_exception(vcpu))
    return r;
    /*decode会处理,pio的执行次数,针对串指令*/
    } else if (vcpu->arch.pio.count) {
    /*写入io的不特殊处理,只需完成写入即可*/
    if (!vcpu->arch.pio.in) {
    /* FIXME: return into emulator if single-stepping.  */
    vcpu->arch.pio.count = 0;
    } else {
    writeback = false;
    /*读取io的则需要回头处理读的值的流程*/
    vcpu->arch.complete_userspace_io = complete_emulated_pio;
    }
    r = EMULATE_USER_EXIT;
    } else if (vcpu->mmio_needed) {
    /*同上*/
    if (!vcpu->mmio_is_write)
    writeback = false;
    r= EMULATE_USER_EXIT;
    vcpu->arch.complete_userspace_io = complete_emulated_mmio;
    } else if (r == EMULATION_RESTART)
    goto restart;
    else
    r = EMULATE_DONE;

    handle_io返回的是emulate_instruction(vcpu, 0) == EMULATE_DONE的比较结果,它也就是kvm_x86_ops->handle_exit(vcpu)的返回值。当r为EMULATE_USER_EXIT时,比较结果为0,返回值不大于0,于是从vcpu_run循环中跳出,回到qemu(用户态)。

    if (kvm_vcpu_running(vcpu)) {
    r = vcpu_enter_guest(vcpu);
    } else {
    r = vcpu_block(kvm, vcpu);
    }
    if (r <= 0)
    break;

    3. PIO运行在QEMU

    在qemu kvm_cpu_exec函数中:

    switch (run->exit_reason) {
    /*KVM_EXIT_IO是在emulator_pio_in_out标记的*/
            case KVM_EXIT_IO:
                DPRINTF("handle_io\n");
                /* Called outside BQL */
                kvm_handle_io(run->io.port, attrs,
                              (uint8_t *)run + run->io.data_offset,
                             run->io.direction,
                              run->io.size,
                              run->io.count);
                ret = 0;
                break;
            case KVM_EXIT_MMIO:
                DPRINTF("handle_mmio\n");
                /* Called outside BQL */
                address_space_rw(&address_space_memory,
                                 run->mmio.phys_addr, attrs,
                                 run->mmio.data,
                                 run->mmio.len,
                                 run->mmio.is_write);
                ret = 0;
                break;
    
    static void kvm_handle_io(uint16_t port, MemTxAttrs attrs, void *data, int direction,
    
                              int size, uint32_t count)
    {
        int i;
        uint8_t *ptr = data;
        for (i = 0; i < count; i++) {
            /*从qemu内存中读写数据,本质就成了mmio操作 */
            address_space_rw(&address_space_io, port, attrs, ptr, size, direction == KVM_EXIT_IO_OUT);
            ptr += size;
        }
    }

    如果是写IO,到这里就算完成了;如果是读IO,此时qemu只是把读到的数据放进了run + run->io.data_offset指向的数据区,还需要回到KVM后继续处理。

    这部分处理位于kvm_arch_vcpu_ioctl_run中,在调用vcpu_run之前:

    if (unlikely(vcpu->arch.complete_userspace_io)) {
    int (*cui)(struct kvm_vcpu *) = vcpu->arch.complete_userspace_io;
    vcpu->arch.complete_userspace_io = NULL;
    r = cui(vcpu);
    if (r <= 0)
    goto out;
    } else
    WARN_ON(vcpu->arch.pio.count || vcpu->mmio_needed);

    本质上是调用了complete_userspace_io回调,这里即是之前设置的complete_emulated_pio。

    它最终执行r = emulate_instruction(vcpu, EMULTYPE_NO_DECODE),即不重新解码、再次模拟这条IO指令,此时把qemu从模拟的IO端口读到的数据交给guest的IN指令,模拟完成即可。

    —结束—

     

    KVM源代码分析5:IO虚拟化之PIO–OenHan

    http://www.oenhan.com/kvm-src-5-io-pio



沪ICP备19023445号-2号
友情链接