1. 程式人生 > >android emulator虛擬裝置分析第二篇之pipe

android emulator虛擬裝置分析第二篇之pipe

一、概述

qemu pipe也是一個虛擬裝置,是一個通用的虛擬裝置,用於提供guest os和emulator通訊的功能,類似於一個抽象的通訊層,這樣就不用寫很多虛擬裝置了。

之前在guest os中有個qemud程序,也是幹這個事的,使用虛擬裝置ttyS1提供guest os和emulator通訊的功能,速度比較慢,已被pipe所替代。

看本篇之前,必須看完第一篇;看完本篇,然後看第三篇,這兩個是結合在一起的,都看完後建議回顧一下本篇。

基於通用的資料通訊pipe,emulator提供了四種服務

Available services:
-------------------

  tcp:<port>

     Open a TCP socket to a given localhost port. This provides a very fast
     pass-through that doesn't depend on the very slow internal emulator
     NAT router. Note that you can only use the file descriptor with read()
     and write() though, send() and recv() will return an ENOTSOCK error,
     as well as any socket ioctl().

     For security reasons, it is not possible to connect to non-localhost
     ports.

  unix:<path>

     Open a Unix-domain socket on the host.

  opengles

     Connects to the OpenGL ES emulation process. For now, the implementation
     is equivalent to tcp:22468, but this may change in the future.

  qemud

     Connects to the QEMUD service inside the emulator. This replaces the
     connection that was performed through /dev/ttyS1 in older Android platform
     releases. See $QEMU/docs/ANDROID-QEMUD.TXT for details.
其中qemud又提供了一些子服務比如
"gsm" service
"gps" service
"hw-control" / "control" service
"sensors" service
"boot-properties" service

如何使用qemu_pipe去通訊,將在第三篇中以qemud service中的"boot-properties" service為例去介紹,本篇僅關心虛擬裝置以及驅動

二、驅動

先看文件

XIV. QEMU Pipe device:
======================

Relevant files:
  $QEMU/hw/android/goldfish/pipe.c
  $KERNEL/drivers/misc/qemupipe/qemu_pipe.c

Device properties:
  Name: qemu_pipe
  Id: -1
  IrqCount: 1
  I/O Registers:
    0x00  COMMAND          W: Write to perform command (see below).
    0x04  STATUS           R: Read status
    0x08  CHANNEL          RW: Read or set current channel id.
    0x0c  SIZE             RW: Read or set current buffer size.
    0x10  ADDRESS          RW: Read or set current buffer physical address.
    0x14  WAKES            R: Read wake flags.
    0x18  PARAMS_ADDR_LOW  RW: Read/set low bytes of parameters block address.
    0x1c  PARAMS_ADDR_HIGH RW: Read/set high bytes of parameters block address.
    0x20  ACCESS_PARAMS    W: Perform access with parameter block.

This is a special device that is totally specific to QEMU, but allows guest
processes to communicate directly with the emulator with extremely high
performance. This is achieved by avoiding any in-kernel memory copies, relying
on the fact that QEMU can access guest memory at runtime (under proper
conditions controlled by the kernel).

Please refer to $QEMU/docs/ANDROID-QEMU-PIPE.TXT for full details on the
device's operations.

1、COMMAND包括CMD_OPEN,CMD_CLOSE,CMD_POLL,CMD_WRITE_BUFFER,CMD_WAKE_ON_WRITE(可寫時喚醒),CMD_READ_BUFFER,CMD_WAKE_ON_READ(可讀時喚醒)
2、CHANNEL,每次開啟/dev/qemu_pipe,都將新建一個struct qemu_pipe* pipe,相當於在/dev/qemu_pipe上面新開了一個通道,通道號CHANNEL=(unsigned long)pipe
3、WAKES,是否應該將讀等待/寫等待的執行緒喚醒
4、PARAMS_ADDR_LOW,PARAMS_ADDR_HIGH,ACCESS_PARAMS用於快速讀寫訪問,這個看不懂的話不影響理解qemu_pipe,可以跳過。
/* Parameter block used for batched ("fast") pipe accesses.
 * The driver fills it in (see access_with_param) and then writes to the
 * ACCESS_PARAMS register; the emulator reads the block from guest memory
 * and stores the outcome in 'result'. This is the 32-bit variant; the
 * field order and widths are part of the guest/emulator contract.
 */
struct access_params{
    uint32_t channel;   /* pipe channel id the access applies to */
    uint32_t size;      /* number of bytes to transfer */
    uint32_t address;   /* guest address of the data buffer */
    uint32_t cmd;       /* command to run (buffer read or buffer write) */
    uint32_t result;    /* written back by the emulator: command status */
    /* reserved for future extension */
    uint32_t flags;
};
kernel程式碼中qemu_pipe_dev在probe時,會申請一個access_params結構體,並將它在guest os的核心實體地址寫入PARAMS_ADDR_LOW和PARAMS_ADDR_HIGH。
kernel程式碼在需要進行快速讀寫訪問時,設定access_params結構體的內容,然後使用ACCESS_PARAMS啟動快速讀寫。

emulator程式碼中虛擬裝置將PARAMS_ADDR_LOW和PARAMS_ADDR_HIGH所表示的地址對映到emulator虛擬空間地址中,然後去獲取channel, size, address, cmd等資料然後去操作,相當於一次IO訪問就得到多個IO資料,所以叫做batch(批次)快速訪問。
注意PARAMS_ADDR_LOW和PARAMS_ADDR_HIGH寫的是guest os的核心實體地址,而access_params結構體裡面的address(buffer地址)仍然是guest os的虛擬地址(也就是read/write傳進來的使用者空間buffer地址),需要由emulator自行轉換成實體地址。


驅動程式為goldfish程式碼中的drivers/misc/qemupipe/qemu_pipe.c

初始化程式碼為:

/* Platform driver descriptor: binds to the platform device named
 * "qemu_pipe" and routes probe/remove to this file's handlers.
 */
static struct platform_driver qemu_pipe = {
    .driver = {
        .name = "qemu_pipe",
    },
    .remove = qemu_pipe_remove,
    .probe = qemu_pipe_probe,
};

/* Module init: register the qemu_pipe platform driver.
 * Returns whatever platform_driver_register() returns.
 */
static int __init qemu_pipe_dev_init(void)
{
    int rc = platform_driver_register(&qemu_pipe);

    return rc;
}

/* Module exit: undo qemu_pipe_dev_init() by unregistering the
 * qemu_pipe platform driver.
 */
static void qemu_pipe_dev_exit(void)
{
    platform_driver_unregister(&qemu_pipe);
}

qemu_pipe_probe幹的還是那些事:得到IO記憶體資源,進行ioremap,得到中斷號,設定中斷處理函式。最後使用misc_register註冊了一個雜項字元裝置,裝置檔案為/dev/qemu_pipe:

/* File operations exposed by the qemu_pipe misc character device. */
static const struct file_operations qemu_pipe_fops = {
    .owner   = THIS_MODULE,
    .open    = qemu_pipe_open,
    .release = qemu_pipe_release,
    .read    = qemu_pipe_read,
    .write   = qemu_pipe_write,
    .poll    = qemu_pipe_poll,
};

/* Misc device descriptor registered by qemu_pipe_probe(); the minor
 * number is assigned dynamically.
 */
static struct miscdevice qemu_pipe_device = {
    .name  = "qemu_pipe",
    .minor = MISC_DYNAMIC_MINOR,
    .fops  = &qemu_pipe_fops,
};

static int qemu_pipe_probe(struct platform_device *pdev)
{
    int err;
    struct resource *r;
    struct qemu_pipe_dev *dev = pipe_dev;

    PIPE_D("Creating device\n");

    INIT_RADIX_TREE(&dev->pipes, GFP_ATOMIC);
    /* not thread safe, but this should not happen */
    if (dev->base != NULL) {
        printk(KERN_ERR "QEMU PIPE Device: already mapped at %p\n",
            dev->base);
        return -ENODEV;
    }
    r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
    if (r == NULL || r->end - r->start < PAGE_SIZE - 1) {
        printk(KERN_ERR "QEMU PIPE Device: can't allocate i/o page\n");
        return -EINVAL;
    }
    dev->base = ioremap(r->start, PAGE_SIZE);
    PIPE_D("The mapped IO base is %p\n", dev->base);

    r = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
    if (r == NULL) {
        printk(KERN_ERR "QEMU PIPE Device: failure to allocate IRQ\n");
        err = -EINVAL;
        goto err_alloc_irq;
    }
    dev->irq = r->start;
    PIPE_D("The IRQ is %d\n", dev->irq);
    err = request_irq(dev->irq, qemu_pipe_interrupt, IRQF_SHARED,
                "goldfish_pipe", dev);
    if (err)
        goto err_alloc_irq;

    spin_lock_init(&dev->lock);

    err = misc_register(&qemu_pipe_device);
    if (err)
        goto err_misc_register;

    setup_access_params_addr(dev);
    return 0;

err_misc_register:
    free_irq(dev->irq, pdev);
err_alloc_irq:
    iounmap(dev->base);
    dev->base = NULL;
    return err;
}


qemu_pipe_open,每次開啟/dev/qemu_pipe都會alloc一個新的qemu_pipe結構體,每個qemu_pipe結構體對應一個CHANNEL,qemu_pipe結構體將被新增到一個radix_tree中。將qemu_pipe的地址作為CHANNEL(不可能重複的)寫入PIPE_REG_CHANNEL暫存器,然後寫CMD_OPEN到PIPE_REG_COMMAND中,去開啟新的CHANNEL。最後設定了filp的私有變數為qemu_pipe結構體。

/* open() handler for /dev/qemu_pipe.
 *
 * Allocates a new pipe object, records it in the device's radix tree
 * (keyed by the object's own address, which doubles as the channel id)
 * and asks the emulator to open the matching channel. On success the
 * pipe is stored in file->private_data.
 *
 * Returns 0 on success, -ENOMEM if the pipe cannot be allocated, the
 * radix tree insertion error, or the negative status reported by the
 * emulator for CMD_OPEN.
 */
static int qemu_pipe_open(struct inode *inode, struct file *file)
{
    unsigned long irq_flags;
    struct qemu_pipe *pipe;
    struct qemu_pipe_dev *dev = pipe_dev;
    int32_t status;
    int ret;

    /* Allocate new pipe kernel object */
    pipe = kzalloc(sizeof(*pipe), GFP_KERNEL);
    if (pipe == NULL) {
        PIPE_E("Not enough kernel memory to allocate new pipe\n");
        return -ENOMEM;
    }

    PIPE_D("Opening pipe %p\n", pipe);

    pipe->dev = dev;
    mutex_init(&pipe->lock);
    init_waitqueue_head(&pipe->wake_queue);

    /* Now, tell the emulator we're opening a new pipe. We use the
    * pipe object's address as the channel identifier for simplicity.
    */
    spin_lock_irqsave(&dev->lock, irq_flags);
    ret = radix_tree_insert(&dev->pipes, (unsigned long)pipe, pipe);
    if (ret) {
        spin_unlock_irqrestore(&dev->lock, irq_flags);
        PIPE_E("opening pipe failed due to radix tree insertion failure\n");
        kfree(pipe);
        return ret;
    }
    writel((unsigned long)pipe, dev->base + PIPE_REG_CHANNEL);
    writel(CMD_OPEN, dev->base + PIPE_REG_COMMAND);
    status = readl(dev->base + PIPE_REG_STATUS);
    /* If the emulator refused the channel, drop the tree entry while we
     * still hold the lock: the pipe is freed below and the interrupt
     * handler must never be able to look up a dangling pointer.
     */
    if (status < 0)
        radix_tree_delete(&dev->pipes, (unsigned long)pipe);
    spin_unlock_irqrestore(&dev->lock, irq_flags);

    if (status < 0) {
        PIPE_E("Could not open pipe channel, error=%d\n", status);
        kfree(pipe);
        return status;
    }

    /* All is done, save the pipe into the file's private data field */
    file->private_data = pipe;
    return 0;
}


qemu_pipe_read和qemu_pipe_write都是使用qemu_pipe_read_write來實現的,注意access_ok和__get_user/__put_user對於使用者空間指標的檢測。具體的讀寫比較簡單,就是操作IO暫存器而已。需要注意的是:如果emulator返回PIPE_ERROR_AGAIN,且檔案不是以非阻塞方式(O_NONBLOCK)開啟的,就需要進行阻塞等待;若是非阻塞方式,則直接返回錯誤碼。
具體的方法就是往PIPE_REG_COMMAND裡面寫CMD_WAKE_ON_WRITE或者CMD_WAKE_ON_READ,然後呼叫wait_event_interruptible去等待!test_bit(wakeBit, &pipe->flags)。
當中斷來臨時,會檢查每一個CHANNEL的PIPE_REG_WAKES暫存器,如果可讀 or 可寫 or 已關閉,中斷函式中會清除pipe->flags中的對應的等待標誌位,然後wait_event_interruptible等待結束。如果是qemu_pipe被關閉的情況,wait_event_interruptible等待結束之後,檢查到錯誤狀態並退出。

/* This function is used for both reading from and writing to a given
 * pipe.
 *
 * filp     - open file whose private_data is the struct qemu_pipe
 * buffer   - user-space buffer to transfer from (write) or into (read)
 * bufflen  - number of bytes requested
 * is_write - non-zero for a pipe write, zero for a pipe read
 *
 * The transfer is split at page boundaries; each piece is handed to the
 * emulator either through the batch parameter block or through the
 * plain registers. Returns the number of bytes transferred, 0 for a
 * zero-length request or EOF, or a negative errno.
 */
static ssize_t qemu_pipe_read_write(struct file *filp, char __user *buffer,
                    size_t bufflen, int is_write)
{
    unsigned long irq_flags;
    struct qemu_pipe *pipe = filp->private_data;
    struct qemu_pipe_dev *dev = pipe->dev;
    /* The READ commands sit at a fixed distance from the WRITE ones */
    const int cmd_offset = is_write ? 0
                    : (CMD_READ_BUFFER - CMD_WRITE_BUFFER);
    unsigned long address, address_end;
    int ret = 0;

    /* If the emulator already closed the pipe, no need to go further */
    if (test_bit(BIT_CLOSED_ON_HOST, &pipe->flags)) {
        PIPE_W("(write=%d) already closed!\n", is_write);
        ret = -EIO;
        goto out;
    }

    /* Null reads or writes succeeds */
    if (unlikely(bufflen == 0))
        goto out;

    /* Check the buffer range for access. Writing to the pipe only READS
     * the user buffer, while reading from the pipe WRITES into it; this
     * matches the __get_user/__put_user probes in the loop below.
     */
    if (!access_ok(is_write ? VERIFY_READ : VERIFY_WRITE,
            buffer, bufflen)) {
        ret = -EFAULT;
        PIPE_W("rw access_ok failed\n");
        goto out;
    }

    /* Serialize access to the pipe */
    if (mutex_lock_interruptible(&pipe->lock)) {
        PIPE_W("(write=%d) interrupted!\n", is_write);
        return -ERESTARTSYS;
    }

    address = (unsigned long)(void *)buffer;
    address_end = address + bufflen;

    while (address < address_end) {
        /* Never let a single transfer cross a page boundary */
        unsigned long  page_end = (address & PAGE_MASK) + PAGE_SIZE;
        unsigned long  next     = page_end < address_end ? page_end
                                 : address_end;
        unsigned long  avail    = next - address;
        int status, wakeBit;

        /* Ensure that the corresponding page is properly mapped */
        if (is_write) {
            char c;
            /* Ensure that the page is mapped and readable */
            if (__get_user(c, (char __user *)address)) {
                PIPE_E("read fault at address 0x%08x\n",
                    (unsigned int)address);
                if (!ret)
                    ret = -EFAULT;
                break;
            }
        } else {
            /* Ensure that the page is mapped and writable */
            if (__put_user(0, (char __user *)address)) {
                PIPE_E("write fault at address 0x%08x\n",
                    (unsigned int)address);
                if (!ret)
                    ret = -EFAULT;
                break;
            }
        }

        /* Now, try to transfer the bytes in the current page. Prefer
         * the batch parameter block; fall back to programming the
         * registers one by one when it is unavailable or fails.
         */
        spin_lock_irqsave(&dev->lock, irq_flags);
        if (dev->aps == NULL || access_with_param(
            dev, CMD_WRITE_BUFFER + cmd_offset, address, avail,
            pipe, &status) < 0)
        {
            writel((unsigned long)pipe,
                dev->base + PIPE_REG_CHANNEL);
            writel(avail, dev->base + PIPE_REG_SIZE);
            writel(address, dev->base + PIPE_REG_ADDRESS);
            writel(CMD_WRITE_BUFFER + cmd_offset,
                dev->base + PIPE_REG_COMMAND);
            status = readl(dev->base + PIPE_REG_STATUS);
        }
        spin_unlock_irqrestore(&dev->lock, irq_flags);

        if (status > 0) { /* Correct transfer */
            ret += status;
            address += status;
            continue;
        }

        if (status == 0)  /* EOF */
            break;

        /* An error occurred. If we already transferred stuff, just
        * return with its count. We expect the next call to return
        * an error code */
        if (ret > 0)
            break;

        /* If the error is not PIPE_ERROR_AGAIN, or if we are in
        * non-blocking mode, just return the error code.
        */
        if (status != PIPE_ERROR_AGAIN ||
            (filp->f_flags & O_NONBLOCK) != 0) {
            ret = qemu_pipe_error_convert(status);
            break;
        }

        /* We will have to wait until more data/space is available.
        * First, mark the pipe as waiting for a specific wake signal.
        */
        wakeBit = is_write ? BIT_WAKE_ON_WRITE : BIT_WAKE_ON_READ;
        set_bit(wakeBit, &pipe->flags);

        /* Tell the emulator we're going to wait for a wake event */
        spin_lock_irqsave(&dev->lock, irq_flags);
        writel((unsigned long)pipe, dev->base + PIPE_REG_CHANNEL);
        writel(CMD_WAKE_ON_WRITE + cmd_offset,
            dev->base + PIPE_REG_COMMAND);
        spin_unlock_irqrestore(&dev->lock, irq_flags);

        /* Unlock the pipe, then wait for the wake signal */
        mutex_unlock(&pipe->lock);

        /* The interrupt handler clears wakeBit when the emulator
         * signals the pipe. The mutex is NOT held on the 'goto out'
         * paths below, so they must skip the final mutex_unlock().
         */
        while (test_bit(wakeBit, &pipe->flags)) {
            if (wait_event_interruptible(
                    pipe->wake_queue,
                    !test_bit(wakeBit, &pipe->flags))) {
                ret = -ERESTARTSYS;
                PIPE_W("rw, wait_event error\n");
                goto out;
            }

            if (test_bit(BIT_CLOSED_ON_HOST, &pipe->flags)) {
                ret = -EIO;
                PIPE_W("rw, pipe already closed\n");
                goto out;
            }
        }

        /* Try to re-acquire the lock */
        if (mutex_lock_interruptible(&pipe->lock)) {
            ret = -ERESTARTSYS;
            goto out;
        }

        /* Try the transfer again */
        continue;
    }
    mutex_unlock(&pipe->lock);
out:
    return ret;
}

/* read() handler: pulls data from the pipe into the user buffer by
 * delegating to qemu_pipe_read_write() in read mode. ppos is unused.
 */
static ssize_t qemu_pipe_read(struct file *filp, char __user *buffer,
                  size_t bufflen, loff_t *ppos)
{
    const int is_write = 0;

    return qemu_pipe_read_write(filp, buffer, bufflen, is_write);
}

/* write() handler: pushes the user buffer into the pipe by delegating
 * to qemu_pipe_read_write() in write mode. The const qualifier is cast
 * away only to fit the shared helper's signature. ppos is unused.
 */
static ssize_t qemu_pipe_write(struct file *filp,
                const char __user *buffer, size_t bufflen,
                loff_t *ppos)
{
    char __user *src = (char __user *)buffer;
    const int is_write = 1;

    return qemu_pipe_read_write(filp, src, bufflen, is_write);
}


qemu_pipe_poll,實現poll,select,epoll介面用的,沒什麼特殊的,標準實現方式

/* poll() handler: registers the caller on the pipe's wait queue, asks
 * the emulator for the channel's current poll status and translates it
 * into a POLL* event mask. POLLERR is added once the host has closed
 * the pipe.
 */
static unsigned int qemu_pipe_poll(struct file *filp, poll_table *wait)
{
    struct qemu_pipe *pipe = filp->private_data;
    struct qemu_pipe_dev *dev = pipe->dev;
    unsigned long saved_irq;
    unsigned int events;
    int poll_status;

    mutex_lock(&pipe->lock);

    poll_wait(filp, &pipe->wake_queue, wait);

    /* Select the channel, issue CMD_POLL and fetch the answer */
    spin_lock_irqsave(&dev->lock, saved_irq);
    writel((unsigned long)pipe, dev->base + PIPE_REG_CHANNEL);
    writel(CMD_POLL, dev->base + PIPE_REG_COMMAND);
    poll_status = readl(dev->base + PIPE_REG_STATUS);
    spin_unlock_irqrestore(&dev->lock, saved_irq);

    mutex_unlock(&pipe->lock);

    events  = (poll_status & PIPE_POLL_IN)  ? (POLLIN | POLLRDNORM)  : 0;
    events |= (poll_status & PIPE_POLL_OUT) ? (POLLOUT | POLLWRNORM) : 0;
    events |= (poll_status & PIPE_POLL_HUP) ? POLLHUP : 0;

    if (test_bit(BIT_CLOSED_ON_HOST, &pipe->flags))
        events |= POLLERR;

    return events;
}


qemu_pipe_interrupt,中斷處理函式,迴圈處理每一個qemu_pipe,看看是否可讀 or 可寫 or 關閉了,然後喚醒對應的執行緒

/* Interrupt handler for the pipe device.
 *
 * Drains the emulator's list of signaled channels: for each one, clears
 * the matching wait bits in pipe->flags and wakes the pipe's wait queue.
 * dev_id is the struct qemu_pipe_dev passed to request_irq().
 * Returns IRQ_HANDLED if at least one pipe was woken, IRQ_NONE otherwise.
 */
static irqreturn_t qemu_pipe_interrupt(int irq, void *dev_id)
{
    struct qemu_pipe_dev *dev = dev_id;
    unsigned long irq_flags;
    int count = 0;

    /* We're going to read from the emulator a list of (channel,flags)
    * pairs corresponding to the wake events that occurred on each
    * blocked pipe (i.e. channel).
    */
    spin_lock_irqsave(&dev->lock, irq_flags);
    for (;;) {
        /* First read the channel, 0 means the end of the list */
        struct qemu_pipe *pipe;
        unsigned long wakes;
        unsigned long channel = readl(dev->base + PIPE_REG_CHANNEL);

        if (channel == 0)
            break;

        /* Convert channel to struct pipe pointer + read wake flags.
         * The wake flags are read right after the channel they
         * belong to.
         */
        wakes = readl(dev->base + PIPE_REG_WAKES);
        pipe  = (struct qemu_pipe *)(ptrdiff_t)channel;

        /* check if pipe is still valid: the channel value is only
         * trusted if it is still present in the radix tree.
         * NOTE(review): this 'break' stops draining the list, so any
         * channels queued behind a stale one are left for a later
         * invocation and count may stay 0 - confirm this is intended
         * rather than 'continue'.
         */
        if ((pipe = radix_tree_lookup(&dev->pipes,
            (unsigned long)pipe)) == NULL) {
            PIPE_W("interrupt for already closed pipe\n");
            break;
        }
        /* Did the emulator just close a pipe? If so, release both
         * readers and writers so they can notice the closed state.
         */
        if (wakes & PIPE_WAKE_CLOSED) {
            set_bit(BIT_CLOSED_ON_HOST, &pipe->flags);
            wakes |= PIPE_WAKE_READ | PIPE_WAKE_WRITE;
        }
        if (wakes & PIPE_WAKE_READ)
            clear_bit(BIT_WAKE_ON_READ, &pipe->flags);
        if (wakes & PIPE_WAKE_WRITE)
            clear_bit(BIT_WAKE_ON_WRITE, &pipe->flags);

        wake_up_interruptible(&pipe->wake_queue);
        count++;
    }
    spin_unlock_irqrestore(&dev->lock, irq_flags);

    return (count == 0) ? IRQ_NONE : IRQ_HANDLED;
}


setup_access_params_addr和access_with_param用於快速讀寫的,看不懂的可以跳過:

/* 0 on success */
/* Allocate the batch parameter block and tell the emulator where it is.
 *
 * The block's physical address is written to the PARAMS_ADDR_HIGH/LOW
 * registers. On success dev->aps points at the block and 0 is returned.
 * Returns -1 if the allocation fails or valid_batchbuffer_addr() rejects
 * the address; in that case dev->aps is left untouched and the block is
 * freed.
 */
static int setup_access_params_addr(struct qemu_pipe_dev *dev)
{
    uint64_t paddr;
    struct access_params *aps;

    aps = kmalloc(sizeof(*aps), GFP_KERNEL);
    if (!aps)
        return -1;

    paddr = __pa(aps);
    writel((uint32_t)(paddr >> 32), dev->base + PIPE_REG_PARAMS_ADDR_HIGH);
    writel((uint32_t)paddr, dev->base + PIPE_REG_PARAMS_ADDR_LOW);

    if (!valid_batchbuffer_addr(dev, aps)) {
        /* Nobody else references the block yet: do not leak it */
        kfree(aps);
        return -1;
    }

    dev->aps = aps;
    return 0;
}

/* A value that will not be set by qemu emulator */
#define IMPOSSIBLE_BATCH_RESULT (0xdeadbeaf)

/* Run one pipe command through the shared parameter block.
 *
 * Fills dev->aps with the channel (the pipe's address), size, address
 * and command, then writes to the ACCESS_PARAMS register. The result
 * field is pre-loaded with a sentinel so an ignored request can be
 * detected. Returns 0 and stores the command status in *status on
 * success, or -1 (leaving *status untouched) if the result field was
 * never updated.
 */
static int access_with_param(struct qemu_pipe_dev *dev, const int cmd,
                 unsigned long address, unsigned long avail,
                 struct qemu_pipe *pipe, int *status)
{
    struct access_params *params = dev->aps;

    params->result = IMPOSSIBLE_BATCH_RESULT;
    params->channel = (unsigned long)pipe;
    params->size = avail;
    params->address = address;
    params->cmd = cmd;
    writel(cmd, dev->base + PIPE_REG_ACCESS_PARAMS);

    /* A result that differs from the sentinel means the command ran */
    if (params->result != IMPOSSIBLE_BATCH_RESULT) {
        *status = params->result;
        return 0;
    }

    return -1;
}

另外需要說明的是幾種不同的地址:
1、guest os程序虛擬地址,使用者空間的地址,核心想使用這種地址時,需要呼叫copy_from_user與copy_to_user去驗證是否正確然後才能讀寫
2、guest os核心虛擬地址,3GB~4GB
3、guest os核心實體地址,經典情況下,就是核心虛擬地址減去一個偏移量(3GB),實體記憶體較大時,情況不同。在qemu中通過safe_get_phys_page_debug可以把guest os核心虛擬地址轉為guest os核心實體地址
4、emulator所在虛擬空間地址,我們的host os中的使用者空間地址,qemu可以操作的記憶體地址。guest os核心實體地址通過cpu_physical_memory_map後可以map為qemu所在的虛擬空間的地址,然後qemu可以去使用核心傳遞過來的記憶體。

三、虛擬裝置

初始化程式碼為pipe_dev_init,沒啥好說的,比battery的簡單多了。最後有三個除錯用的東西,可以不看:

/* initialize the trace device */
/* Create and register the goldfish pipe virtual device.
 *
 * newDeviceNaming selects the device name exposed to the guest
 * ("goldfish_pipe" when true, "qemu_pipe" otherwise). The device is
 * added to the goldfish bus with its read/write handlers, hooked into
 * snapshot save/load, and the optional debug pipe services are
 * registered when their DEBUG_* switches are enabled.
 */
void pipe_dev_init(bool newDeviceNaming)
{
    PipeDevice *pipeDev = g_malloc0(sizeof(*pipeDev));

    if (newDeviceNaming) {
        pipeDev->dev.name = "goldfish_pipe";
    } else {
        pipeDev->dev.name = "qemu_pipe";
    }
    pipeDev->dev.id = -1;
    pipeDev->dev.base = 0;       // will be allocated dynamically
    pipeDev->dev.size = 0x2000;
    pipeDev->dev.irq = 0;
    pipeDev->dev.irq_count = 1;

    goldfish_device_add(&pipeDev->dev, pipe_dev_readfn, pipe_dev_writefn,
                        pipeDev);

    register_savevm(NULL, "goldfish_pipe", 0, GOLDFISH_PIPE_SAVE_VERSION,
                    goldfish_pipe_save, goldfish_pipe_load, pipeDev);

    /* Debug-only pipe services */
#if DEBUG_ZERO_PIPE
    goldfish_pipe_add_type("zero", NULL, &zeroPipe_funcs);
#endif
#if DEBUG_PINGPONG_PIPE
    goldfish_pipe_add_type("pingpong", NULL, &pingPongPipe_funcs);
#endif
#if DEBUG_THROTTLE_PIPE
    goldfish_pipe_add_type("throttle", NULL, &throttlePipe_funcs);
#endif
}


讀函式為pipe_dev_read,需要注意的是PIPE_REG_CHANNEL。

kernel中的中斷處理函式每次讀取PIPE_REG_CHANNEL時,模擬裝置都會將dev->signaled_pipes連結串列上的一個CHANNEL返回,並設定PIPE_REG_WAKES暫存器,告知kernel中pipe的驅動程式可以喚醒哪一個CHANNEL上的讀等待 or 寫等待的執行緒。

dev->signaled_pipes是滿足條件、等待被喚醒的pipe連結串列,裡面的節點是在goldfish_pipe_wake函式中新增的。

當dev->signaled_pipes為NULL時,通過goldfish_device_set_irq(&dev->dev, 0, 0)清除中斷請求位。

/* I/O read */
/* I/O read handler for the pipe device registers.
 *
 * opaque is the PipeDevice; offset is the register offset. Returns the
 * register value, or 0 for unknown registers.
 *
 * Reading PIPE_REG_CHANNEL has a side effect: it pops the head of the
 * signaled-pipes list, latches that pipe's wake flags into dev->wakes
 * (returned by a following PIPE_REG_WAKES read) and lowers the IRQ once
 * the list becomes empty. PIPE_REG_CHANNEL_HIGH only peeks at the head,
 * so it has to be read before PIPE_REG_CHANNEL to refer to the same
 * pipe.
 */
static uint32_t pipe_dev_read(void *opaque, hwaddr offset)
{
    PipeDevice *dev = (PipeDevice *)opaque;

    switch (offset) {
    case PIPE_REG_STATUS:
        DR("%s: REG_STATUS status=%d (0x%x)", __FUNCTION__, dev->status, dev->status);
        return dev->status;

    case PIPE_REG_CHANNEL:
        if (dev->signaled_pipes != NULL) {
            Pipe* pipe = dev->signaled_pipes;
            DR("%s: channel=0x%llx wanted=%d", __FUNCTION__,
               (unsigned long long)pipe->channel, pipe->wanted);
            /* Hand the pending wake flags over to the device and
             * unlink the pipe from the signaled list. */
            dev->wakes = pipe->wanted;
            pipe->wanted = 0;
            dev->signaled_pipes = pipe->next_waked;
            pipe->next_waked = NULL;
            if (dev->signaled_pipes == NULL) {
                goldfish_device_set_irq(&dev->dev, 0, 0);
                DD("%s: lowering IRQ", __FUNCTION__);
            }
            return (uint32_t)(pipe->channel & 0xFFFFFFFFUL);
        }
        DR("%s: no signaled channels", __FUNCTION__);
        return 0;

    case PIPE_REG_CHANNEL_HIGH:
        if (dev->signaled_pipes != NULL) {
            Pipe* pipe = dev->signaled_pipes;
            DR("%s: channel_high=0x%llx wanted=%d", __FUNCTION__,
               (unsigned long long)pipe->channel, pipe->wanted);
            return (uint32_t)(pipe->channel >> 32);
        }
        DR("%s: no signaled channels", __FUNCTION__);
        return 0;

    case PIPE_REG_WAKES:
        DR("%s: wakes %d", __FUNCTION__, dev->wakes);
        return dev->wakes;

    case PIPE_REG_PARAMS_ADDR_HIGH:
        return (uint32_t)(dev->params_addr >> 32);

    case PIPE_REG_PARAMS_ADDR_LOW:
        return (uint32_t)(dev->params_addr & 0xFFFFFFFFUL);

    default:
        /* hwaddr may be wider than int: cast to match %d / %x */
        D("%s: offset=%d (0x%x)\n", __FUNCTION__, (int)offset,
          (unsigned)offset);
    }
    return 0;
}


寫函式為pipe_dev_write,如果是寫PIPE_REG_COMMAND,有專門的子函式pipeDevice_doCommand處理,如果是寫PIPE_REG_ACCESS_PARAMS,相當於batch操作,傳遞了多個暫存器的值,然後去執行讀寫操作。

/* I/O write handler for the pipe device registers.
 *
 * opaque is the PipeDevice, offset the register offset and value the
 * 32-bit value written by the guest. Plain registers just latch their
 * value into the device state; PIPE_REG_COMMAND runs a command on the
 * current channel; PIPE_REG_ACCESS_PARAMS runs a buffer read/write whose
 * arguments come from the parameter block at params_addr, then writes
 * the block back with its result field set to the command status.
 */
static void pipe_dev_write(void *opaque, hwaddr offset, uint32_t value)
{
    PipeDevice *s = (PipeDevice *)opaque;

    switch (offset) {
    case PIPE_REG_COMMAND:
        DR("%s: command=%d (0x%x)", __FUNCTION__, value, value);
        pipeDevice_doCommand(s, value);
        break;

    case PIPE_REG_SIZE:
        DR("%s: size=%d (0x%x)", __FUNCTION__, value, value);
        s->size = value;
        break;

    case PIPE_REG_ADDRESS:
        DR("%s: address=%d (0x%x)", __FUNCTION__, value, value);
        uint64_set_low(&s->address, value);
        break;

    case PIPE_REG_ADDRESS_HIGH:
        DR("%s: address_high=%d (0x%x)", __FUNCTION__, value, value);
        uint64_set_high(&s->address, value);
        break;

    case PIPE_REG_CHANNEL:
        DR("%s: channel=%d (0x%x)", __FUNCTION__, value, value);
        uint64_set_low(&s->channel, value);
        break;

    case PIPE_REG_CHANNEL_HIGH:
        DR("%s: channel_high=%d (0x%x)", __FUNCTION__, value, value);
        uint64_set_high(&s->channel, value);
        break;

    case PIPE_REG_PARAMS_ADDR_HIGH:
        /* Replace the upper 32 bits, keep the lower ones */
        s->params_addr = (s->params_addr & ~(0xFFFFFFFFULL << 32) ) |
                          ((uint64_t)value << 32);
        break;

    case PIPE_REG_PARAMS_ADDR_LOW:
        /* Replace the lower 32 bits, keep the upper ones */
        s->params_addr = (s->params_addr & ~(0xFFFFFFFFULL) ) | value;
        break;

    case PIPE_REG_ACCESS_PARAMS:
    {
        struct access_params aps;
        struct access_params_64 aps64;
        uint32_t cmd;

        /* Don't touch aps.result if anything wrong */
        if (s->params_addr == 0)
            break;

        /* The parameter block layout depends on the guest word size */
        const int guest_is_64bit = goldfish_guest_is_64bit();

        /* sync pipe device state from batch buffer */
        if (guest_is_64bit) {
            cpu_physical_memory_read(s->params_addr, (void*)&aps64,
                                     sizeof(aps64));
            s->channel = aps64.channel;
            s->size = aps64.size;
            s->address = aps64.address;
            cmd = aps64.cmd;
        } else {
            cpu_physical_memory_read(s->params_addr, (void*)&aps,
                                     sizeof(aps));
            s->channel = aps.channel;
            s->size = aps.size;
            s->address = aps.address;
            cmd = aps.cmd;
        }
        /* Only buffer transfers may be batched; anything else leaves
         * the guest's result field untouched so the guest can tell. */
        if ((cmd != PIPE_CMD_READ_BUFFER) && (cmd != PIPE_CMD_WRITE_BUFFER))
            break;

        pipeDevice_doCommand(s, cmd);
        if (guest_is_64bit) {
            aps64.result = s->status;
            cpu_physical_memory_write(s->params_addr, (void*)&aps64,
                                      sizeof(aps64));
        } else {
            aps.result = s->status;
            cpu_physical_memory_write(s->params_addr, (void*)&aps,
                                      sizeof(aps));
        }
    }
    break;

    default:
        /* hwaddr may be wider than int: cast to match %d / %x */
        D("%s: offset=%d (0x%x) value=%d (0x%x)\n", __FUNCTION__,
            (int)offset, (unsigned)offset, value, value);
        break;
    }
}


pipeDevice_doCommand,開啟,關閉,讀,寫,可讀時喚醒,可寫時喚醒。
需要注意的是:

1、在剛開啟CHANNEL時,pipe->funcs函式指標指向pipeConnector_funcs,根據guest os第一次寫入到/dev/qemu_pipe的內容,得到pipe service的名字以及args。

之後,pipe->funcs都將指向對應的pipe service中實現的函式

2、使用safe_get_phys_page_debug將傳遞過來的guest os核心虛擬地址轉為guest os核心實體地址,然後使用qemu_get_ram_ptr轉為emulator程序的虛擬空間地址。

/* Execute one pipe command on the channel currently selected in
 * dev->channel, leaving the outcome in dev->status.
 *
 * Commands other than OPEN require an existing pipe for that channel
 * (PIPE_ERROR_INVAL otherwise); commands other than CLOSE fail with
 * PIPE_ERROR_IO once the pipe has been closed by the host.
 */
static void
pipeDevice_doCommand( PipeDevice* dev, uint32_t command )
{
    /* lookup is the link that points at the pipe (or the terminating
     * NULL link), which lets CMD_CLOSE unlink the pipe in place. */
    Pipe** lookup = pipe_list_findp_channel(&dev->pipes, dev->channel);
    Pipe*  pipe   = *lookup;
    CPUOldState* env = cpu_single_env;

    /* Check that we're referring a known pipe channel */
    if (command != PIPE_CMD_OPEN && pipe == NULL) {
        dev->status = PIPE_ERROR_INVAL;
        return;
    }

    /* If the pipe is closed by the host, return an error */
    if (pipe != NULL && pipe->closed && command != PIPE_CMD_CLOSE) {
        dev->status = PIPE_ERROR_IO;
        return;
    }

    switch (command) {
    case PIPE_CMD_OPEN:
        DD("%s: CMD_OPEN channel=0x%llx", __FUNCTION__, (unsigned long long)dev->channel);
        /* Opening a channel id that is already in use is an error */
        if (pipe != NULL) {
            dev->status = PIPE_ERROR_INVAL;
            break;
        }
        /* Create the pipe and push it at the head of the device list */
        pipe = pipe_new(dev->channel, dev);
        pipe->next = dev->pipes;
        dev->pipes = pipe;
        dev->status = 0;
        break;

    case PIPE_CMD_CLOSE:
        DD("%s: CMD_CLOSE channel=0x%llx", __FUNCTION__, (unsigned long long)dev->channel);
        /* Remove from device's lists (both the pipe list and the
         * signaled list). Note that dev->status is not updated here. */
        *lookup = pipe->next;
        pipe->next = NULL;
        pipe_list_remove_waked(&dev->signaled_pipes, pipe);
        pipe_free(pipe);
        break;

    case PIPE_CMD_POLL:
        dev->status = pipe->funcs->poll(pipe->opaque);
        DD("%s: CMD_POLL > status=%d", __FUNCTION__, dev->status);
        break;

    case PIPE_CMD_READ_BUFFER: {
        /* Translate virtual address into physical one, into emulator memory.
         * NOTE(review): the translation result and dev->size are used
         * as-is here; confirm the guest driver guarantees a mapped page
         * and a size that stays within it. */
        GoldfishPipeBuffer  buffer;
        target_ulong        address = dev->address;
        target_ulong        page    = address & TARGET_PAGE_MASK;
        hwaddr  phys;
        phys = safe_get_phys_page_debug(ENV_GET_CPU(env), page);
#ifdef TARGET_X86_64
        phys = phys & TARGET_PTE_MASK;
#endif
        /* Host pointer = start of the guest page + offset within it */
        buffer.data = qemu_get_ram_ptr(phys) + (address - page);
        buffer.size = dev->size;
        dev->status = pipe->funcs->recvBuffers(pipe->opaque, &buffer, 1);
        DD("%s: CMD_READ_BUFFER channel=0x%llx address=0x%16llx size=%d > status=%d",
           __FUNCTION__, (unsigned long long)dev->channel, (unsigned long long)dev->address,
           dev->size, dev->status);
        break;
    }

    case PIPE_CMD_WRITE_BUFFER: {
        /* Translate virtual address into physical one, into emulator memory.
         * Same translation as CMD_READ_BUFFER, but the data goes to the
         * service's sendBuffers callback. */
        GoldfishPipeBuffer  buffer;
        target_ulong        address = dev->address;
        target_ulong        page    = address & TARGET_PAGE_MASK;
        hwaddr  phys;
        phys = safe_get_phys_page_debug(ENV_GET_CPU(env), page);
#ifdef TARGET_X86_64
        phys = phys & TARGET_PTE_MASK;
#endif
        buffer.data = qemu_get_ram_ptr(phys) + (address - page);
        buffer.size = dev->size;
        dev->status = pipe->funcs->sendBuffers(pipe->opaque, &buffer, 1);
        DD("%s: CMD_WRITE_BUFFER channel=0x%llx address=0x%16llx size=%d > status=%d",
           __FUNCTION__, (unsigned long long)dev->channel, (unsigned long long)dev->address,
           dev->size, dev->status);
        break;
    }

    case PIPE_CMD_WAKE_ON_READ:
        DD("%s: CMD_WAKE_ON_READ channel=0x%llx", __FUNCTION__, (unsigned long long)dev->channel);
        /* Only notify the service the first time the flag is requested */
        if ((pipe->wanted & PIPE_WAKE_READ) == 0) {
            pipe->wanted |= PIPE_WAKE_READ;
            pipe->funcs->wakeOn(pipe->opaque, pipe->wanted);
        }
        dev->status = 0;
        break;

    case PIPE_CMD_WAKE_ON_WRITE:
        DD("%s: CMD_WAKE_ON_WRITE channel=0x%llx", __FUNCTION__, (unsigned long long)dev->channel);
        /* Only notify the service the first time the flag is requested */
        if ((pipe->wanted & PIPE_WAKE_WRITE) == 0) {
            pipe->wanted |= PIPE_WAKE_WRITE;
            pipe->funcs->wakeOn(pipe->opaque, pipe->wanted);
        }
        dev->status = 0;
        break;

    default:
        /* Unknown command: only logged, dev->status is left unchanged */
        D("%s: command=%d (0x%x)\n", __FUNCTION__, command, command);
    }
}


pipeDevice_doCommand中提到的pipeConnector_funcs函式陣列,只有一個pipeConnector_sendBuffers有效,其他都是空殼

pipeConnector_sendBuffers用於guest os第一次往/dev/qemu_pipe中寫資料,資料內容為pipe:<service name>:<args>,去尋找匹配的pipe service,然後呼叫其初始化函式,得到peer(第三篇中的QemudPipe,也是pipe service funcs中的引數opaque),然後設定pipe->funcs指向pipe service提供的funcs。

/* sendBuffers callback of the initial "connector" pipe.
 *
 * Accumulates the bytes written by the guest into pcon->buffer until a
 * zero-terminated connection string ("pipe:<name>" or
 * "pipe:<name>:<arguments>") is available, then looks up the named
 * service, initializes it and switches the pipe over to that service's
 * callbacks, releasing the connector.
 *
 * Returns the number of bytes consumed, or PIPE_ERROR_INVAL when the
 * string is malformed, the service is unknown or its init fails.
 */
static int
pipeConnector_sendBuffers( void* opaque, const GoldfishPipeBuffer* buffers, int numBuffers )
{
    PipeConnector* pcon = opaque;
    int consumed = 0;
    int idx;

    DD("%s: channel=0x%llx numBuffers=%d", __FUNCTION__,
       (unsigned long long)pcon->pipe->channel,
       numBuffers);

    /* Append as much of every non-empty buffer as still fits */
    for (idx = 0; idx < numBuffers; idx++) {
        const GoldfishPipeBuffer* cur = &buffers[idx];
        int room;

        DD("%s: buffer data (%3d bytes): '%.*s'", __FUNCTION__,
           cur->size, cur->size, cur->data);

        if (cur->size == 0)
            continue;

        room = sizeof(pcon->buffer) - pcon->buffpos;
        if (room > cur->size)
            room = cur->size;

        if (room <= 0)
            continue;

        memcpy(pcon->buffer + pcon->buffpos, cur->data, room);
        pcon->buffpos += room;
        consumed += room;
    }

    /* Keep waiting until our buffer contains a zero-terminated string */
    if (memchr(pcon->buffer, '\0', pcon->buffpos) == NULL)
        return consumed;

    /* Acceptable formats for the connection string are:
     *
     *   pipe:<name>
     *   pipe:<name>:<arguments>
     */
    D("%s: connector: '%s'", __FUNCTION__, pcon->buffer);

    if (memcmp(pcon->buffer, "pipe:", 5) != 0) {
        /* Nope, we don't handle these for now. */
        D("%s: Unknown pipe connection: '%s'", __FUNCTION__, pcon->buffer);
        return PIPE_ERROR_INVAL;
    }

    /* Split "<name>:<arguments>" in place; empty arguments count as none */
    char* pipeName = pcon->buffer + 5;
    char* pipeArgs = NULL;
    char* separator = strchr(pipeName, ':');

    if (separator != NULL) {
        *separator = '\0';
        if (separator[1] != '\0')
            pipeArgs = separator + 1;
    }

    Pipe* pipe = pcon->pipe;
    const PipeService* svc = goldfish_pipe_find_type(pipeName);
    if (svc == NULL) {
        D("%s: Unknown server!", __FUNCTION__);
        return PIPE_ERROR_INVAL;
    }

    void*  peer = svc->funcs.init(pipe, svc->opaque, pipeArgs);
    if (peer == NULL) {
        D("%s: Initialization failed!", __FUNCTION__);
        return PIPE_ERROR_INVAL;
    }

    /* Do the evil switch now: from here on the pipe talks to the service */
    pipe->opaque = peer;
    pipe->service = svc;
    pipe->funcs  = &svc->funcs;
    pipe->args   = ASTRDUP(pipeArgs);
    AFREE(pcon);

    return consumed;
}


goldfish_pipe_add_type用於註冊pipe service

/* Register a pipe service under pipeName.
 *
 * pipeOpaque is stored alongside the service, and the callback table
 * pointed to by pipeFuncs is copied into the registry. The name pointer
 * itself is stored (not duplicated). Panics through APANIC when the
 * registry is full or the name is longer than
 * MAX_PIPE_SERVICE_NAME_SIZE.
 */
void
goldfish_pipe_add_type(const char*               pipeName,
                       void*                     pipeOpaque,
                       const GoldfishPipeFuncs*  pipeFuncs )
{
    PipeServices* registry = _pipeServices;
    int           slot = registry->count;

    if (slot >= MAX_PIPE_SERVICES) {
        APANIC("Too many goldfish pipe services (%d)", slot);
    }

    if (strlen(pipeName) > MAX_PIPE_SERVICE_NAME_SIZE) {
        APANIC("Pipe service name too long: '%s'", pipeName);
    }

    /* Fill the next free slot, then publish it */
    registry->services[slot].funcs  = *pipeFuncs;
    registry->services[slot].opaque = pipeOpaque;
    registry->services[slot].name   = pipeName;

    registry->count = slot + 1;
}


goldfish_pipe_find_type用於按pipe service name查詢pipe service

/* Find a registered pipe service by exact name.
 * Returns a pointer to its registry entry, or NULL when no service is
 * registered under pipeName.
 */
static const PipeService*
goldfish_pipe_find_type(const char*  pipeName)
{
    PipeServices* registry = _pipeServices;
    int           total = registry->count;
    int           idx = 0;

    while (idx < total) {
        if (strcmp(registry->services[idx].name, pipeName) == 0) {
            return &registry->services[idx];
        }
        idx++;
    }
    return NULL;
}


pipe_list_findp_channel,pipe_list_findp_waked,pipe_list_remove_waked是一些連結串列操作

/* Walk the 'next' chain starting at *list and return the address of the
 * link that points at the pipe whose channel matches. When no pipe
 * matches, the returned link is the terminating one (it holds NULL).
 */
static Pipe**
pipe_list_findp_channel( Pipe** list, uint64_t channel )
{
    Pipe** link = list;

    while (*link != NULL && (*link)->channel != channel) {
        link = &(*link)->next;
    }
    return link;
}

/* Walk the 'next_waked' chain starting at *list and return the address
 * of the link that points at 'pipe'. When the pipe is not on the list,
 * the returned link is the terminating one (it holds NULL).
 */
static Pipe**
pipe_list_findp_waked( Pipe** list, Pipe* pipe )
{
    Pipe** link = list;

    while (*link != NULL && *link != pipe) {
        link = &(*link)->next_waked;
    }
    return link;
}


/* Unlink 'pipe' from the signaled ('next_waked') list rooted at *list.
 * Does nothing when the pipe is not on the list.
 */
static void
pipe_list_remove_waked( Pipe** list, Pipe*  pipe )
{
    Pipe** link  = pipe_list_findp_waked(list, pipe);
    Pipe*  found = *link;

    if (found == NULL) {
        return;
    }

    *link = found->next_waked;
    found->next_waked = NULL;
}


goldfish_pipe_wake主要是在具體的pipe service中使用的,當pipe service可以接收資料或者被寫入資料時,去喚醒等待的執行緒
/* Signal wake events for a pipe.
 *
 * hwpipe is the Pipe to signal and flags the wake bits to add to its
 * pending set. The pipe is put at the head of the device's signaled
 * list unless it is already queued, and the device IRQ is raised so the
 * guest comes and collects the list.
 */
void
goldfish_pipe_wake( void* hwpipe, unsigned flags )
{
    Pipe*        pipe = hwpipe;
    PipeDevice*  dev  = pipe->device;

    DD("%s: channel=0x%llx flags=%d", __FUNCTION__, (unsigned long long)pipe->channel, flags);

    /* If not already there, add to the list of signaled pipes */
    if (*pipe_list_findp_waked(&dev->signaled_pipes, pipe) == NULL) {
        pipe->next_waked = dev->signaled_pipes;
        dev->signaled_pipes = pipe;
    }
    pipe->wanted |= flags;

    /* Raise IRQ to indicate there are items on our list ! */
    goldfish_device_set_irq(&dev->dev, 0, 1);
    DD("%s: raising IRQ", __FUNCTION__);
}


goldfish_pipe_close關閉時,需要喚醒等待的執行緒

void
goldfish_pipe_close( void* hwpipe )
{
    Pipe* pipe = hwpipe;

    D("%s: channel=0x%llx (closed=%d)", __FUNCTION__, (unsigned long long)pipe->channel, pipe->closed

    if (!pipe->closed) {
        pipe->closed = 1;
        goldfish_pipe_wake( hwpipe, PIPE_WAKE_CLOSED );
    }
}