
CPU Affinity: Usage and Mechanism (Including How to Pin VCPUs to PCPUs in Xen)

CPU affinity means that a process should run on a designated CPU for as long as possible without being migrated to other processors. The term is a translation of the English word "affinity", which is slightly misleading: it suggests a mere tendency, whereas what it actually describes is a hard binding, so "CPU association" would arguably be a better name. In programmer slang it is simply called binding a process to a CPU, or core pinning.

On a multi-core machine, each CPU has its own cache holding data used by the running process. If the OS scheduler migrates the process to another CPU, the cache hit rate drops. Once the process is pinned, it always runs on the designated CPU and is never scheduled elsewhere, which can yield a measurable performance gain.

Another reason for core pinning is to isolate important workloads. A high-priority real-time process can be pinned to a dedicated core: this both guarantees that the real-time process gets scheduled and keeps it from disturbing processes on the other CPUs.

1. Using CPU affinity in user space

On Linux, CPU affinity appears in user space as a cpu_set_t bit mask. Two functions set and query the mask:

#define _GNU_SOURCE             /* See feature_test_macros(7) */
#include <sched.h>

int sched_setaffinity(pid_t pid, size_t cpusetsize,
                      cpu_set_t *mask);
int sched_getaffinity(pid_t pid, size_t cpusetsize,
                      cpu_set_t *mask);

sched_setaffinity sets the affinity mask of the process identified by pid, with mask as an input parameter; sched_getaffinity retrieves the affinity mask of the process identified by pid, with mask as an output parameter. A pid of 0 refers to the calling thread itself.

cpusetsize can be computed as sizeof(cpu_set_t).

cpu_set_t is a bit-mask array, 1024 bits in total, where each bit corresponds to one CPU core. The macros below all operate on this mask. If desired, a process can be bound to several CPUs at once.

void CPU_ZERO(cpu_set_t *set);
void CPU_SET(int cpu, cpu_set_t *set);
void CPU_CLR(int cpu, cpu_set_t *set);
int  CPU_ISSET(int cpu, cpu_set_t *set);

The mask reads like this: a value of 0x23 is 00100011 in binary, meaning the process is bound to cores 0, 1 and 5.
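A minimal sketch of building that 0x23 mask with the macros above and applying it to the calling thread:

#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>

int main(void)
{
    cpu_set_t mask;

    CPU_ZERO(&mask);           /* clear all bits              */
    CPU_SET(0, &mask);         /* bit 0 -> core 0             */
    CPU_SET(1, &mask);         /* bit 1 -> core 1             */
    CPU_SET(5, &mask);         /* bit 5 -> core 5, i.e. 0x23  */

    /* pid 0 == the calling thread */
    if (sched_setaffinity(0, sizeof(mask), &mask) == -1)
        perror("sched_setaffinity");

    printf("core 5 in mask? %d\n", CPU_ISSET(5, &mask));
    return 0;
}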

One thing to note about core pinning: a child process inherits its parent's affinity across fork(), as the sketch below shows.
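A minimal sketch of that inheritance, assuming a machine with at least three cores:

#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>
#include <sys/wait.h>
#include <unistd.h>

int main(void)
{
    cpu_set_t mask;

    CPU_ZERO(&mask);
    CPU_SET(2, &mask);                          /* pin the parent to core 2 */
    sched_setaffinity(0, sizeof(mask), &mask);

    if (fork() == 0) {                          /* child process */
        cpu_set_t got;
        CPU_ZERO(&got);
        sched_getaffinity(0, sizeof(got), &got);
        /* prints 1: the child inherited the parent's mask */
        printf("child also pinned to core 2? %d\n", CPU_ISSET(2, &got));
        _exit(0);
    }
    wait(NULL);
    return 0;
}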

A fuller code example, using threads:

#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sched.h>
#include <pthread.h>
#include <sys/syscall.h>

#define gettid() syscall(__NR_gettid)

/* Rebinds itself to core 1, then reports every second
 * which cores are in its affinity mask. */
void *test_thread(void *arg)
{
    cpu_set_t mask;
    int loop = 0;
    int cpu_num = sysconf(_SC_NPROCESSORS_CONF);

    pthread_detach(pthread_self());

    CPU_ZERO(&mask);
    CPU_SET(1, &mask);
    if (sched_setaffinity(0, sizeof(mask), &mask) == -1)
        printf("set affinity failed\n");

    while (1) {
        CPU_ZERO(&mask);
        if (sched_getaffinity(0, sizeof(mask), &mask) == -1)
            printf("get failed\n");
        for (loop = 0; loop < cpu_num; loop++) {
            if (CPU_ISSET(loop, &mask))
                printf("test thread %ld run on processor %d\n",
                       (long)gettid(), loop);
        }
        sleep(1);
    }
}

/* Keeps the mask inherited from the main thread and only reports it. */
void *child_thread(void *arg)
{
    cpu_set_t mask;
    int loop = 0;
    int cpu_num = sysconf(_SC_NPROCESSORS_CONF);

    pthread_detach(pthread_self());

    while (1) {
        CPU_ZERO(&mask);
        if (sched_getaffinity(0, sizeof(mask), &mask) == -1)
            printf("get failed\n");
        for (loop = 0; loop < cpu_num; loop++) {
            if (CPU_ISSET(loop, &mask))
                printf("child thread %ld run on processor %d\n",
                       (long)gettid(), loop);
        }
        sleep(1);
    }
}

int main(int argc, char *argv[])
{
    int cpu_num = 0;
    pthread_t thread;
    int cpuid = 0;
    int ret = 0;
    int loop = 0;
    cpu_set_t mask_set;
    cpu_set_t mask_get;

    if (argc != 2) {
        printf("usage: %s <cpuid>\n", argv[0]);
        return -1;
    }
    cpuid = atoi(argv[1]);

    /* Number of CPUs in the system */
    cpu_num = sysconf(_SC_NPROCESSORS_CONF);
    printf("system has %d processors.\n", cpu_num);

    /* Initialize mask_set and pin the main thread to cpuid */
    CPU_ZERO(&mask_set);
    CPU_SET(cpuid, &mask_set);
    if (sched_setaffinity(0, sizeof(mask_set), &mask_set) == -1)
        printf("Warning: set cpu %d affinity failed\n", cpuid);

    ret = pthread_create(&thread, NULL, child_thread, NULL);
    if (ret) {
        printf("Error: pthread_create failed\n");
        return -1;
    }
    ret = pthread_create(&thread, NULL, test_thread, NULL);
    if (ret) {
        printf("Error: pthread_create failed\n");
        return -1;
    }

    while (1) {
        CPU_ZERO(&mask_get);
        if (sched_getaffinity(0, sizeof(mask_get), &mask_get) == -1)
            printf("Warning: get cpu %d affinity failed\n", cpuid);
        for (loop = 0; loop < cpu_num; loop++) {
            if (CPU_ISSET(loop, &mask_get))
                printf("main thread %ld is running on processor %d\n",
                       (long)gettid(), loop);
        }
        sleep(1);
    }
    return 0;
}
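Assuming the file is saved as cpu_affinity.c, it can be built and run like this; binding to core 3 here matches the /proc output shown below:

gcc -o cpu_affinity cpu_affinity.c -lpthread
./cpu_affinity 3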

After the program runs, the printed output together with /proc/<pid>/status lets you verify the binding; status contains:

Cpus_allowed: 08
Cpus_allowed_list: 3

Here the process's pinning is plainly visible: the mask 08 is binary 1000, i.e. core 3.
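The same check works for any running process (replace <pid> with the target process ID):

grep Cpus_allowed /proc/<pid>/status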

If the process is already running, though, and you cannot change its code, the taskset utility changes CPU affinity from outside:

taskset [options] -p [mask] pid

The mask has the same format described above; see the man page for the full details.
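For example, assuming a hypothetical PID 12345, the 0x23 mask from earlier pins the process to cores 0, 1 and 5:

taskset -p 0x23 12345   # set the new affinity mask
taskset -p 12345        # read the current mask back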

2. The CPU affinity mechanism in kernel space

The kernel's process descriptor, task_struct, contains the field

cpumask_t cpus_allowed;

which records the process's CPU binding.

The scheduler in particular uses it to guarantee that a task is never placed on a CPU outside its mask:

static inline
int select_task_rq(struct task_struct *p, int sd_flags, int wake_flags)
{
    int cpu = p->sched_class->select_task_rq(p, sd_flags, wake_flags);

    /*
     * In order not to call set_task_cpu() on a blocking task we need
     * to rely on ttwu() to place the task on a valid ->cpus_allowed
     * cpu.
     *
     * Since this is common to all placement strategies, this lives here.
     *
     * [ this allows ->select_task() to simply return task_cpu(p) and
     *   not worry about this generic constraint ]
     */
    if (unlikely(!cpumask_test_cpu(cpu, &p->cpus_allowed) ||
                 !cpu_online(cpu)))
        cpu = select_fallback_rq(task_cpu(p), p);

    return cpu;
}

When selecting a run queue for a task, the scheduler only considers allowed CPUs: cpumask_test_cpu checks the candidate against cpus_allowed, and if the check fails (or the CPU is offline), select_fallback_rq picks a valid CPU instead.

3. Binding VCPUs to physical CPUs in Xen

The xm vcpu-pin command pins a vcpu to a physical cpu:

 xm vcpu-pin domname vcpu cpu
 vcpu -- the virtual CPU number (numbered 0 to N-1, where N is the number of cores; cat /proc/cpuinfo lists them)
 cpu  -- the CPU number on the host
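For example, pinning VCPU 0 of a hypothetical domain named myguest to physical CPU 3, then verifying:

xm vcpu-pin myguest 0 3
xm vcpu-list myguest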

Code walkthrough:
/xen-4.0.0/tools/python/xen/xm/main.py
96    SUBCOMMAND_HELP = {
97      # common commands
------------------------------------------
160     'vcpu-pin'    : ('<Domain> <VCPU|all> <CPUs|all>',
161        'Set which CPUs a VCPU can use.'),
------------------------------------------
261     }

-----------------------------------------
376    domain_commands = [
-----------------------------------------
408        "vcpu-pin",
-----------------------------------------
1455    def xm_vcpu_pin(args):
    arg_check(args, "vcpu-pin", 3)

    def cpu_make_map(cpulist):
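        # e.g. cpu_make_map("0-2,^1,5") expands 0-2 to [0,1,2],
        # "^1" then removes 1 and "5" appends 5, yielding "0,2,5"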
        cpus = []
        for c in cpulist.split(','):
            if c.find('-') != -1:
                (x,y) = c.split('-')
                for i in range(int(x),int(y)+1):
                    cpus.append(int(i))
            else:
                # remove this element from the list
                if c[0] == '^':
                    cpus = [x for x in cpus if x != int(c[1:])]
                else:
                    cpus.append(int(c))
        cpus.sort()
        return ",".join(map(str, cpus))

    dom  = args[0]
    vcpu = args[1]
    if args[2] == 'all':
        cpumap = cpu_make_map('0-63')
    else:
        cpumap = cpu_make_map(args[2])

    if serverType == SERVER_XEN_API:
        server.xenapi.VM.add_to_VCPUs_params_live(
            get_single_vm(dom), "cpumap%i" % int(vcpu), cpumap)
    else:
        server.xend.domain.pincpu(dom, vcpu, cpumap)


-------------------------------------------------------------------------------
3445  commands = {

3475    # cpu commands
        "vcpu-pin": xm_vcpu_pin,
        "vcpu-list": xm_vcpu_list,
        "vcpu-set": xm_vcpu_set,

------------------------------------------------- second layer -------------------------------------------------

/xen-4.0.0/tools/python/xen/xend/XendDomain.py

1564       def domain_pincpu(self, domid, vcpu, cpumap):
        """Set which cpus vcpu can use

        @param domid: Domain ID or Name
        @type domid: int or string.
        @param vcpu: vcpu to pin to
        @type vcpu: int
        @param cpumap:  string repr of usable cpus
        @type cpumap: string
        @rtype: 0
        """
        dominfo = self.domain_lookup_nr(domid)
        if not dominfo:
            raise XendInvalidDomain(str(domid))

        # if vcpu is keyword 'all', apply the cpumap to all vcpus
        if str(vcpu).lower() == "all":
            vcpus = range(0, int(dominfo.getVCpuCount()))
        else:
            vcpus = [ int(vcpu) ]
      
        # set the same cpumask for all vcpus
        rc = 0
        cpus = dominfo.getCpus()
        cpumap = map(int, cpumap.split(","))
        for v in vcpus:
            try:
                if dominfo._stateGet() in (DOM_STATE_RUNNING, DOM_STATE_PAUSED):
                    rc = xc.vcpu_setaffinity(dominfo.getDomid(), v, cpumap)
                cpus[v] = cpumap                     
            except Exception, ex:
                log.exception(ex)
                raise XendError("Cannot pin vcpu: %d to cpu: %s - %s" % \
                                (v, cpumap, str(ex)))
        dominfo.setCpus(cpus)
        self.managed_config_save(dominfo)

        return rc


--------------------------------------- third layer --------------------------------------------------

/xen-4.0.0/tools/python/xen/lowlevel/xc/xc.c

 #define PKG "xen.lowlevel.xc"
 #define CLS "xc"


static PyObject *pyxc_vcpu_setaffinity(XcObject *self,
                                       PyObject *args,
                                       PyObject *kwds)
{
    uint32_t dom;
    int vcpu = 0, i;
    uint64_t  *cpumap;
    PyObject *cpulist = NULL;
    int nr_cpus, size;
    xc_physinfo_t info = {0};
    uint64_t cpumap_size = sizeof(*cpumap);

    static char *kwd_list[] = { "domid", "vcpu", "cpumap", NULL };

    if ( !PyArg_ParseTupleAndKeywords(args, kwds, "i|iO", kwd_list,
                                      &dom, &vcpu, &cpulist) )
        return NULL;

    if ( xc_physinfo(self->xc_handle, &info) != 0 )
        return pyxc_error_to_exception();
 
    nr_cpus = info.nr_cpus;

    size = (nr_cpus + cpumap_size * 8 - 1)/ (cpumap_size * 8);
    cpumap = malloc(cpumap_size * size);
    if(cpumap == NULL)
        return pyxc_error_to_exception();

    if ( (cpulist != NULL) && PyList_Check(cpulist) )
    {
        for ( i = 0; i < size; i++)
        {
            cpumap[i] = 0ULL;
        }
        for ( i = 0; i < PyList_Size(cpulist); i++ )
        {
            long cpu = PyInt_AsLong(PyList_GetItem(cpulist, i));
            cpumap[cpu / (cpumap_size * 8)] |= (uint64_t)1 << (cpu % (cpumap_size * 8));
        }
    }
 
    if ( xc_vcpu_setaffinity(self->xc_handle, dom, vcpu, cpumap, size * cpumap_size) != 0 )
    {
        free(cpumap);
        return pyxc_error_to_exception();
    }
    Py_INCREF(zero);
    free(cpumap);
    return zero;
}
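Note the bit packing here: each CPU number selects one bit of a 64-bit word, so a cpulist of [0, 1, 5] sets cpumap[0] to the same 0x23 value as the user-space mask shown at the beginning.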


---------------------------------------------------------------------------------
1755    static PyMethodDef pyxc_methods[] = {
-------------------------------------------------------
1829    "vcpu_setaffinity",
      (PyCFunction)pyxc_vcpu_setaffinity,
      METH_VARARGS | METH_KEYWORDS, "\n"
      "Pin a VCPU to a specified set CPUs.\n"
      " dom [int]:     Identifier of domain to which VCPU belongs.\n"
      " vcpu [int, 0]: VCPU being pinned.\n"
      " cpumap [list, []]: list of usable CPUs.\n\n"
      "Returns: [int] 0 on success; -1 on error.\n" },
--------------------------------------------------------
2268  };

--------------------------------------------------------------------------------------

/xen-4.0.0/tools/libxc/xc_domain.c

int xc_vcpu_setaffinity(int xc_handle,
                        uint32_t domid,
                        int vcpu,
                        uint64_t cpumap)
{
    DECLARE_DOMCTL;
    int ret = -1;
    uint8_t local[sizeof (cpumap)];

    domctl.cmd = XEN_DOMCTL_setvcpuaffinity;
    domctl.domain = (domid_t)domid;
    domctl.u.vcpuaffinity.vcpu    = vcpu;

    bitmap_64_to_byte(local, &cpumap, sizeof(cpumap) * 8);

    set_xen_guest_handle(domctl.u.vcpuaffinity.cpumap.bitmap, local);

    domctl.u.vcpuaffinity.cpumap.nr_cpus = sizeof(cpumap) * 8;
   
    if ( lock_pages(local, sizeof(local)) != 0 )
    {
        PERROR("Could not lock memory for Xen hypercall");
        goto out;
    }

    ret = do_domctl(xc_handle, &domctl);

    unlock_pages(local, sizeof(local));

 out:
    return ret;
}
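From here the request leaves the toolstack: do_domctl issues the XEN_DOMCTL_setvcpuaffinity hypercall, which (in Xen 4.0) is handled in xen/common/domctl.c and ends up in vcpu_set_affinity in xen/common/schedule.c, the Xen scheduler's counterpart of the Linux cpus_allowed check shown earlier.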