python單程序能否利用多核cpu的測試結論

阿新 • • 發佈：2018-12-23

在很早的時候，就聽網上的文章說:

python有GIL，所以在單程序內，即使使用多執行緒也無法利用到多核的優勢，同一時刻，python的位元組碼只會執行在一個cpu上。

以前也是奉為真理，直到今天在對自己的python server做效能測試的時候，發現一個python程序的cpu居然達到了120%。

當用c++程式設計的時候，如果使用多執行緒，那麼確實程序cpu超過100%非常正常，但是對python來說，似乎這樣就和網上的文章衝突了。

所以還是決定自己親身試驗一下，編寫程式碼如下：

Python

from thread import start_new_thread

def worker():
    # Thread body for the experiment: spin forever without doing any work.
    # The loop never exits, so this function never returns.
    while 1:
        #print 1
        pass

# Launch 15 spinning worker threads through the low-level thread API.
for _ in range(15):
    start_new_thread(worker, ())

# Block the main thread on stdin; the script only proceeds past this point
# once a line has been read.
raw_input()

1234567891011

fromthreadimportstart_new_threaddefworker():while1:#print 1passforit inrange(0,15):start_new_thread(worker,())raw_input()

執行環境為： centos6.4 64位， python 2.7.

得到的結果如下:

（原文此處應為一張截圖，圖片未能載入；殘留的圖片識別碼：E588C2D7 1608 42CC B800 AD5338C87F47）

可以清楚的看到，pid為31199的python程序cpu達到了787.9%，接近理論能達到的最大值 800%。

而上方的8個cpu也分別達到了近100%的利用率。

如果只是按照以上測試結果，確實可以得出結論：python 使用單程序、多執行緒確實能夠使用到多核 cpu，與網上流傳的說法並不一致。

但是，還是希望如果有讀者對這塊有更深入的研究能夠進行批評指正，謝謝～

8月15日補充

感謝 la.onger 等幾位博友的討論，現在增加一個測試，用來測試純cpu計算用一個執行緒or多個執行緒完成的總時間的差別，程式碼如下:

Python

import time
from threading import Thread

# Number of add/subtract iterations performed by one call to Test.work().
LOOPS = 1000000
# How many times work() is run: sequential calls in one_thread_test(),
# one thread per run in multi_thread_test().
THREAD_NUM = 10
# Amount added to or subtracted from the counter on every iteration.
STEP_SIZE = 94753434

class Test(object):
    num = 1

    def work(self):
        for it in xrange(0, LOOPS):
            if self.num &gt; STEP_SIZE:
                self.num -= STEP_SIZE
            else:
                self.num += STEP_SIZE

    def one_thread_test(self):
        self.num = 1

        begin_time = time.time()

        for v in xrange(0, THREAD_NUM):
            self.work()

        print 'time passed: ', time.time() - begin_time

    def multi_thread_test(self):
        self.num = 1

        t_list = []

        begin_time = time.time()

        for v in xrange(0, THREAD_NUM):
            t = Thread(target=self.work)
            t.start()
            t_list.append(t)

        for it in t_list:
            it.join()

        print 'time passed: ', time.time() - begin_time

# Time the sequential variant first, then the threaded one.
bench = Test()
bench.one_thread_test()
bench.multi_thread_test()

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647

importtimefromthreadingimportThreadLOOPS=1000000THREAD_NUM=10STEP_SIZE=94753434classTest(object):num=1defwork(self):forit inxrange(0,LOOPS):ifself.num>STEP_SIZE:self.num-=STEP_SIZEelse:self.num+=STEP_SIZEdefone_thread_test(self):self.num=1begin_time=time.time()forvinxrange(0,THREAD_NUM):self.work()print'time passed: ',time.time()-begin_timedefmulti_thread_test(self):self.num=1t_list=[]begin_time=time.time()forvinxrange(0,THREAD_NUM):t=Thread(target=self.work)t.start()t_list.append(t)forit int_list:it.join()print'time passed: ',time.time()-begin_timet=Test()t.one_thread_test()t.multi_thread_test()

輸出結果如下:

Python

time passed:  3.44264101982
time passed:  7.22910785675

12	timepassed:3.44264101982timepassed:7.22910785675

使用多執行緒後，比不用多執行緒還慢

為了與c++版做對比，也開發了c++程式碼如下：

C++

#include <stdio.h>
#include <string.h>
#include <stdint.h>
#include <iostream>
#include <memory>
#include <sstream>
#include <algorithm>
#include <string>
#include <vector>
#include <set>
#include <map>
#include <sys/time.h>
#include <pthread.h>
using namespace std;

// Iterations performed by a single work() call.
#define LOOPS 1000000
// Number of work() runs in the sequential test / threads in the threaded test.
#define THREAD_NUM 10
// Amount added to or subtracted from the counter on each iteration.
#define STEP_SIZE   94753434

/**
 * Micro-benchmark that runs the same integer add/subtract loop either
 * THREAD_NUM times back to back on the calling thread, or once on each of
 * THREAD_NUM pthreads, and prints the wall-clock time of each variant.
 *
 * NOTE(review): in multi_thread_test() every worker updates the same `num`
 * member without synchronization. That is a data race, so the final value of
 * `num` after the threaded test is unspecified; only the timing is meaningful.
 */
class Test
{
public:
    /** Starts with the counter at 1 and zeroed timestamps. */
    Test() : num(1), m_tpstart(), m_tpend() {}
    virtual ~Test() {}

    /** Runs work() THREAD_NUM times sequentially and prints the elapsed seconds. */
    void one_thread_test() {
        this->num = 1;

        gettimeofday(&m_tpstart, NULL);
        for (size_t i = 0; i < THREAD_NUM; ++i)
        {
            work();
        }
        gettimeofday(&m_tpend, NULL);

        print_elapsed();
    }

    /**
     * Runs work() once on each of THREAD_NUM threads and prints the elapsed
     * seconds, measured from before the first pthread_create() until after
     * the last pthread_join().
     *
     * Threads are created with default attributes, i.e. joinable. Joining a
     * detached thread does not wait for it, which would stop the clock before
     * the work is done and let workers outlive this object.
     */
    void multi_thread_test() {
        this->num = 1;

        // Ids of the threads that were actually started.
        std::vector<pthread_t> started;
        started.reserve(THREAD_NUM);

        gettimeofday(&m_tpstart, NULL);

        for (int i = 0; i < THREAD_NUM; i++)
        {
            pthread_t threadId;
            const int ret = pthread_create(&threadId, NULL, Test::static_run_work, (void*)this);
            if (ret != 0) {
                // Report the failure and skip this slot; never join an id
                // that was not produced by a successful pthread_create().
                fprintf(stderr, "pthread_create failed: %s\n", strerror(ret));
                continue;
            }
            started.push_back(threadId);
        }

        for (std::vector<pthread_t>::iterator it = started.begin(); it != started.end(); ++it)
        {
            const int ret = pthread_join(*it, NULL);
            if (ret != 0) {
                fprintf(stderr, "pthread_join failed: %s\n", strerror(ret));
            }
        }

        gettimeofday(&m_tpend, NULL);

        print_elapsed();
    }

    /** Performs LOOPS iterations of the add/subtract step on `num`. */
    void work() {
        for (size_t i = 0; i < LOOPS; ++i) {
            if (this->num > STEP_SIZE) {
                this->num -= STEP_SIZE;
            }
            else {
                this->num += STEP_SIZE;
            }
        }
    }

    /**
     * pthread entry point: `args` is the Test instance whose work() is run.
     * Always returns NULL.
     */
    static void* static_run_work(void *args) {
        Test* t = (Test*) args;
        t->work();

        return NULL;
    }

private:
    /** Prints the time between m_tpstart and m_tpend in seconds. */
    void print_elapsed() const {
        const long long timeuse = 1000000 * (long long)(m_tpend.tv_sec - m_tpstart.tv_sec)
                                  + m_tpend.tv_usec - m_tpstart.tv_usec; // microseconds

        printf("time passed: %f\n", ((double)timeuse) / 1000000);
    }

public:
    int64_t num;                        // counter mutated by work()
    struct timeval m_tpstart, m_tpend;  // start / end timestamps of the last test
};

int main(int argc, char **argv)
{
    Test test;

    test.one_thread_test();
    test.multi_thread_test();
    return 0;
}

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106

#include <stdio.h>#include <string.h>#include <stdint.h>#include <iostream>#include <memory>#include <sstream>#include <algorithm>#include <string>#include <vector>#include <set>#include <map>#include <sys/time.h>#include <pthread.h>using namespace std;#define LOOPS 1000000#define THREAD_NUM 10#define STEP_SIZE 94753434classTest{public:Test(){}virtual~Test(){}void one_thread_test(){this->num=1;gettimeofday(&m_tpstart,NULL);for(size_ti=0;i<THREAD_NUM;++i){work();}gettimeofday(&m_tpend,NULL);longlongtimeuse=1000000*(longlong)(m_tpend.tv_sec-m_tpstart.tv_sec)+m_tpend.tv_usec-m_tpstart.tv_usec;//微秒printf("time passed: %f\n",((double)timeuse)/1000000);}void multi_thread_test(){this->num=1;intret;vector<pthread_t>vecThreadId;//所有thread的idpthread_attr_t attr;pthread_attr_init(&attr);pthread_attr_setdetachstate(&attr,PTHREAD_CREATE_DETACHED);gettimeofday(&m_tpstart,NULL);pthread_t threadId;for(inti=0;i<THREAD_NUM;i++){ret=pthread_create(&threadId,&attr,Test::static_run_work,(void*)this);if(ret!=0){pthread_attr_destroy(&attr);}vecThreadId.push_back(threadId);}pthread_attr_destroy(&attr);for(vector<pthread_t>::iterator it=vecThreadId.begin();it!=vecThreadId.end();++it){pthread_join(*it,NULL);}gettimeofday(&m_tpend,NULL);longlongtimeuse=1000000*(longlong)(m_tpend.tv_sec-m_tpstart.tv_sec)+m_tpend.tv_usec-m_tpstart.tv_usec;//微秒printf("time passed: %f\n",((double)timeuse)/1000000);}void work(){for(size_ti=0;i<LOOPS;++i){if(this->num>STEP_SIZE){this->num-=STEP_SIZE;}else{this->num+=STEP_SIZE;}}}static void*static_run_work(void*args){Test*t=(Test*)args;t->work();returnNULL;}public:int64_t num;structtimeval m_tpstart,m_tpend;};intmain(intargc,char**argv){Testtest;test.one_thread_test();test.multi_thread_test();return0;}

輸出結果如下：

Python

time passed: 0.036114
time passed: 0.000513

12	timepassed:0.036114timepassed:0.000513

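可見，c++版確實性能提高了非常多。
（注意：這組數據是在執行緒以 PTHREAD_CREATE_DETACHED 屬性建立的情況下測得的；對 detached 執行緒呼叫 pthread_join 並不會等待其結束，因此 0.000513 秒很可能只涵蓋了建立執行緒的時間，而不是全部計算完成的時間。c++ 版的實際加速比需要以 joinable 執行緒重新測量後才能確定。）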
由此可見，python的多執行緒程式設計，在多核cpu利用上確實差一些。

python單程序能否利用多核cpu的測試結論

python單程序能否利用多核cpu的測試結論

python的多線程為什麽不能利用多核CPU？

python學習筆記- day10-【問題： python為什麽python的多線程不能利用多核CPU？】

為什麽python的多線程不能利用多核CPU，但是咱們在寫代碼的時候，多線程的確是在並發，而且還比單線程快。

python多執行緒為什麼不能利用多核cpu

Python怎麼利用多核cpu

淘寶面試題：如何充分利用多核CPU，計算很大的List中所有整數的和

如何利用多核CPU來加速你的Linux命令 — awk, sed, bzip2, grep, wc等

【好文】淘寶面試題：如何充分利用多核CPU，計算很大的List中所有整數的和

如何充分利用多核CPU，計算很大的List中所有整數的和

多核CPU上python多線程並行的一個假象

多核CPU利用測試

Zabbix通過SNMP監控多核CPU使用率時, 計算CPU平均使用率

linux top命令查看內存及多核CPU的使用講述【轉】

Zabbix監控多核CPU

查看系統cpu性能top和多核cpu使用性能

查看多核CPU各核的狀態

PHP多程序初探 --- 利用多程序開發點兒東西吧

linux top命令檢視記憶體及多核CPU的使用

對於多執行緒程式，單核cpu與多核cpu是怎麼工作的

python單程序能否利用多核cpu的測試結論

相關推薦