1. 程式人生 > >linux多執行緒程式設計--對三層for迴圈的優化

linux多執行緒程式設計--對三層for迴圈的優化

目標:將下面3層for迴圈的程式碼進行優化:

#include <iostream>
#include <vector>
#include <pthread.h>

// Standard-library names (vector, cout, endl) are used unqualified below.
using namespace std;

// Table type: a vector of rows, each row a vector of long values.
typedef vector< vector<long> > lvec;

// Zero-initialised 1000 x 5000 global array; not referenced by the code
// visible in this listing.
long arr[1000][5000] = {};

// Forward declarations; both functions are defined after main().
long acculate(int, int);
lvec& forarr(lvec&, long, long);



// Serial baseline: builds a 1000 x 5000 table, recomputes every entry through
// forarr()/acculate(), and prints the last entry before and after the update.
int main()
{
    lvec ivec;
    ivec.resize(1000);

    // Initialise every row with the values 0..4999.
    for (long i=0; i<1000; ++i) {
        ivec[i].reserve(5000);   // one allocation per row instead of repeated growth
        for (long j=0; j<5000; ++j)
            ivec[i].push_back(j);
    }
    cout << ivec[999][4999] << endl;

    // Recompute each row with two calls (delta 0, then delta 2500).
    // forarr() modifies the table in place through its reference parameter,
    // so its return value does not need to be assigned back to ivec.
    // No thread is started here: forarr has the signature
    // lvec&(lvec&, long, long), which is not a valid pthread start routine
    // (void* (*)(void*)), and this version is the single-threaded baseline.
    for (long i=0; i<1000; ++i) {
        forarr(ivec, i, 0);
        forarr(ivec, i, 2500);
    }

    cout << ivec[999][4999] << endl;
    return 0;
}

// Writes acculate(f, j) into in_vec[f][j] for every j in [0, 2500 + delta)
// and returns the same table by reference.
// NOTE(review): the range always starts at 0, so a call with delta = 2500
// also rewrites the columns already covered by a call with delta = 0 —
// confirm whether [delta, delta + 2500) was intended.
lvec& forarr(lvec& in_vec, long f, long delta) {
    const long limit = 2500 + delta;
    long col = 0;
    while (col < limit) {
        const long value = acculate(f, col);
        in_vec[f][col] = value;
        ++col;
    }
    return in_vec;
}

// Returns the value of point n in frame m: the sum 0 + 1 + ... + 4999
// (12497500) plus the product m * n.
// The summation loop is kept on purpose: it is the per-point workload whose
// cost the surrounding program measures.
long acculate(int m, int n)
{
    long sum(0);
    for (long k=0; k<5000; ++k)
        sum += k;
    // Widen before multiplying so the product is computed in long rather
    // than overflowing int for large m and n.
    return sum + static_cast<long>(m) * n;
}
將array改成vector後,執行時間為88s。

下面用8個執行緒平行計算後,執行時間為11s,

直接貼程式碼:

#include <iostream>
#include <vector>
#include <pthread.h>
#include <sstream>

// Standard-library names (vector, cout, endl) are used unqualified below.
using namespace std;

// Table type: a vector of rows, each row a vector of long values.
typedef vector< vector<long> > lvec;

// Zero-initialised 1000 x 5000 global array; not referenced by the code
// visible in this listing.
long arr[1000][5000] = {};

// Argument bundle passed to the thread function through its void* parameter.
struct para {
    lvec* longvec;     // table the thread writes into
    long f;            // index of the frame (row) to fill
    long start_state;  // first column to compute (inclusive)
    long end_state;    // end of the column range (exclusive)
};

// Forward declarations; all three functions are defined after main().
// acculate: value of one point of one frame.
long acculate(int, int);
// forarr: pthread start routine; its argument points to a struct para.
void* forarr(void*);
// aver: splits a count into per-thread chunk sizes.
vector<int> aver (int, int);

int main()
{
    long sum(0);
    lvec ivec;
    ivec.resize(1000);
    
    // 初始化
    for (long i=0; i<1000; ++i) {
        for (long j=0; j<5000; ++j)
            ivec[i].push_back(j);
    }
    cout << ivec[999][4999] << endl;
    
    // 用8個執行緒去跑,執行緒數等於處理器數最優
    int threads_num = 8;
    // 得到每個執行緒要處理的狀態範圍
    vector<int> i_vec;
    i_vec = aver(5000, threads_num);
    for (int i=0; i<threads_num; ++i)
        if (i!=0)
            i_vec[i] = i_vec[i] + i_vec[i-1];          
    
    vector<struct para> struct_vec;
    vector<pthread_t> pthreadT_vec;
    for (long i=0; i<1000; ++i) {
        // 清理的操作尤其重要
        struct_vec.clear();
        pthreadT_vec.clear();
        
        // 完成引數的設定
        for (int ii=0; ii<threads_num; ++ii) {
            struct para p_struct;
            p_struct.longvec = &ivec;
            p_struct.f = i;
            if (i==0)
                p_struct.start_state = 0;
            else
                p_struct.start_state = i_vec[ii-1];
            p_struct.end_state = i_vec[ii];
            struct_vec.push_back(p_struct);
            
            pthread_t id;
            pthreadT_vec.push_back(id); 
        }
        // 多個執行緒同時開啟
        for (int ii=0; ii<threads_num; ++ii)
            pthread_create(&pthreadT_vec[ii], NULL, forarr, &struct_vec[ii]);   
        // 等待執行緒的結束
        for (int ii=0; ii<threads_num; ++ii)
            pthread_join(pthreadT_vec[ii], NULL);  
    }
                  
    cout << ivec[999][4999] << endl;    
    return 0;
}

// Plain (non-thread) helper: stores acculate(f, j) into in_vec[f][j] for
// every j in [st, ed). The table is modified in place; nothing is returned.
void forarr0(lvec& in_vec, long f, long st, long ed) {
    long col = st;
    while (col < ed) {
        const long value = acculate(f, col);
        in_vec[f][col] = value;
        ++col;
    }
}

void* forarr(void* paralist) {
     struct para* p = (struct para*)paralist; 
     lvec* in_vec = p->longvec; // 用指標
     long fra = p->f;
     long start_s = p->start_state;
     long end_s = p->end_state;
     for (long j=start_s; j<end_s; ++j)
            (*in_vec)[fra][j] = acculate(fra, j);
     pthread_exit(NULL);
    //return in_vec;
}

// Returns the value of point n in frame m: the sum 0 + 1 + ... + 4999
// (12497500) plus the product m * n.
// The summation loop is kept on purpose: it is the per-point workload whose
// cost the surrounding program measures.
long acculate(int m, int n)
{
    long sum(0);
    for (long k=0; k<5000; ++k)
        sum += k;
    // Widen before multiplying so the product is computed in long rather
    // than overflowing int for large m and n.
    return sum + static_cast<long>(m) * n;
}

// Splits x items into y chunks whose sizes differ by at most one.
// The first (x % y) chunks get one extra item, so for non-negative x the
// sizes add up to x (e.g. aver(10, 3) -> {4, 3, 3}).
// Returns an empty vector when y <= 0, which also avoids dividing by zero.
std::vector<int> aver (int x, int y) {
    std::vector<int> invec;
    if (y <= 0)
        return invec;

    invec.reserve(y);
    const int m = x / y;  // base chunk size
    const int n = x % y;  // number of chunks that take one extra item
    for (int i=0; i<y; ++i)
        invec.push_back(i < n ? m + 1 : m);
    return invec;
}
我的感想:

1 當執行緒數小於等於邏輯CPU數時,執行時間與執行緒數基本成反比;我的電腦是4核8執行緒(即8個邏輯CPU)的,故用8個執行緒最為合適。執行緒數再加大,多出來的執行緒其實還是處於等待狀態,不會更快;

2 本函式只用到了linux下多執行緒處理的兩個最基本函式pthread_create和pthread_join,因為這裡執行緒間資料的讀寫沒有衝突;當可能產生衝突時,就要用到加鎖技術,見之後的學習。