1. 程式人生 > >深入理解計算機系統家庭作業第五章

深入理解計算機系統家庭作業第五章

/*

***5.15

*/

A.    畫圖略

B.   3

C.   1

D.   乘法不在關鍵路徑上,故乘法可以按流水線執行

/*

***5.16

*/

A.   每次要載入兩個資料,故至少需要兩個週期

B.   迴圈展開並沒有改變關鍵路徑長

/*

***5.17

*/

A.   載入資料的時間

B.   IA32沒有足夠的暫存器來儲存臨時變數

/*

***5.18

*/

/*
 * Inner product of two vectors, written with 3-way loop unrolling.
 *
 * x, y  - input vectors; only x's length is queried, so y is assumed to
 *         hold at least vec_length(x) elements (TODO confirm with callers).
 * dest  - receives the sum of xdata[i] * ydata[i] over all i.
 *
 * vec_length() and get_vec_start() are declared elsewhere; they are
 * presumably the element count and a pointer to the first element.
 */
void inner4(vec_ptr x, vec_ptr y, data_t *dest)
{
	long int i;
	/* Same width as the index so the loop comparisons are not mixed-width. */
	long int length = vec_length(x);
	data_t *xdata = get_vec_start(x);
	data_t *ydata = get_vec_start(y);
	data_t sum = (data_t)0;

	/* Main loop: three products per iteration, combined before touching
	 * sum so only one addition sits on the loop-carried dependency. */
	for (i = 0; i < length - 2; i += 3) {
		sum = sum + (xdata[i] * ydata[i]
			     + xdata[i + 1] * ydata[i + 1]
			     + xdata[i + 2] * ydata[i + 2]);
	}

	/* Remaining 0-2 elements. */
	for (; i < length; i++) {
		sum = sum + xdata[i] * ydata[i];
	}

	*dest = sum;
}

/*

***5.19

*/

/*
 * memset-style fill: store the low byte of c into the first n bytes at s.
 *
 * The fill runs in three phases: single bytes until the address is a
 * multiple of sizeof(unsigned long), then whole word-sized chunks, then the
 * remaining tail bytes. Every phase is bounded by the bytes still to be
 * written, so short or empty requests never write outside [s, s + n).
 *
 * Returns s.
 *
 * NOTE(review): the word phase copies the packed word byte by byte, as the
 * original did; a single unsigned long store would need memcpy or an
 * aliasing-safe cast, which this file does not currently pull in.
 */
void *word_memeset(void *s, int c, size_t n)
{
	const size_t k = sizeof(unsigned long);
	const unsigned char fill = (unsigned char)c;
	unsigned long l;
	unsigned char *schar = (unsigned char *)s;
	unsigned char *lchar = (unsigned char *)&l;

	/* Replicate the low byte of c into every byte of the word. */
	for (size_t i = 0; i < k; i++)
		lchar[i] = fill;

	/* Advance to a k-aligned address, but never past the requested n. */
	while (n > 0 && (size_t)schar % k != 0) {
		*schar++ = fill;
		n--;
	}

	/* Whole words. Testing n >= k avoids the unsigned wrap-around that
	 * an expression like n - k + 1 produces when n < k - 1. */
	while (n >= k) {
		for (size_t j = 0; j < k; j++)
			schar[j] = lchar[j];
		schar += k;
		n -= k;
	}

	/* Trailing bytes that do not fill a whole word. */
	while (n > 0) {
		*schar++ = fill;
		n--;
	}

	return s;
}

/*

***5.20

*/

/*
 * Direct-summation polynomial evaluation with five parallel accumulators.
 *
 * Returns a[0] + a[1]*x + ... + a[degree]*x^degree. Lane w of the main
 * loop handles the coefficients a[1 + w], a[6 + w], ... and keeps its own
 * running power of x, so the five multiply-add chains are independent.
 */
double poly(double a[], double x, int degree)
{
	enum { WAYS = 5 };
	double pw[WAYS];                       /* pw[w] = current power of x for lane w */
	double part[WAYS] = {0, 0, 0, 0, 0};   /* per-lane partial sums */
	double total = a[0];
	long int k = 1;
	int w;

	/* Seed the lanes with x^1 .. x^5. */
	pw[0] = x;
	for (w = 1; w < WAYS; w++)
		pw[w] = x * pw[w - 1];

	/* Each lane's power advances by x^5 per iteration. */
	const double stride = pw[WAYS - 1];

	while (k <= degree - (WAYS - 1)) {
		for (w = 0; w < WAYS; w++)
			part[w] += a[k + w] * pw[w];
		for (w = 0; w < WAYS; w++)
			pw[w] *= stride;
		k += WAYS;
	}

	/* Leftover terms: pw[0] equals x^k at this point. */
	while (k <= degree) {
		total += a[k] * pw[0];
		pw[0] *= x;
		k++;
	}

	/* Fold the lanes in lane order. */
	for (w = 0; w < WAYS; w++)
		total = total + part[w];

	return total;
}

/*
 * Horner's method with 5-way parallel accumulation.
 *
 * Returns a[0] + a[1]*x + ... + a[degree]*x^degree; degree must be >= 0.
 *
 * The coefficients are split into five interleaved groups (indices that are
 * congruent modulo 5, counted down from degree). Each group is reduced by
 * its own Horner recurrence in x^5, so the five chains do not depend on each
 * other. The partial results are then merged with ordinary Horner steps in x.
 */
double polyh(double a[], double x, int degree)
{
	long int i;
	double result;

	/* Too few terms to fill five lanes: plain Horner. */
	if (degree < 5) {
		result = a[degree];
		for (i = degree - 1; i >= 0; i--)
			result = a[i] + x * result;
		return result;
	}

	double result1 = a[degree];
	double result2 = a[degree - 1];
	double result3 = a[degree - 2];
	double result4 = a[degree - 3];
	double result5 = a[degree - 4];
	double step = x * x * x * x * x;

	/* Each lane must feed only itself; chaining one lane into the next
	 * both breaks the parallelism and computes the wrong polynomial. */
	for (i = degree - 5; i >= 4; i -= 5) {
		result1 = a[i]     + result1 * step;
		result2 = a[i - 1] + result2 * step;
		result3 = a[i - 2] + result3 * step;
		result4 = a[i - 3] + result4 * step;
		result5 = a[i - 4] + result5 * step;
	}

	/* Lane 5 holds the lowest index consumed so far (i + 1); each earlier
	 * lane sits one power of x higher, so merge them Horner-style. */
	result = result5 + x * (result4 + x * (result3 + x * (result2 + x * result1)));

	/* Continue Horner through the 0-4 coefficients the lanes did not reach. */
	for (; i >= 0; i--)
		result = a[i] + x * result;

	return result;
}

/*

***5.21

*/

/*
 * Prefix sum: p[i] = a[0] + a[1] + ... + a[i] for 0 <= i < n.
 *
 * The main loop is unrolled three ways and reassociated so that the sums of
 * the new elements are formed independently of last_val; only one addition
 * per output depends on the previous iteration.
 *
 * Does nothing when n <= 0.
 */
void psum1(float a[], float p[], long int n)
{
	long int i;
	/* Must be float: integer temporaries would truncate every partial sum. */
	float last_val, val1, val2, val3;

	if (n <= 0)
		return;

	p[0] = last_val = a[0];

	for (i = 1; i < n - 2; i += 3) {
		/* Sums of the new elements do not wait on last_val. */
		float pair = a[i] + a[i + 1];

		val1 = last_val + a[i];
		val2 = last_val + pair;
		val3 = last_val + (pair + a[i + 2]);
		p[i] = val1;
		p[i + 1] = val2;
		p[i + 2] = val3;
		last_val = val3;
	}

	/* Remaining 0-2 elements. */
	for (; i < n; i++) {
		val1 = last_val + a[i];
		p[i] = val1;
		last_val = val1;
	}
}

/*

***5.22

*/

代入 Amdahl 定律公式 $S = \dfrac{1}{(1-\alpha) + \alpha/k}$(其中 α 為被加速部分佔總執行時間的比例,k 為該部分的加速倍數)可得:

方案1(α = 0.3,k = 3)加速比為   1/(0.7 + 0.3/3) = 1/0.8 = 1.25

方案2(α = 0.5,k = 1.5)加速比為   1/(0.5 + 0.5/1.5) ≈ 1/0.833 = 1.2

故第一種方案比較好