12. Seminar - Experiments with parallelization...

Use OpenMP to parallelize next example program computing multiplication of a vector by matrix.

#include<stdio.h>
#include<stdlib.h>
#include<time.h>
#include<omp.h>
#define abs(x) ((x)>0?(x):-(x))

void printUsage(const char *s){
	printf( " *********************************** \
			\n Usage: %s dimM dimN numOfThreads \
			\n *********************************** \
			\n Running with default parameters: %s 100 100 2\n\n",s,s);
}

void matrix_vector_mult_basic           (double *A, double *x, double *y, int M, int N);
void matrix_vector_mult_parallel_first  (double *A, double *x, double *y, int M, int N);
void matrix_vector_mult_parallel_second (double *A, double *x, double *y, int M, int N);
void matrix_vector_mult_parallel_third  (double *A, double *x, double *y, int M, int N);
void matrix_vector_mult_parallel_forth  (double *A, double *x, double *y, int M, int N);
void matrix_vector_mult_parallel_fifth  (double *A, double *x, double *y, int M, int N);

void sleep(int sleep_time_ms);
int  eq_test(double *a, double *b, int dim);
void show_vector(double *a, int dim);

int main(int argc, const char* argv[]){
	int ii,jj, numOfProcs;
	double tmp, sequential_time;
	
	struct timespec start, stop;
    double elapsed;
	
	double *A, *x, *y,*res;
	int M, N, numOfThreads;   // Dimenze matic, pocet vlaken
	
	if(argc < 4){             // Defaultni konfigurace
		printUsage(argv[0]);
		M=N=100;
		numOfThreads=2;
	}else{					  // Konfigurace nactena z prikazoveho radku
		M=atoi(argv[1]);
		N=atoi(argv[2]);
		numOfThreads = atoi(argv[3]);
	}

	/* Alokace pameti */
	if((x=(double*)malloc(N*sizeof(double)))==NULL) { printf("Not enough memory..\n"); return 1; }
	if((y=(double*)malloc(M*sizeof(double)))==NULL) { printf("Not enough memory..\n"); return 1; }
	if((res=(double*)malloc(M*sizeof(double)))==NULL) { printf("Not enough memory..\n"); return 1; }
	if((A=(double*)malloc(M*N*sizeof(double)))==NULL) { printf("Not enough memory..\n"); return 1; }
	
	/* Initialization */
	srand((unsigned)time(0));
	for(ii=0;ii<N;ii++)
		x[ii] = (double)rand()/(double)RAND_MAX;

	for(ii=0;ii<M;ii++)
		for(jj=0;jj<N;jj++)
			A[ii+jj*M] = (double)rand()/(double)RAND_MAX;
			
	/* */
	numOfProcs = omp_get_num_procs(); 
    omp_set_num_threads(numOfThreads); 
	printf("\n M=%d, N=%d, numOfThreads=%d\n\n",M,N,numOfThreads);

	//------------------------------
    clock_gettime(CLOCK_REALTIME, &start);
	matrix_vector_mult_basic(A, x, res, M, N);
    clock_gettime(CLOCK_REALTIME, &stop);
	elapsed = (stop.tv_sec - start.tv_sec)*1000 + (double)(stop.tv_nsec - start.tv_nsec)/(double)1000000;
	sequential_time = elapsed;
    printf( " Execution without parallelization:   %.2lf ms\n", elapsed);
    //------------------------------
	//------------------------------
    clock_gettime(CLOCK_REALTIME, &start);
	matrix_vector_mult_parallel_first(A, x, y, M, N);
    clock_gettime(CLOCK_REALTIME, &stop);
	elapsed = (stop.tv_sec - start.tv_sec)*1000 + (double)(stop.tv_nsec - start.tv_nsec)/(double)1000000;
    printf( " Parallel execution: version 1: %.2lf ms, Speed-up: %.2lf, Efektivnost: %.2lf\n", elapsed, sequential_time/elapsed, sequential_time/elapsed/numOfProcs);
	eq_test(res,y,M);
    //------------------------------
	//------------------------------
    clock_gettime(CLOCK_REALTIME, &start);
	matrix_vector_mult_parallel_second(A, x, y, M, N);
    clock_gettime(CLOCK_REALTIME, &stop);
	elapsed = (stop.tv_sec - start.tv_sec)*1000 + (double)(stop.tv_nsec - start.tv_nsec)/(double)1000000;
    printf( " Parallel execution: version 2: %.2lf ms, Speed-up: %.2lf, Efektivnost: %.2lf\n", elapsed, sequential_time/elapsed, sequential_time/elapsed/numOfProcs);
	eq_test(res,y,M);
    //------------------------------
    //------------------------------
    clock_gettime(CLOCK_REALTIME, &start);
	matrix_vector_mult_parallel_third(A, x, y, M, N);
    clock_gettime(CLOCK_REALTIME, &stop);
	elapsed = (stop.tv_sec - start.tv_sec)*1000 + (double)(stop.tv_nsec - start.tv_nsec)/(double)1000000;
    printf( " Parallel execution: version 3: %.2lf ms, Speed-up: %.2lf, Efektivnost: %.2lf\n", elapsed, sequential_time/elapsed, sequential_time/elapsed/numOfProcs);
	eq_test(res,y,M);
    //------------------------------
	//------------------------------
    clock_gettime(CLOCK_REALTIME, &start);
	matrix_vector_mult_parallel_forth(A, x, y, M, N);
    clock_gettime(CLOCK_REALTIME, &stop);
	elapsed = (stop.tv_sec - start.tv_sec)*1000 + (double)(stop.tv_nsec - start.tv_nsec)/(double)1000000;
    printf( " Parallel execution: version 4: %.2lf ms, Speed-up: %.2lf, Efektivnost: %.2lf\n", elapsed, sequential_time/elapsed, sequential_time/elapsed/numOfProcs);
	eq_test(res,y,M);
    //------------------------------
    printf( " \n");
	
	/* Free allocated memory */
    free(x);
	free(y);
	free(res);
	free(A);
	x = y = res = A = NULL;

	return 0;
}

void matrix_vector_mult_basic(double *A, double *x, double *y, int M, int N)
{
	/*
	Function computes multiplication of matrix by vector: y = A*x
	A - matrix of dimension MxN 
	x - vector of dimension N
	y - vector of dimension M
	*/
	int ii, jj;
	for(ii=0;ii<M;ii++){
		y[ii] = 0.0;
		for(jj=0;jj<N;jj++)
			y[ii] += A[ii+jj*M]*x[jj];
	}
}
/******************************************************************************/
void matrix_vector_mult_parallel_first(double *A, double *x, double *y, int M, int N)
{

}
/******************************************************************************/
void matrix_vector_mult_parallel_second(double *A, double *x, double *y, int M, int N)
{

}
/******************************************************************************/
void matrix_vector_mult_parallel_third(double *A, double *x, double *y, int M, int N)
{

}
/******************************************************************************/
void matrix_vector_mult_parallel_forth(double *A, double *x, double *y, int M, int N)
{

}
/******************************************************************************/
void matrix_vector_mult_parallel_fifth(double *A, double *x, double *y, int M, int N)
{

}
/******************************************************************************/
void sleep(int sleep_time_ms)
{
	struct timespec start, stop;
    double elapsed;
	clock_gettime(CLOCK_REALTIME, &start);
	do{
		clock_gettime(CLOCK_REALTIME, &stop);
		elapsed = (stop.tv_sec - start.tv_sec)*1000 + (double)(stop.tv_nsec - start.tv_nsec)/(double)1000000;
	}while(elapsed<sleep_time_ms);
}
/******************************************************************************/
int eq_test(double *a, double *b, int dim)
{
	int ii;
	for(ii=0;ii<dim;ii++)
		if(a[ii]!=b[ii]) { printf(" Vectors differ !!!\n\n");  return 0; }
	return 1;
}
/******************************************************************************/
void show_vector(double *a, int dim)
{
	int ii;
	for(ii=0;ii<dim;ii++)
		printf("%.2lf ",a[ii]);
	printf("\n");
}




Example of achieved results:

courses/b4m35pap/tutorials/12/start.txt · Last modified: 2018/09/23 21:16 by pisa