OpenMP - WindyHana's Solanara

목차

개요

OpenMP 예제

OpenMP 지원 확인

OpenMP가 지원되는지는 _OPENMP 매크로가 정의되어 있는지 확인하면 된다. omp_get_* 함수를 사용해 각종 상태를 알아낼 수 있다.
openmp_status.c
다운로드 (625 바이트)
/*
	Print OpenMP status.
	WindyHana's Solanara: OpenMP http://www.solanara.net/solanara/openmp
	cc -O3 -xopenmp=parallel -o openmpstatus openmp_status.c
*/
#include <stdio.h>
#include <omp.h>

/*
	Print the OpenMP runtime state once per thread of a parallel region,
	or a single fallback line when the compiler did not enable OpenMP.
*/
int main(int argc, char *argv[]) {
#if defined (_OPENMP)
	#pragma omp parallel default(shared)
	{
		/* Declared inside the region, so every thread works on its own copies. */
		int max_threads = omp_get_max_threads();
		int team_size = omp_get_num_threads();
		int thread_id = omp_get_thread_num();

		printf("MaxThreads: %d, UsedThreads: %d, ThreadNum: %d\n", max_threads, team_size, thread_id);
	}
#else
	/* Without OpenMP there is no runtime to query and only one thread runs. */
	printf("No OpenMP used.\n");
#endif
	return 0;
}
root@wl ~ # export OMP_NUM_THREADS=2
root@wl ~ # cc -O3 -xopenmp=parallel -o openmpstatus openmp_status.c
root@wl ~ # ./openmpstatus
MaxThreads: 2, UsedThreads: 2, ThreadNum: 0
MaxThreads: 2, UsedThreads: 2, ThreadNum: 1
root@wl ~ # 

OpenMP 환경변수

※ 범용 OpenMP 환경 변수 ※ 코어가 4개가 아닌 시스템에서 강제로 4개의 쓰레드를 사용하게 하려면 다음과 같이 하면 된다. 싱글 코어 시스템에서도 아래와 같이 하면 강제로 4개의 쓰레드를 사용하게 된다. (당연히 성능은 떨어진다)
root@wl ~ # export OMP_NUM_THREADS=4
root@wl ~ # export OMP_DYNAMIC=FALSE
root@wl ~ # ./matrix
※ 솔라리스 전용 OpenMP 환경 변수

OpenMP를 이용한 행렬 계산

[omp parallel for] 구문에 대한 예제이다.
openmp_matrix.c
다운로드 (767 바이트)
/*
	Calc simple matrix
	WindyHana's Solanara: OpenMP http://www.solanara.net/solanara/openmp
	cc -O3 -mt -lmtmalloc -xopenmp=parallel -o openmpmatrix openmp_matrix.c
*/
#include <stdio.h>
#include <stdlib.h>
#include <omp.h>
#include <mtmalloc.h>
const int nrows =      10;
const int ncols = 1000000;

/*
	Allocate nrows rows of ncols ints, then run a toy per-row calculation.
	Both the allocation loop and the calculation loop are "omp parallel for"
	loops over the rows, so each row is handled by exactly one thread.

	Returns 0 on success, 1 if any row could not be allocated.
*/
int main(int argc, char *argv[]) {
	int *array[nrows];
	int alloc_failed = 0;

	printf("Initializing...\n");
	#pragma omp parallel for
		for (int i = 0; i < nrows; i++) {
			/* calloc zero-fills the row, so the calculation below never reads indeterminate values. */
			array[i] = calloc((size_t)ncols, sizeof *array[i]);
		}

	/* Checked serially after the parallel loop so the failure path has a single exit. */
	for (int i = 0; i < nrows; i++) {
		if (array[i] == NULL) {
			alloc_failed = 1;
		}
	}
	if (alloc_failed) {
		fprintf(stderr, "Out of memory\n");
		for (int i = 0; i < nrows; i++) {
			free(array[i]);
		}
		return 1;
	}

	/*
		NOTE(review): the thread that owns row i also reads array[k][j] from rows
		owned by other threads while those threads may be writing them. With
		zero-filled rows every product is 0, but this is still formally a data
		race; write into a separate result matrix if real data is ever used.
	*/
	#pragma omp parallel for
		for (int i = 0; i < nrows; i++) {
			printf("Calc %d column by ThreadNum %d\n", i, omp_get_thread_num());
			for (int j = 0; j < ncols; j++) {
				for (int k = 0; k < nrows; k++) {
					array[i][j] = array[i][k] * array[k][j];
				}
			}
		}

	for (int i = 0; i < nrows; i++) {
		free(array[i]);
	}
	return 0;
}
root@wl ~ # export OMP_NUM_THREADS=2
root@wl ~ # cc -O3 -mt -lmtmalloc -xopenmp=parallel -o openmpmatrix openmp_matrix.c
root@wl ~ # ./openmpmatrix
Initializing...
Calc 0 column by ThreadNum 0
Calc 5 column by ThreadNum 1
Calc 6 column by ThreadNum 1
Calc 1 column by ThreadNum 0
Calc 2 column by ThreadNum 0
Calc 7 column by ThreadNum 1
Calc 3 column by ThreadNum 0
Calc 8 column by ThreadNum 1
Calc 4 column by ThreadNum 0
Calc 9 column by ThreadNum 1
root@wl ~ #

OpenMP를 이용한 QuickSort

openmp_quick.c
다운로드 (3,131 바이트)
/*
	QuickSort for Solaris, OpenMP
	WindyHana's Solanara: OpenMP http://www.solanara.net/solanara/openmp
	cc -O3 -xopenmp=parallel -o openmpquick openmp_quick.c
*/
#include <stdio.h>
#include <stdlib.h> 
#include <string.h>
#include <omp.h>
#include <sys/time.h>

/* Number of elements in each array to be sorted. */
#define MAX_ELEMENTS 500000
/* Size in bytes of one array; parenthesized so it expands safely inside larger expressions. */
#define MEMALLOCSIZE (MAX_ELEMENTS * sizeof(int))

/* Sorts data[lo..hi] (inclusive bounds) in place, in ascending order. */
void quicksort(unsigned int * data, int lo, int hi);
/* Same partition step as quicksort(); the two partitions are then sorted in separate OpenMP sections. */
void quicksort_openmp(unsigned int * data, int lo, int hi);
/* Prints the array; when size > 100 only the first two and the last three elements are printed. */
void printdata(unsigned int * data, int size);
/* Returns the current time in microseconds, as reported by gettimeofday(). */
unsigned long long gettimestamp();

/*
	Fill two identical arrays with random 32-bit values, sort one with the
	serial quicksort() and the other with quicksort_openmp(), print a sample
	of each result and report both elapsed times in microseconds.

	Returns 0 on success, 1 if the arrays could not be allocated.
*/
int main(int argc, char *argv[]){
	unsigned int * data;
	unsigned int * data2;
	unsigned long long e1, e2, t1, t2;
	unsigned int seed = (unsigned int)gettimestamp();
	printf("Seed is %u\n", seed);
	srand(seed);

	/* The allocation size is a size_t, so it must be printed with %zu. */
	printf("Alloc Memory: %zu * 2 bytes\n", (size_t)(MEMALLOCSIZE));
	data = malloc(MEMALLOCSIZE);
	data2 = malloc(MEMALLOCSIZE);
	if (data == NULL || data2 == NULL) {
		fprintf(stderr, "Out of memory\n");
		free(data);
		free(data2);
		return 1;
	}

	printf("Initializing Data [%d]\n", MAX_ELEMENTS);
	for (int i = 0; i < MAX_ELEMENTS; i++) {
		/* Combine two 16-bit halves using unsigned arithmetic: shifting a signed
		   int left by 16 overflows once the top half reaches 0x8000. */
		unsigned int r1 = (unsigned int)rand() & 0x0000FFFFu;
		unsigned int r2 = (unsigned int)rand() & 0x0000FFFFu;
		data[i] = (r1 << 16) | r2;
	}
	/* Both sorts must see exactly the same input. */
	memcpy(data2, data, MEMALLOCSIZE);

    #if defined (_OPENMP)
		printf("Max OpenMP Threads: %d\n", omp_get_max_threads());
    #endif

	t1 = gettimestamp();
	quicksort(data, 0, MAX_ELEMENTS - 1);
	t2 = gettimestamp();
	printf("QuickSort\n");
	printdata(data, MAX_ELEMENTS);
	e1 = t2 - t1;

	t1 = gettimestamp();
	quicksort_openmp(data2, 0, MAX_ELEMENTS - 1);
	t2 = gettimestamp();
	printf("QuickSort(OpenMP)\n");
	printdata(data2, MAX_ELEMENTS);
	e2 = t2 - t1;

	printf("       Elapsed: %llu\n", e1);
	printf("OpenMP Elapsed: %llu\n", e2);
	free(data);
	free(data2);
	return 0;
}

/*
	Sort data[lo..hi] (inclusive bounds) in place, in ascending order.
	The range is partitioned once on the calling thread; the two resulting
	partitions are then sorted by the serial quicksort() in separate OpenMP
	sections, so at most two threads work at the same time.
*/
void quicksort_openmp(unsigned int * data, int lo, int hi) {
	/* Nothing to sort; also avoids starting a parallel region for no work. */
	if (lo >= hi) {
		return;
	}

	int i = lo, j = hi;
	/* The pivot has the element type: storing it in an int would rely on an
	   implementation-defined conversion for values above INT_MAX. The
	   midpoint is written so that lo + hi cannot overflow. */
	unsigned int x = data[lo + (hi - lo) / 2];
	do {
		while (data[i] < x) i++;
		while (data[j] > x) j--;
		if (i <= j) {
			unsigned int temp = data[i]; data[i] = data[j]; data[j] = temp; // SWAP
			i++; j--;
		}
	} while (i <= j);
	#pragma omp parallel sections
	{
		#pragma omp section
		{
			if (lo < j) quicksort(data, lo, j);
		}
		#pragma omp section
		{
			if (i < hi) quicksort(data, i, hi);
		}
	}
}

// Serial quicksort: the same algorithm as quicksort_openmp() without the OpenMP pragmas.
// Sorts data[lo..hi] (inclusive bounds) in place, in ascending order.
void quicksort(unsigned int * data, int lo, int hi) {
	// Ranges with fewer than two elements are already sorted.
	if (lo >= hi) {
		return;
	}

	int i = lo, j = hi;
	// The pivot has the element type: storing it in an int would rely on an
	// implementation-defined conversion for values above INT_MAX. The
	// midpoint is written so that lo + hi cannot overflow.
	unsigned int x = data[lo + (hi - lo) / 2];
	do {
		while (data[i] < x) i++;
		while (data[j] > x) j--;
		if (i <= j) {
			unsigned int temp = data[i]; data[i] = data[j]; data[j] = temp; // SWAP
			i++; j--;
		}
	} while (i <= j);
	if (lo < j) quicksort(data, lo, j);
	if (i < hi) quicksort(data, i, hi);
}

// Print an unsigned integer array (for debugging).
// Arrays of up to 100 elements are printed in full; for larger arrays only
// the first two and the last three elements are printed, separated by "...".
void printdata(unsigned int * data, int size) {
	if (size > 100) {
		for (int i = 0; i < 2; i ++) {
			// %u matches the unsigned element type; %d would show values above INT_MAX as negative.
			printf("data[%d] = %u\n", i, data[i]);
		}
		printf("...\n");
		for (int i = size - 3; i < size; i ++) {
			printf("data[%d] = %u\n", i, data[i]);
		}
	} else {
		for (int i = 0; i < size; i ++) {
			printf("data[%d] = %u\n", i, data[i]);
		}
	}
}

// Get the current timestamp in microseconds, as reported by gettimeofday().
// Returns 0 if gettimeofday() fails.
unsigned long long gettimestamp(void) {
	struct timeval t;
	if (gettimeofday(&t, NULL) != 0) {
		return 0;
	}
	// Widen before multiplying: tv_sec * 1000000 overflows where time_t is 32 bits wide.
	return (unsigned long long)t.tv_sec * 1000000ULL + (unsigned long long)t.tv_usec;
}
root@wl ~ # export OMP_NUM_THREADS=2
root@wl ~ # cc -O3 -xopenmp=parallel -o openmpquick openmp_quick.c
root@wl ~ # ./openmpquick
Seed is 1389706669
Alloc Memory: 2000000 * 2 bytes
Initializing Data [500000]
Max OpenMP Threads: 2
QuickSort
data[0] = 636
data[1] = 2446
...
data[499997] = 2147448088
data[499998] = 2147448460
data[499999] = 2147449545
QuickSort(OpenMP)
data[0] = 636
data[1] = 2446
...
data[499997] = 2147448088
data[499998] = 2147448460
data[499999] = 2147449545
       Elapsed: 84537
OpenMP Elapsed: 77274
root@wl ~ #

OpenMP를 이용한 소수 찾기

openmp_prime.c
다운로드 (758 바이트)
/*
	Print Prime Number
	WindyHana's Solanara: OpenMP http://www.solanara.net/solanara/openmp
	cc -O3 -mt -lmtmalloc -xopenmp=parallel -o openmpprime openmp_prime.c
*/
#include <stdio.h>
#include <omp.h>

#define START 100000000
#define END   999999999

// Returns 1 if the given value is a prime number, 0 otherwise.
// The trial-division loop is intentionally left unoptimized.
int isPrime(long long l) {
	// 0, 1 and negative values are not prime.
	if (l < 2) {
		return 0;
	}
	// 2 is the only even prime; it must be accepted before the parity test.
	if (l == 2) {
		return 1;
	}
	if (l % 2 == 0) {
		return 0;
	}
	// Same bound as (l + 1) / 2 for odd l, written so that l + 1 cannot overflow.
	long long max = l / 2 + 1;

	for (long long i = 2; i < max; i ++) {
		if (l % i == 0) {
			return 0;
		}
	}
	return 1;
}

/*
	Test every number from START to END for primality, splitting the range
	across OpenMP threads, and print each prime together with the number of
	the thread that found it.
*/
int main(int argc, char *argv[]) {
	#pragma omp parallel for
	for (long long candidate = START; candidate <= END; candidate++) {
		/* Skip composites early; only primes produce output. */
		if (!isPrime(candidate)) {
			continue;
		}
		printf("PRIME NUMBER: %lld on %d\n", candidate, omp_get_thread_num());
	}
	return 0;
}
root@wl ~ # export OMP_NUM_THREADS=2
root@wl ~ # cc -O3 -mt -lmtmalloc -xopenmp=parallel -o openmpprime openmp_prime.c
root@wl ~ # ./openmpprime
PRIME NUMBER: 100000007 on 0
PRIME NUMBER: 100000037 on 0
PRIME NUMBER: 100000039 on 0
PRIME NUMBER: 100000049 on 0
PRIME NUMBER: 100000073 on 0
PRIME NUMBER: 550000001 on 1
PRIME NUMBER: 100000081 on 0
PRIME NUMBER: 100000123 on 0
PRIME NUMBER: 100000127 on 0
PRIME NUMBER: 100000193 on 0
PRIME NUMBER: 100000213 on 0
PRIME NUMBER: 100000217 on 0
PRIME NUMBER: 550000007 on 1
...
RSS ATOM XHTML 1.0 CSS3