/* Compute the integral from 0 to 1 of 4/(1+x*x). This should be equal to PI. The for loop is parallelized by assigning each thread a subset of the iterations. Use round robin (thread 0 gets iterations 0, NUM_THREADS, 2*NUM_THREADS, ... and so on). To avoid a race condition on the sum, use synchronization. Scalability: Terrible. Note that most of the computation is serialized! */ #include #include #include #define NUM_THREADS 8 int main(int argc, char** argv) { long num_steps = 100; double step, pi; double t1, t2; int global_num_threads=0, i; double total_sum = 0; step = 1/(double)num_steps; t1 = omp_get_wtime(); omp_set_num_threads(NUM_THREADS); #pragma omp parallel private(i) { /* declaring these variables here makes them private */ double x; //this thread's id int id = omp_get_thread_num(); //number of threads int num_threads = omp_get_num_threads(); if (id ==0) global_num_threads = num_threads; printf("Start(%d)\n", id); //round robin scheduling: assign thread id, iterations id, id //+num_threads, id+2*num_threads, ... for (i=id; i