/* Compute the integral from 0 to 1 of 4/(1+x*x). This should be equal to PI. The for loop is parallelized by assigning each thread a subset of the iterations, round robin (thread 0 gets iterations 0, NUM_THREADS, 2*NUM_THREADS, ... and so on). To avoid a race condition on the sum, use an array of sums, one for each thread. Scalability is poor. */ #include #include #include #define NUM_THREADS 2 int main(int argc, char** argv) { long num_steps = 10000000; double step, pi; double t1, t2; int global_num_threads=0, i; //one sum for each thread double total_sum, sum[NUM_THREADS]; step = 1.0/(double)num_steps; t1 = omp_get_wtime(); omp_set_num_threads(NUM_THREADS); #pragma omp parallel private(i) { /* declaring these variables here makes them private */ double x; //this thread's id int id = omp_get_thread_num(); //number of threads int num_threads = omp_get_num_threads(); //master sets the global num_threads if (id ==0) global_num_threads = num_threads; //initialize its sum sum[id] = 0; printf("Start(%d)\n", id); //round robin scheduling: assign thread id, iterations id, id //+num_threads, id+2*num_threads, ... for (i=id; i