Optimizing Software For Multicore Processors
by Edwin Verplanke

Listing One

{
        /* Excerpt: the statement controlling this block is not shown here. */
        /* Start the worker threads. */
        vp_begin_threads();
        /* Record that thread startup has been performed.
           NOTE(review): vp_begin_threads() returns void and can return early
           when its allocation fails, so this flag is set even in that case --
           confirm this is intended. */
        vp_threads_begun = 1;
}

/*
 * Create the worker threads.
 *
 * Obtains the shared thread data from vp_create_thread_data() and stores it
 * in vp_pthreads_data, then starts threads 1 .. NUM_THREADS-1.  Each thread
 * runs vp_thread_task_loop and receives a pointer to its own
 * vp_pthreads_args[i] entry, which carries the shared data pointer and the
 * thread's id.  Index 0 is not started here; presumably the calling thread
 * does that share of the work itself -- confirm against the dispatch code.
 *
 * If the shared data cannot be allocated, a message is printed and no
 * threads are created.  If pthread_create() fails for a thread, the failure
 * is reported and creation of the remaining threads is still attempted.
 */
void vp_begin_threads()
{
        int i;
        int rc;

        vp_pthreads_data = vp_create_thread_data();
        if (vp_pthreads_data == NULL)
        {
                printf("Unable to allocate memory for threads.\n");
                return;
        }

        for(i=1;i<NUM_THREADS;i++)
        {
                vp_pthreads_args[i].data = vp_pthreads_data;
                vp_pthreads_args[i].id = i;

                /* pthread_create() returns 0 on success and an error number
                   on failure; it does not set errno. */
                rc = pthread_create(&(vp_threads[i]), NULL, vp_thread_task_loop,
                                    &(vp_pthreads_args[i]));
                if (rc != 0)
                {
                        printf("Unable to create thread %d (error %d).\n", i, rc);
                }
        }
}


Listing Two

/*
 * Listing excerpt: build one work descriptor per thread, giving each a
 * consecutive range of k positions, then hand the descriptors to the worker
 * threads and wake them.  The enclosing function and the declarations of i,
 * vpc, kstart, kinc, kincr, the counts/strides and composite_func are outside
 * this excerpt.
 */
vp_thread_big_loop_args loop_args[NUM_THREADS];
    	/* Base number of k steps per thread (kcount >> THREAD_SHIFT).
    	   Presumably THREAD_SHIFT is log2(NUM_THREADS) -- TODO confirm. */
    	int num = (kcount)>>THREAD_SHIFT;
    	/* Remainder (kcount & THREAD_MASK); the loop below gives one extra
    	   step to each thread whose index is below this value.
    	   Presumably THREAD_MASK is NUM_THREADS-1 -- TODO confirm. */
    	int extras = (kcount)&THREAD_MASK;
   	/* Running k position: start of the next thread's range. */
   	int cur_num = kstart;

    /* Fill slot 0 with the parameters common to every thread; it is used as
       the template copied into the other slots below. */
    loop_args[0].vpc = vpc;
    loop_args[0].kstart = kstart;
    loop_args[0].kinc = kinc;
    loop_args[0].icount = icount;
    loop_args[0].jcount = jcount;
    loop_args[0].kcount = kcount;
    loop_args[0].istride = istride;
    loop_args[0].jstride = jstride;
    loop_args[0].kstride = kstride;
    loop_args[0].composite_func = composite_func;

    /* Give every slot (including slot 0, where the copy is a self-assignment)
       its id and its range: kmystart is the current position, kstop is the
       position after advancing by num steps plus one more if i < extras.
       NOTE(review): the step used here is kincr, while slot 0 stores kinc
       above; only one of the two names is likely intended -- confirm against
       the declarations in the enclosing function. */
    for(i=0;i<NUM_THREADS;i++)
    {
		loop_args[i] = loop_args[0];
        	loop_args[i].kmystart = cur_num; 
        	loop_args[i].id = i;
        	cur_num += (num * kincr);
		/* (i < extras) evaluates to 1 or 0, so this adds one extra step
		   for the first `extras` threads only. */
		cur_num += ((i < extras) * kincr);
        	loop_args[i].kstop = cur_num; 
    }
                                                                                
   /* Reset the shared completion counter before dispatching work. */
   vp_pthreads_data->completed_threads = 0;
   /* Publish each worker's descriptor and signal its condition variable.
      The loop starts at 1, so slot 0 is not signalled; presumably the calling
      thread processes loop_args[0] itself -- confirm.
      task_number is written with the same value on every iteration.
      NOTE(review): pthread_cond_signal() takes a pthread_cond_t *, so this
      call is only correct if task_cond[i] is already a pointer -- confirm.
      NOTE(review): no mutex is taken in this excerpt around the shared
      writes and the signal -- confirm how the workers synchronize. */
   for(i=1;i<NUM_THREADS;i++)
   {
        vp_pthreads_data->inputs[i] = &(loop_args[i]);
        vp_pthreads_data->task_number = LOOP_TASK;
        pthread_cond_signal(vp_pthreads_data->task_cond[i]);
   }


1


