Using Hardware Trace for Performance Analysis
by Michael Lindahl

Listing One

/* This code implements a simple bubble sort algorithm.  Figure 1 shows the 
 * output from a traditional profiling tool for this code.  You can see that
 * roughly 28% of the time running this program consists of copying memory 
 * using the memset function.  From looking at the code, this is a non-obvious
 * result.
 */
#include <stdio.h>
#define N 10
int array_to_sort[N] = {15, 6, 25, 36, 92, 0, 2, 67, 82, 91};

void bubble_sort(int *array, int size)
{
    int i, j;
    for(i=0; i<size-1; i++) {
    for(j=0; j<size-i-1; j++) {
        if(array[j] > array[j+1]) {
        int tmp = array[j];
        array[j] = array[j+1];
        array[j+1] = tmp;
        }
    }
    }
}
void print_array(int *array, int size)
{
    int i;
    for(i=0; i<size; i++) {
    printf("%d\n", array[i]);
    }
}
int main(int argc, char *argv[])
{
    int tmp_array[N];
    memcpy(tmp_array, array_to_sort, sizeof(array_to_sort));
    bubble_sort(tmp_array, N);
    print_array(tmp_array, N);
    return 0;
}


Listing Two

/* This code contains an implementation of the basic quicksort algorithm. 
 * The program's main loop reads an array, sorts it and then verifies that 
 * the array is properly sorted.  By tracing this program, we can easily 
 * identify slow call to quick_sort() and determine causes of this slowdown.
 */

#include <stdio.h>
#define N 100

int array1[N];
int array2[N];

#define swap(a, b) { int tmp = a; a = b; b = tmp; }

void init()
{
    int i;
    for(i=0; i<N; i++) {
    array1[i] = rand() % 100;
    }   
}
void read_array(int *dest, int *size, int which_array)
{
    *size = N;
    if(which_array != 6) {
    memcpy(dest, array1 + which_array,
        sizeof(int) * (N - which_array));
    memcpy(dest+N-which_array, array1,
        sizeof(int) * which_array);
    } else {
    int i;
    for(i=0; i<N; i++) {
        array2[i] = i;
    }
    memcpy(dest, array2, sizeof(int) * *size);
    }
}
void quick_sort_r(int *array, int l, int r)
{
    int partition_key, i, j;
    if(r <= l)
    return;

    i = l-1;
    j = r;
    partition_key = array[r];
    while(1) {
    while(array[++i] < partition_key)
        ;
    while(partition_key < array[--j]) {
        if( j == l )
        break;
    }
    if( i >= j )
        break;
    swap(array[i], array[j]);
    }
    swap(array[i], array[r]);
    quick_sort_r(array, l, i-1);
    quick_sort_r(array, i+1, r);
}
void quick_sort(int *array, int size)
{
    quick_sort_r(array, 0, size-1);
}
int verify_array(int *array, int size)
{
    int i;
    int last_val = 0;
    for(i=0; i<size; i++) {
    if(array[i] < last_val)
        return 0;
    last_val = array[i];
    }
    return 1;
}
int main(int argc, char *argv[])
{
    int i;
    int tmp_array[N];
    int size;
    init();
    for(i=0; i<10; i++) {
    // Let's read in an array and sort it
    read_array(tmp_array, &size, i);
    quick_sort(tmp_array, size);

    // Now let's verify that our array is
    // sorted properly.
    if(verify_array(tmp_array, size) == 0) {
        printf("Array not sorted correctly\n");
        break;
    }
    }
    return 0;
}







3


