trans.c

/* 
 * Peter Maida, Paris Lopez
 * December 10, 2017
 */


/* 
 * trans.c - Matrix transpose B = A^T
 *
 * Each transpose function must have a prototype of the form:
 * void trans(int M, int N, int A[N][M], int B[M][N]);
 *
 * A transpose function is evaluated by counting the number of misses
 * on a 1KB direct mapped cache with a block size of 32 bytes.
 */ 
#include <stdio.h>
#include "cachelab.h"

/* Forward declaration of the correctness checker defined further down in this file. */
int is_transpose(int M, int N, int A[N][M], int B[M][N]);

/*
 * Block side lengths used by transpose_submit for each size case.
 * The values were picked from test results by the original authors.
 */
enum {
	TRANS_BLOCK_32 = 8,       /* 32 x 32 matrices */
	TRANS_BLOCK_64 = 4,       /* 64 x 64 matrices */
	TRANS_BLOCK_GENERIC = 16  /* every other shape */
};

/*
 * transpose_32x32 - Transposes A into B in 8 x 8 blocks, row major.
 *     The caller only uses it when M == 32 and N == 32, so the blocks
 *     tile the matrix exactly and no bounds checks are needed.
 *
 *     For an element on the matrix diagonal the value is held in a local
 *     and written to B only after the rest of its row has been copied.
 *     NOTE(review): presumably this avoids A and B evicting each other's
 *     cache lines on the diagonal; that depends on how the driver lays
 *     out A and B in memory - confirm.
 */
static void transpose_32x32(int M, int N, int A[N][M], int B[M][N]) {
	for (int row = 0; row < N; row += TRANS_BLOCK_32) {
		for (int col = 0; col < M; col += TRANS_BLOCK_32) {

			for (int blockRow = row; blockRow < row + TRANS_BLOCK_32; blockRow++) {
				// diagonal element of this row; only meaningful in a diagonal block
				int center = 0;

				for (int blockCol = col; blockCol < col + TRANS_BLOCK_32; blockCol++) {
					if (blockRow == blockCol) {
						// on the matrix diagonal: hold the value back for now
						center = A[blockRow][blockCol];
					} else {
						B[blockCol][blockRow] = A[blockRow][blockCol];
					}
				}

				// row == col means this block lies on the diagonal, so the
				// inner loop above is guaranteed to have set center
				if (row == col) {
					B[blockRow][blockRow] = center;
				}
			}

		}
	}
}

/*
 * transpose_64x64 - Transposes A into B in 4 x 4 blocks, row major.
 *     The caller only uses it when M == 64 and N == 64.
 *
 *     Each block of A is first read completely (row by row) into a local
 *     staging tile, and only then written out to B, so reads of A and
 *     writes to B are never interleaved within a block.
 *     NOTE(review): the staging tile is a local array; confirm that the
 *     assignment rules allow local arrays in transpose functions.
 */
static void transpose_64x64(int M, int N, int A[N][M], int B[M][N]) {
	int tile[TRANS_BLOCK_64][TRANS_BLOCK_64];

	for (int row = 0; row < N; row += TRANS_BLOCK_64) {
		for (int col = 0; col < M; col += TRANS_BLOCK_64) {

			// read the whole block of A, row major
			for (int r = 0; r < TRANS_BLOCK_64; r++) {
				for (int c = 0; c < TRANS_BLOCK_64; c++) {
					tile[r][c] = A[row + r][col + c];
				}
			}

			// write it back in the same element order, so B is filled
			// one column of the destination block at a time
			for (int r = 0; r < TRANS_BLOCK_64; r++) {
				for (int c = 0; c < TRANS_BLOCK_64; c++) {
					B[col + c][row + r] = tile[r][c];
				}
			}

		}
	}
}

/*
 * transpose_generic - Transposes an N x M matrix A into B in 16 x 16
 *     blocks, row major. Blocks on the right and bottom edges are
 *     clipped to the matrix bounds, so M and N need not be multiples
 *     of the block length.
 */
static void transpose_generic(int M, int N, int A[N][M], int B[M][N]) {
	for (int row = 0; row < N; row += TRANS_BLOCK_GENERIC) {
		// clip the block to the last row of the matrix
		int rowEnd = (row + TRANS_BLOCK_GENERIC < N) ? row + TRANS_BLOCK_GENERIC : N;

		for (int col = 0; col < M; col += TRANS_BLOCK_GENERIC) {
			// clip the block to the last column of the matrix
			int colEnd = (col + TRANS_BLOCK_GENERIC < M) ? col + TRANS_BLOCK_GENERIC : M;

			for (int blockRow = row; blockRow < rowEnd; blockRow++) {
				for (int blockCol = col; blockCol < colEnd; blockCol++) {
					B[blockCol][blockRow] = A[blockRow][blockCol];
				}
			}
		}
	}
}

/* 
 * transpose_submit - This is the solution transpose function that you
 *     will be graded on for Part B of the assignment. Do not change
 *     the description string "Transpose submission", as the driver
 *     searches for that string to identify the transpose function to
 *     be graded. 
 *
 *     M is the number of columns of A and N the number of rows; B
 *     receives the M x N transpose. The work is dispatched once, by
 *     matrix size, to one of three blocked strategies:
 *       32 x 32  -> 8 x 8 blocks with deferred diagonal writes
 *       64 x 64  -> 4 x 4 blocks staged through a local tile
 *       other    -> 16 x 16 blocks clipped to the matrix bounds
 */
char transpose_submit_desc[] = "Transpose submission";
void transpose_submit(int M, int N, int A[N][M], int B[M][N]) {

	if (N == 32 && M == 32) {
		// the first test case
		transpose_32x32(M, N, A, B);
	} else if (N == 64 && M == 64) {
		// the second test case
		transpose_64x64(M, N, A, B);
	} else {
		// the third test case
		transpose_generic(M, N, A, B);
	}

} // end transpose submit function


/*
 * trans - Reference transpose with no cache blocking: walks A one row
 *     at a time and stores each element into the mirrored cell of B.
 *     M is the number of columns of A, N the number of rows.
 */
char trans_desc[] = "Simple row-wise scan transpose";
void trans(int M, int N, int A[N][M], int B[M][N]) {

    for (int row = 0; row < N; ++row) {
        /* current source row; its elements become column `row` of B */
        const int *src = A[row];

        for (int col = 0; col < M; ++col) {
            B[col][row] = src[col];
        }
    }

}


/*
 * registerFunctions - This function registers your transpose
 *     functions with the driver.  At runtime, the driver will
 *     evaluate each of the registered functions and summarize their
 *     performance. This is a handy way to experiment with different
 *     transpose strategies.
 *
 *     Each registration passes a transpose function together with its
 *     description string. The graded solution is registered first and
 *     the baseline second.
 *     NOTE(review): registerTransFunction is declared outside this
 *     file; whether registration order matters to the driver is not
 *     visible here - confirm before reordering.
 */
void registerFunctions() {

    /* Register your solution function */
    registerTransFunction(transpose_submit, transpose_submit_desc); 

    /* Register any additional transpose functions */
    registerTransFunction(trans, trans_desc); 

}


/*
 * is_transpose - Reports whether B holds the transpose of A.
 *     A has N rows and M columns; B has M rows and N columns.
 *     Returns 1 when every A[row][col] equals B[col][row], and 0 as
 *     soon as the first differing pair is found. Handy as a sanity
 *     check at the end of a transpose function.
 */
int is_transpose(int M, int N, int A[N][M], int B[M][N]) {

    for (int row = 0; row < N; ++row) {
        for (int col = 0; col < M; ++col) {
            if (B[col][row] != A[row][col]) {
                /* one mismatch is enough to reject */
                return 0;
            }
        }
    }

    return 1;

}