matlab - Implementing PCA in C -
i trying optimize code. need pca implementation in c results comparable pca in matlab. have implemented pca gsl library using link pca. below code :
#include <assert.h> #include <gsl/gsl_matrix.h> #include <gsl/gsl_statistics.h> #include <gsl/gsl_eigen.h> #include <gsl/gsl_blas.h> gsl_matrix* pca(const gsl_matrix* data, unsigned int l); int main() { int k = 0, = 0,j,rows,cols,count = 1,r,c; gsl_matrix *red_data,*m; rows=5;cols=5; m = gsl_matrix_alloc(rows,cols); /* create matrix */ /* initialise matrix */ (i=0;i<rows;i++) { (j=0;j<cols;j++) { gsl_matrix_set(m,i,j,count); count++; } } printf("matrix m\n"); (i=0;i<rows;i++) { (j=0;j<cols;j++) { printf("%f ",gsl_matrix_get(m,i,j)); } printf("\n"); } red_data = pca(m,4); r = red_data->size1; c = red_data->size2; printf("rows = %d, columns = %d \n",r,c); (i = 0; i<r; i++) { for(k = 0; k < c; k++) { printf("%lf ",gsl_matrix_get(red_data,i,k)); } printf("\n"); } return 0; } gsl_matrix* pca(const gsl_matrix* data, unsigned int l) { /* @param data - matrix of data vectors, mxn matrix, each column data vector, m - dimension, n - data vector count @param l - dimension reduction */ assert(data != null); assert(l > 0 && l < data->size2); unsigned int i; unsigned int rows = data->size1; unsigned int cols = data->size2; gsl_vector* mean = gsl_vector_alloc(rows); for(i = 0; < rows; i++) { gsl_vector_set(mean, i, gsl_stats_mean(data->data + * cols, 1, cols)); } // mean-substracted data matrix mean_substracted_data. gsl_matrix* mean_substracted_data = gsl_matrix_alloc(rows, cols); gsl_matrix_memcpy(mean_substracted_data, data); for(i = 0; < cols; i++) { gsl_vector_view mean_substracted_point_view = gsl_matrix_column(mean_substracted_data, i); gsl_vector_sub(&mean_substracted_point_view.vector, mean); } gsl_vector_free(mean); // compute covariance matrix gsl_matrix* covariance_matrix = gsl_matrix_alloc(rows, rows); gsl_blas_dgemm(cblasnotrans, cblastrans, 1.0 / (double)(cols - 1), mean_substracted_data, mean_substracted_data, 0.0, covariance_matrix); gsl_matrix_free(mean_substracted_data); // eigenvectors, sort eigenvalue. gsl_vector* eigenvalues = gsl_vector_alloc(rows); gsl_matrix* eigenvectors = gsl_matrix_alloc(rows, rows); gsl_eigen_symmv_workspace* workspace = gsl_eigen_symmv_alloc(rows); gsl_eigen_symmv(covariance_matrix, eigenvalues, eigenvectors, workspace); gsl_eigen_symmv_free(workspace); gsl_matrix_free(covariance_matrix); // sort eigenvectors gsl_eigen_symmv_sort(eigenvalues, eigenvectors, gsl_eigen_sort_abs_desc); gsl_vector_free(eigenvalues); // project original dataset gsl_matrix* result = gsl_matrix_alloc(l, cols); gsl_matrix_view l_eigenvectors = gsl_matrix_submatrix(eigenvectors, 0, 0, rows, l); gsl_blas_dgemm(cblastrans, cblasnotrans, 1.0, &l_eigenvectors.matrix, data, 0.0, result); gsl_matrix_free(eigenvectors); // result n lxn matrix, each column original data vector reduced dimension m l return result; }
i used below commands:
$ gcc pca.c -o pca -lgsl -lgslcblas -lm $ ./pca
i have given 5 x 5 matrix , no:of principal components = 4. have compared results matlab ones. not matching. should comparable results ?. or there pca code implemented in c (need not gsl library) matching matlab results. thank ahead.
Comments
Post a Comment