matlab - Implementing PCA in C -


i trying optimize code. need pca implementation in c results comparable pca in matlab. have implemented pca gsl library using link pca. below code :

#include <assert.h> #include <gsl/gsl_matrix.h> #include <gsl/gsl_statistics.h> #include <gsl/gsl_eigen.h> #include <gsl/gsl_blas.h> gsl_matrix* pca(const gsl_matrix* data, unsigned int l); int main() {      int k = 0, = 0,j,rows,cols,count = 1,r,c;     gsl_matrix *red_data,*m;         rows=5;cols=5;         m = gsl_matrix_alloc(rows,cols); /* create matrix */    /* initialise matrix */   (i=0;i<rows;i++)     {       (j=0;j<cols;j++)     {       gsl_matrix_set(m,i,j,count);           count++;     }      }     printf("matrix m\n");   (i=0;i<rows;i++)     {       (j=0;j<cols;j++)     {       printf("%f ",gsl_matrix_get(m,i,j));     }       printf("\n");     }        red_data = pca(m,4);         r = red_data->size1;         c = red_data->size2;         printf("rows = %d, columns = %d \n",r,c);      (i = 0; i<r; i++)          {              for(k = 0; k < c; k++)               {                  printf("%lf  ",gsl_matrix_get(red_data,i,k));                }                  printf("\n");             }      return 0; }   gsl_matrix* pca(const gsl_matrix* data, unsigned int l) {     /*     @param data - matrix of data vectors, mxn matrix, each column data vector, m - dimension, n - data vector count     @param l - dimension reduction     */     assert(data != null);     assert(l > 0 && l < data->size2);     unsigned int i;     unsigned int rows = data->size1;     unsigned int cols = data->size2;     gsl_vector* mean = gsl_vector_alloc(rows);      for(i = 0; < rows; i++) {         gsl_vector_set(mean, i, gsl_stats_mean(data->data + * cols, 1, cols));     }      // mean-substracted data matrix mean_substracted_data.     gsl_matrix* mean_substracted_data = gsl_matrix_alloc(rows, cols);     gsl_matrix_memcpy(mean_substracted_data, data);     for(i = 0; < cols; i++) {         gsl_vector_view mean_substracted_point_view = gsl_matrix_column(mean_substracted_data, i);         gsl_vector_sub(&mean_substracted_point_view.vector, mean);     }     gsl_vector_free(mean);      // compute covariance matrix     gsl_matrix* covariance_matrix = gsl_matrix_alloc(rows, rows);     gsl_blas_dgemm(cblasnotrans, cblastrans, 1.0 / (double)(cols - 1), mean_substracted_data, mean_substracted_data, 0.0, covariance_matrix);     gsl_matrix_free(mean_substracted_data);      // eigenvectors, sort eigenvalue.     gsl_vector* eigenvalues = gsl_vector_alloc(rows);     gsl_matrix* eigenvectors = gsl_matrix_alloc(rows, rows);     gsl_eigen_symmv_workspace* workspace = gsl_eigen_symmv_alloc(rows);     gsl_eigen_symmv(covariance_matrix, eigenvalues, eigenvectors, workspace);     gsl_eigen_symmv_free(workspace);     gsl_matrix_free(covariance_matrix);      // sort eigenvectors     gsl_eigen_symmv_sort(eigenvalues, eigenvectors, gsl_eigen_sort_abs_desc);     gsl_vector_free(eigenvalues);      // project original dataset     gsl_matrix* result = gsl_matrix_alloc(l, cols);     gsl_matrix_view l_eigenvectors = gsl_matrix_submatrix(eigenvectors, 0, 0, rows, l);     gsl_blas_dgemm(cblastrans, cblasnotrans, 1.0, &l_eigenvectors.matrix, data, 0.0, result);     gsl_matrix_free(eigenvectors);      // result n lxn matrix, each column original data vector reduced dimension m l     return result; }   

i used below commands:

$ gcc pca.c -o pca -lgsl -lgslcblas -lm $ ./pca 

i have given 5 x 5 matrix , no:of principal components = 4. have compared results matlab ones. not matching. should comparable results ?. or there pca code implemented in c (need not gsl library) matching matlab results. thank ahead.


Comments

Popular posts from this blog

aws api gateway - SerializationException in posting new Records via Dynamodb Proxy Service in API -

asp.net - Problems sending emails from forum -