IFPACK: icrout_cholesky_mex.c Source File

00001 /*BHEADER**********************************************************************
00002  * (c) 2002   The Regents of the University of California
00003  *
00004  * See the file COPYRIGHT_and_DISCLAIMER for a complete copyright
00005  * notice, contact person, and disclaimer.
00006  *
00007  * $Revision: 1.3.14.1 $
00008 *********************************************************************EHEADER*/
00009 
00010 /*
00011  * Crout-form incomplete factorization
00012  *
00013  * To create a Matlab interface for icrout_cholesky_mex, define the symbol
00014  * MATLAB and compile this file as a Matlab mex program.
00015  * 
00016  * Contact Info for this code
00017  * --------------------------
00018  * Edmond Chow, Lawrence Livermore National Laboratory, echow@llnl.gov
00019  *
00020  * Revision History
00021  * ----------------
00022  * 06/04/02 Cleaned up for Boeing
00023  * 11/06/02 Added icrout_cholesky_mex
00024  */
00025 
00026 #undef IFPACK
00027 
00028 #define SYMSTR 1 /* structurally symmetric version */
00029 
00030 #ifdef MATLAB
00031 
00032 #include "matrix.h"
00033 #include "mex.h"
00034 #define malloc mxMalloc
00035 #define calloc mxCalloc
00036 #define realloc mxRealloc
00037 #define free mxFree
00038 #define printf mexPrintf
00039 
00040 #else
00041 
00042 #include <stdio.h>
00043 
00044 #endif
00045 
00046 #include <stdlib.h>
00047 #include <math.h>
00048 #include <assert.h>
00049 
00050 #define MIN(a,b) ((a)<=(b) ? (a) : (b))
00051 #define MAX(a,b) ((a)>=(b) ? (a) : (b))
00052 #define ABS(a) ((a)>=0 ? (a) : -(a))
00053 
00054 /*
00055  * Data structure for sparse matrices is CSR, 0-based indexing.
00056  */
00057 typedef struct {
00058     double *val;  /* also known as A  */
00059     int    *col;  /* also known as JA; first column is column 0 */
00060     int    *ptr;  /* also known as IA; with ptr[0] = 0 */
00061 } Matrix;
00062 
00063 void quicksort (int *const pbase, double *const daux, size_t total_elems);
00064 
00065 #define SHORTCUT(p, a, ja, ia) \
00066         (a)  = (p)->val; \
00067         (ja) = (p)->col; \
00068         (ia) = (p)->ptr;
00069 
00070 #define MATNULL(p) \
00071         (p).val = NULL; \
00072         (p).col = NULL; \
00073         (p).ptr = NULL;
00074 
00075 void Matrix_alloc(Matrix *a, int n, int nnz)
00076 {
00077     a->val = (double *) malloc(nnz * sizeof(double));
00078     a->col = (int *) malloc(nnz * sizeof(int));
00079     a->ptr = (int *) malloc((n+1) * sizeof(int));
00080 }
00081  
00082 void Matrix_dealloc(Matrix *a)
00083 {
00084     free(a->val);
00085     free(a->col);
00086     free(a->ptr);
00087     a->val = NULL;
00088     a->col = NULL;
00089     a->ptr = NULL;
00090 }
00091 
00092 static void qsplit(double *a, int *ind, int n, int ncut)
00093 {
00094     double tmp, abskey;
00095     int itmp, first, last, mid;
00096     int j;
00097  
00098     ncut--;
00099     first = 0;
00100     last = n-1;
00101     if (ncut < first || ncut > last) 
00102         return;
00103  
00104     /* outer loop while mid != ncut */
00105     while (1)
00106     {
00107         mid = first;
00108         abskey = ABS(a[mid]);
00109         for (j=first+1; j<=last; j++)
00110         {
00111             if (ABS(a[j]) > abskey)
00112             {
00113                 mid = mid+1;
00114                 /* interchange */
00115                 tmp = a[mid];
00116                 itmp = ind[mid];
00117                 a[mid] = a[j];
00118                 ind[mid] = ind[j];
00119                 a[j]  = tmp;
00120                 ind[j] = itmp;
00121             }
00122         }
00123        
00124         /* interchange */
00125         tmp = a[mid];
00126         a[mid] = a[first];
00127         a[first]  = tmp;
00128         itmp = ind[mid];
00129         ind[mid] = ind[first];
00130         ind[first] = itmp;
00131        
00132         /* test for while loop */
00133         if (mid == ncut) 
00134             return;
00135 
00136         if (mid > ncut)
00137             last = mid-1;
00138         else
00139             first = mid+1;
00140     }
00141 }
00142 
00143 /* update column k using previous columns */
00144 /* assumes that column of A resides in the work vector */
00145 /* this function can also be used to update rows */
00146  
00147 static void update_column(
00148     int k,
00149     const int *ia, const int *ja, const double *a,
00150     const int *ifirst,
00151     const int *ifirst2,
00152     const int *list2,
00153     const double *multipliers, /* the val array of the other factor */
00154     const double *d, /* diagonal of factorization */
00155     int *marker,
00156     double *ta,
00157     int *itcol,
00158     int *ptalen)
00159 {
00160     int j, i, isj, iej, row;
00161     int talen, pos;
00162     double mult;
00163  
00164     talen = *ptalen;
00165 
00166     j = list2[k];
00167     while (j != -1)
00168     {
00169         /* update column k using column j */
00170  
00171         isj = ifirst[j];
00172 
00173         /* skip first nonzero in column, since it does not contribute */
00174         /* if symmetric structure */
00175         /* isj++; */
00176  
00177         /* now do the actual update */
00178        if (isj != -1)
00179        {
00180         /* multiplier */
00181         mult = multipliers[ifirst2[j]] * d[j];
00182 
00183         /* section of column used for update */
00184         iej = ia[j+1]-1;
00185 
00186         for (i=isj; i<=iej; i++)
00187         {
00188             row = ja[i];
00189 
00190             /* if nonsymmetric structure */
00191             if (row <= k)
00192                 continue;
00193  
00194             if ((pos = marker[row]) != -1)
00195             {
00196                 /* already in pattern of working row */
00197                 ta[pos] -= mult*a[i];
00198             }
00199             else
00200             {
00201                 /* not yet in pattern of working row */
00202                 itcol[talen] = row;
00203                 ta[talen] = -mult*a[i];
00204                 marker[row] = talen++;
00205             }
00206         }
00207        }
00208  
00209         j = list2[j];
00210     }
00211 
00212     *ptalen = talen;
00213 }
00214 
00215 /* update ifirst and list */
00216  
00217 static void update_lists(
00218     int k,
00219     const int *ia,
00220     const int *ja,
00221     int *ifirst,
00222     int *list)
00223 {
00224     int j, isj, iej, iptr;
00225 
00226     j = list[k];
00227     while (j != -1)
00228     {
00229         isj = ifirst[j];
00230         iej = ia[j+1]-1;
00231  
00232         isj++;
00233  
00234         if (isj != 0 && isj <= iej) /* nonsymmetric structure */
00235         {
00236             /* increment ifirst for column j */
00237             ifirst[j] = isj;
00238 
00239             /* add j to head of list for list[ja[isj]] */
00240             iptr = j;
00241             j = list[j];
00242             list[iptr] = list[ja[isj]];
00243             list[ja[isj]] = iptr;
00244         }
00245         else
00246         {
00247             j = list[j];
00248         }
00249     }
00250 }
00251 
00252 static void update_lists_newcol(
00253     int k,
00254     int isk,
00255     int iptr,
00256     int *ifirst,
00257     int *list)
00258 {
00259     ifirst[k] = isk;
00260     list[k] = list[iptr];
00261     list[iptr] = k;
00262 }
00263 
00264 /*
00265  * crout_ict - Crout version of ICT - Incomplete Cholesky by Threshold
00266  *
00267  * The incomplete factorization L D L^T is computed,
00268  * where L is unit triangular, and D is diagonal
00269  *
00270  * INPUTS
00271  * n = number of rows or columns of the matrices
00272  * AL = unit lower triangular part of A stored by columns
00273  *            the unit diagonal is implied (not stored)
00274  * Adiag = diagonal part of A
00275  * droptol = drop tolerance
00276  * lfil  = max nonzeros per col in L factor or per row in U factor
00277  *
00278  * OUTPUTS
00279  * L     = lower triangular factor stored by columns
00280  * pdiag = diagonal factor stored in an array
00281  *
00282  * NOTE: calling function must free the memory allocated by this
00283  * function, i.e., L, pdiag.
00284  */
00285 
00286 void crout_ict(
00287     int n,
00288 #ifdef IFPACK
00289     void * A,
00290     int maxentries;
00291     int (*getcol)( void * A, int col, int ** nentries, double * val, int * ind),
00292     int (*getdiag)( void *A, double * diag),
00293 #else
00294     const Matrix *AL,
00295     const double *Adiag,
00296 #endif
00297     double droptol,
00298     int lfil,
00299     Matrix *L,
00300     double **pdiag)
00301 {
00302     int k, j, i, index;
00303     int count_l;
00304     double norm_l;
00305 
00306     /* work arrays; work_l is list of nonzero values */
00307     double *work_l = (double *) malloc(n * sizeof(double));
00308     int    *ind_l  = (int *)    malloc(n * sizeof(int));
00309     int     len_l;
00310 
00311     /* list and ifirst data structures */
00312     int *list_l    = (int *) malloc(n * sizeof(int));
00313     int *ifirst_l  = (int *) malloc(n * sizeof(int));
00314     
00315     /* aliases */
00316     int *marker_l  = ifirst_l;
00317 
00318     /* matrix arrays */
00319     double *al; int *jal, *ial;
00320     double *l;  int *jl,  *il;
00321 
00322     int kl = 0;
00323 
00324     double *diag = (double *) malloc(n * sizeof(double));
00325     *pdiag = diag;
00326 
00327     Matrix_alloc(L,  n, lfil*n);
00328 
00329     SHORTCUT(AL, al, jal, ial);
00330     SHORTCUT(L,  l,  jl,  il);
00331 
00332     /* initialize work arrays */
00333     for (i=0; i<n; i++)
00334     {
00335         list_l[i]    = -1;
00336         ifirst_l[i]  = -1;
00337         marker_l[i]  = -1;
00338     }
00339 
00340     /* copy the diagonal */
00341 #ifdef IFPACK
00342     getdiag(A, diag);
00343 #else
00344     for (i=0; i<n; i++)
00345         diag[i] = Adiag[i];
00346 #endif
00347 
00348     /* start off the matrices right */
00349     il[0]  = 0;
00350 
00351     /*
00352      * Main loop over columns and rows
00353      */
00354 
00355     for (k=0; k<n; k++)
00356     {
00357         /*
00358          * lower triangular factor update by columns
00359          * (need ifirst for L and lists for U)
00360          */
00361  
00362         /* copy column of A into work vector */
00363         norm_l = 0.;
00364 #ifdef IFPACK
00365       getcol(A, k, len_l, work_l, ind_l);
00366       for (j=0; j<len_l; j++)
00367     {
00368       norm_l += ABS(work_l[j]);
00369       marker_l[ind_l[j]] = j;
00370     }
00371 #else
00372         len_l = 0;
00373         for (j=ial[k]; j<ial[k+1]; j++)
00374         {
00375             index = jal[j];
00376             work_l[len_l] = al[j];
00377             norm_l += ABS(al[j]);
00378             ind_l[len_l] = index;
00379             marker_l[index] = len_l++; /* points into work array */
00380         }
00381 #endif
00382         norm_l = (len_l == 0) ? 0.0 : norm_l/((double) len_l);
00383  
00384         /* update and scale */
00385 
00386         update_column(k, il, jl, l, ifirst_l, ifirst_l, list_l, l,
00387             diag, marker_l, work_l, ind_l, &len_l);
00388 
00389         for (j=0; j<len_l; j++)
00390             work_l[j] /= diag[k];
00391 
00392     i = 0;
00393         for (j=0; j<len_l; j++)
00394     {
00395         if (ABS(work_l[j]) < droptol * norm_l)
00396         {
00397             /* zero out marker array for this */
00398         marker_l[ind_l[j]] = -1;
00399         }
00400         else
00401         {
00402             work_l[i] = work_l[j];
00403         ind_l[i]  = ind_l[j];
00404         i++;
00405         }
00406     }
00407     len_l = i;
00408 
00409     /*
00410      * dropping:  for each work vector, perform qsplit, and then
00411      * sort each part by increasing index; then copy each sorted
00412      * part into the factors
00413      */
00414 
00415     count_l = MIN(len_l, lfil);
00416     qsplit(work_l, ind_l, len_l, count_l);
00417     quicksort(ind_l, work_l, count_l);
00418 
00419     for (j=0; j<count_l; j++)
00420     {
00421         l[kl] = work_l[j];
00422         jl[kl++] = ind_l[j];
00423     }
00424     il[k+1] = kl;
00425 
00426     /*
00427      * update lists
00428      */
00429     
00430         update_lists(k, il, jl, ifirst_l, list_l);
00431 
00432     if (kl - il[k] > SYMSTR)
00433         update_lists_newcol(k, il[k], jl[il[k]], ifirst_l, list_l);
00434 
00435         /* zero out the marker arrays */
00436         for (j=0; j<len_l; j++)
00437             marker_l[ind_l[j]] = -1;
00438 
00439         /*
00440          * update the diagonal (after dropping)
00441          */
00442 
00443         for (j=0; j<count_l; j++)
00444         {
00445             index = ind_l[j];
00446             diag[index] -= work_l[j] * work_l[j] * diag[k];
00447         }
00448     }
00449 
00450     free(work_l);
00451     free(ind_l);
00452     free(list_l);
00453     free(ifirst_l);
00454 }
00455 
00456 #ifdef MATLAB
00457 /*
00458  * [l, d] = icrout_cholesky_mex(AL, Adiag, droptol, lfil)
00459  * 
00460  * where AL is a sparse matrix (strictly lower triangular part)
00461  * Adiag is a vector
00462  * droptol is a scalar real
00463  * lfil is a scalar integer
00464  *
00465  * d must be converted to a matrix (from a vector) by icrout_cholesky.m
00466  */
00467 void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[])
00468 {
00469     int n;
00470     Matrix AL;
00471     double *Adiag;
00472     double droptol;
00473     int lfil;
00474     Matrix L;
00475     double *D;
00476 
00477     if (nrhs != 4)
00478         mexErrMsgTxt("mex function called with bad number of arguments");
00479 
00480     n = mxGetN(prhs[0]);
00481     AL.ptr = mxGetJc(prhs[0]);
00482     AL.col = mxGetIr(prhs[0]);
00483     AL.val = mxGetPr(prhs[0]);
00484     Adiag  = mxGetPr(prhs[1]);
00485 
00486     droptol = (double) *mxGetPr(prhs[2]);
00487     lfil   = (int) *mxGetPr(prhs[3]);
00488 
00489     crout_ict(n, &AL, Adiag, droptol, lfil, &L, &D);
00490 
00491     /* create output matrices */
00492 
00493     /* L */
00494     plhs[0] = mxCreateSparse(n, n, 1, mxREAL);
00495     mxFree(mxGetJc(plhs[0]));
00496     mxFree(mxGetIr(plhs[0]));
00497     mxFree(mxGetPr(plhs[0]));
00498     mxSetJc(plhs[0], L.ptr);
00499     mxSetIr(plhs[0], L.col);
00500     mxSetPr(plhs[0], L.val);
00501     mxSetNzmax(plhs[0], n*lfil); /* must agree */
00502 
00503     /* D */
00504     plhs[1] = mxCreateDoubleMatrix(n, 1, mxREAL);
00505     mxFree(mxGetPr(plhs[1]));
00506     mxSetPr(plhs[1], D);
00507 }
00508 #endif