xref: /petsc/src/mat/impls/baij/seq/baij.c (revision cb8a5a2e4a0f6e92c3fb2e84e505eeca89835e11)
1 
2 #ifndef lint
3 static char vcid[] = "$Id: baij.c,v 1.10 1996/03/08 05:47:43 bsmith Exp bsmith $";
4 #endif
5 
6 /*
7     Defines the basic matrix operations for the BAIJ (compressed row)
8   matrix storage format.
9 */
10 #include "baij.h"
11 #include "vec/vecimpl.h"
12 #include "inline/spops.h"
13 #include "petsc.h"
14 
15 extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);
16 
17 static int MatGetReordering_SeqBAIJ(Mat A,MatOrdering type,IS *rperm,IS *cperm)
18 {
19   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
20   int         ierr, *ia, *ja,n = a->mbs,*idx,i,ishift,oshift;
21 
22   /*
23      this is tacky: In the future when we have written special factorization
24      and solve routines for the identity permutation we should use a
25      stride index set instead of the general one.
26   */
27   if (type  == ORDER_NATURAL) {
28     idx = (int *) PetscMalloc( n*sizeof(int) ); CHKPTRQ(idx);
29     for ( i=0; i<n; i++ ) idx[i] = i;
30     ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,rperm); CHKERRQ(ierr);
31     ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,cperm); CHKERRQ(ierr);
32     PetscFree(idx);
33     ISSetPermutation(*rperm);
34     ISSetPermutation(*cperm);
35     ISSetIdentity(*rperm);
36     ISSetIdentity(*cperm);
37     return 0;
38   }
39 
40   MatReorderingRegisterAll();
41   ishift = a->indexshift;
42   oshift = -MatReorderingIndexShift[(int)type];
43   if (MatReorderingRequiresSymmetric[(int)type]) {
44     ierr = MatToSymmetricIJ_SeqAIJ(a->n,a->i,a->j,ishift,oshift,&ia,&ja);
45     CHKERRQ(ierr);
46     ierr = MatGetReordering_IJ(a->n,ia,ja,type,rperm,cperm); CHKERRQ(ierr);
47     PetscFree(ia); PetscFree(ja);
48   } else {
49     if (ishift == oshift) {
50       ierr = MatGetReordering_IJ(a->n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr);
51     }
52     else if (ishift == -1) {
53       /* temporarily subtract 1 from i and j indices */
54       int nz = a->i[a->n] - 1;
55       for ( i=0; i<nz; i++ ) a->j[i]--;
56       for ( i=0; i<a->n+1; i++ ) a->i[i]--;
57       ierr = MatGetReordering_IJ(a->n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr);
58       for ( i=0; i<nz; i++ ) a->j[i]++;
59       for ( i=0; i<a->n+1; i++ ) a->i[i]++;
60     } else {
61       /* temporarily add 1 to i and j indices */
62       int nz = a->i[a->n] - 1;
63       for ( i=0; i<nz; i++ ) a->j[i]++;
64       for ( i=0; i<a->n+1; i++ ) a->i[i]++;
65       ierr = MatGetReordering_IJ(a->n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr);
66       for ( i=0; i<nz; i++ ) a->j[i]--;
67       for ( i=0; i<a->n+1; i++ ) a->i[i]--;
68     }
69   }
70   return 0;
71 }
72 
73 /*
74      Adds diagonal pointers to sparse matrix structure.
75 */
76 
77 int MatMarkDiag_SeqBAIJ(Mat A)
78 {
79   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
80   int         i,j, *diag, m = a->mbs;
81 
82   diag = (int *) PetscMalloc( (m+1)*sizeof(int)); CHKPTRQ(diag);
83   PLogObjectMemory(A,(m+1)*sizeof(int));
84   for ( i=0; i<m; i++ ) {
85     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
86       if (a->j[j] == i) {
87         diag[i] = j;
88         break;
89       }
90     }
91   }
92   a->diag = diag;
93   return 0;
94 }
95 
96 #include "draw.h"
97 #include "pinclude/pviewer.h"
98 #include "sysio.h"
99 
100 static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer)
101 {
102   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
103   int         i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l;
104   Scalar      *aa;
105 
106   ierr = ViewerFileGetDescriptor_Private(viewer,&fd); CHKERRQ(ierr);
107   col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens);
108   col_lens[0] = MAT_COOKIE;
109   col_lens[1] = a->m;
110   col_lens[2] = a->n;
111   col_lens[3] = a->nz*bs*bs;
112 
113   /* store lengths of each row and write (including header) to file */
114   count = 0;
115   for ( i=0; i<a->mbs; i++ ) {
116     for ( j=0; j<bs; j++ ) {
117       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
118     }
119   }
120   ierr = SYWrite(fd,col_lens,4+a->m,SYINT,1); CHKERRQ(ierr);
121   PetscFree(col_lens);
122 
123   /* store column indices (zero start index) */
124   jj = (int *) PetscMalloc( a->nz*bs*bs*sizeof(int) ); CHKPTRQ(jj);
125   count = 0;
126   for ( i=0; i<a->mbs; i++ ) {
127     for ( j=0; j<bs; j++ ) {
128       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
129         for ( l=0; l<bs; l++ ) {
130           jj[count++] = bs*a->j[k] + l;
131         }
132       }
133     }
134   }
135   ierr = SYWrite(fd,jj,bs*bs*a->nz,SYINT,0); CHKERRQ(ierr);
136   PetscFree(jj);
137 
138   /* store nonzero values */
139   aa = (Scalar *) PetscMalloc(a->nz*bs*bs*sizeof(Scalar)); CHKPTRQ(aa);
140   count = 0;
141   for ( i=0; i<a->mbs; i++ ) {
142     for ( j=0; j<bs; j++ ) {
143       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
144         for ( l=0; l<bs; l++ ) {
145           aa[count++] = a->a[bs*bs*k + l*bs + j];
146         }
147       }
148     }
149   }
150   ierr = SYWrite(fd,aa,bs*bs*a->nz,SYSCALAR,0); CHKERRQ(ierr);
151   PetscFree(aa);
152   return 0;
153 }
154 
155 static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer)
156 {
157   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
158   int         ierr, i,j,format,bs = a->bs,k,l;
159   FILE        *fd;
160   char        *outputname;
161 
162   ierr = ViewerFileGetPointer(viewer,&fd); CHKERRQ(ierr);
163   ierr = ViewerFileGetOutputname_Private(viewer,&outputname);CHKERRQ(ierr);
164   ierr = ViewerFileGetFormat_Private(viewer,&format);
165   if (format == FILE_FORMAT_INFO) {
166     /* no need to print additional information */ ;
167   }
168   else if (format == FILE_FORMAT_MATLAB) {
169     SETERRQ(1,"MatView_SeqBAIJ_ASCII:Matlab format not supported");
170   }
171   else {
172     for ( i=0; i<a->mbs; i++ ) {
173       for ( j=0; j<bs; j++ ) {
174         fprintf(fd,"row %d:",i*bs+j);
175         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
176           for ( l=0; l<bs; l++ ) {
177 #if defined(PETSC_COMPLEX)
178           if (imag(a->a[bs*bs*k + l*bs + j]) != 0.0) {
179             fprintf(fd," %d %g + %g i",bs*a->j[k]+l,
180               real(a->a[bs*bs*k + l*bs + j]),imag(a->a[bs*bs*k + l*bs + j]));
181           }
182           else {
183             fprintf(fd," %d %g",bs*a->j[k]+l,real(a->a[bs*bs*k + l*bs + j]));
184           }
185 #else
186             fprintf(fd," %d %g",bs*a->j[k]+l,a->a[bs*bs*k + l*bs + j]);
187 #endif
188           }
189         }
190         fprintf(fd,"\n");
191       }
192     }
193   }
194   fflush(fd);
195   return 0;
196 }
197 
198 static int MatView_SeqBAIJ(PetscObject obj,Viewer viewer)
199 {
200   Mat         A = (Mat) obj;
201   ViewerType  vtype;
202   int         ierr;
203 
204   if (!viewer) {
205     viewer = STDOUT_VIEWER_SELF;
206   }
207 
208   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
209   if (vtype == MATLAB_VIEWER) {
210     SETERRQ(1,"MatView_SeqBAIJ:Matlab viewer not supported");
211   }
212   else if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER){
213     return MatView_SeqBAIJ_ASCII(A,viewer);
214   }
215   else if (vtype == BINARY_FILE_VIEWER) {
216     return MatView_SeqBAIJ_Binary(A,viewer);
217   }
218   else if (vtype == DRAW_VIEWER) {
219     SETERRQ(1,"MatView_SeqBAIJ:Draw viewer not supported");
220   }
221   return 0;
222 }
223 
224 
225 static int MatZeroEntries_SeqBAIJ(Mat A)
226 {
227   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
228   PetscMemzero(a->a,a->bs*a->bs*a->i[a->mbs]*sizeof(Scalar));
229   return 0;
230 }
231 
232 int MatDestroy_SeqBAIJ(PetscObject obj)
233 {
234   Mat        A  = (Mat) obj;
235   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
236 
237 #if defined(PETSC_LOG)
238   PLogObjectState(obj,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz);
239 #endif
240   PetscFree(a->a);
241   if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);}
242   if (a->diag) PetscFree(a->diag);
243   if (a->ilen) PetscFree(a->ilen);
244   if (a->imax) PetscFree(a->imax);
245   if (a->solve_work) PetscFree(a->solve_work);
246   if (a->mult_work) PetscFree(a->mult_work);
247   PetscFree(a);
248   PLogObjectDestroy(A);
249   PetscHeaderDestroy(A);
250   return 0;
251 }
252 
253 static int MatSetOption_SeqBAIJ(Mat A,MatOption op)
254 {
255   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
256   if      (op == ROW_ORIENTED)              a->roworiented = 1;
257   else if (op == COLUMN_ORIENTED)           a->roworiented = 0;
258   else if (op == COLUMNS_SORTED)            a->sorted      = 1;
259   else if (op == NO_NEW_NONZERO_LOCATIONS)  a->nonew       = 1;
260   else if (op == YES_NEW_NONZERO_LOCATIONS) a->nonew       = 0;
261   else if (op == ROWS_SORTED ||
262            op == SYMMETRIC_MATRIX ||
263            op == STRUCTURALLY_SYMMETRIC_MATRIX ||
264            op == YES_NEW_DIAGONALS)
265     PLogInfo((PetscObject)A,"Info:MatSetOption_SeqBAIJ:Option ignored\n");
266   else if (op == NO_NEW_DIAGONALS)
267     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:NO_NEW_DIAGONALS");}
268   else
269     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:unknown option");}
270   return 0;
271 }
272 
273 
274 /* -------------------------------------------------------*/
275 /* Should check that shapes of vectors and matrices match */
276 /* -------------------------------------------------------*/
277 #include "pinclude/plapack.h"
278 
279 static int MatMult_SeqBAIJ(Mat A,Vec xx,Vec yy)
280 {
281   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
282   Scalar          *xg,*yg;
283   register Scalar *x, *y, *v, sum,*xb, sum1,sum2,sum3,sum4,sum5;
284   register Scalar x1,x2,x3,x4,x5;
285   int             mbs = a->mbs, m = a->m, i, *idx,*ii;
286   int             bs = a->bs,j,n,bs2 = bs*bs;
287 
288   VecGetArray(xx,&xg); x = xg;  VecGetArray(yy,&yg); y = yg;
289   PetscMemzero(y,m*sizeof(Scalar));
290   x     = x;
291   idx   = a->j;
292   v     = a->a;
293   ii    = a->i;
294 
295   switch (bs) {
296     case 1:
297      for ( i=0; i<m; i++ ) {
298        n    = ii[1] - ii[0]; ii++;
299        sum  = 0.0;
300        while (n--) sum += *v++ * x[*idx++];
301        y[i] = sum;
302       }
303       break;
304     case 2:
305       for ( i=0; i<mbs; i++ ) {
306         n  = ii[1] - ii[0]; ii++;
307         sum1 = 0.0; sum2 = 0.0;
308         for ( j=0; j<n; j++ ) {
309           xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
310           sum1 += v[0]*x1 + v[2]*x2;
311           sum2 += v[1]*x1 + v[3]*x2;
312           v += 4;
313         }
314         y[0] += sum1; y[1] += sum2;
315         y += 2;
316       }
317       break;
318     case 3:
319       for ( i=0; i<mbs; i++ ) {
320         n  = ii[1] - ii[0]; ii++;
321         sum1 = 0.0; sum2 = 0.0; sum3 = 0.0;
322         for ( j=0; j<n; j++ ) {
323           xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
324           sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
325           sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
326           sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
327           v += 9;
328         }
329         y[0] += sum1; y[1] += sum2; y[2] += sum3;
330         y += 3;
331       }
332       break;
333     case 4:
334       for ( i=0; i<mbs; i++ ) {
335         n  = ii[1] - ii[0]; ii++;
336         sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0;
337         for ( j=0; j<n; j++ ) {
338           xb = x + 4*(*idx++);
339           x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
340           sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
341           sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
342           sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
343           sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
344           v += 16;
345         }
346         y[0] += sum1; y[1] += sum2; y[2] += sum3; y[3] += sum4;
347         y += 4;
348       }
349       break;
350     case 5:
351       for ( i=0; i<mbs; i++ ) {
352         n  = ii[1] - ii[0]; ii++;
353         sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0;
354         for ( j=0; j<n; j++ ) {
355           xb = x + 5*(*idx++);
356           x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
357           sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
358           sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
359           sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
360           sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
361           sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
362           v += 25;
363         }
364         y[0] += sum1; y[1] += sum2; y[2] += sum3; y[3] += sum4; y[4] += sum5;
365         y += 5;
366       }
367       break;
368       /* block sizes larger then 5 by 5 are handled by BLAS */
369     default: {
370       int  _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
371       if (!a->mult_work) {
372         a->mult_work = (Scalar *) PetscMalloc(a->m*sizeof(Scalar));
373         CHKPTRQ(a->mult_work);
374       }
375       work = a->mult_work;
376       for ( i=0; i<mbs; i++ ) {
377         n     = ii[1] - ii[0]; ii++;
378         ncols = n*bs;
379         workt = work;
380         for ( j=0; j<n; j++ ) {
381           xb = x + bs*(*idx++);
382           for ( k=0; k<bs; k++ ) workt[k] = xb[k];
383           workt += bs;
384         }
385         LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,y,&_One);
386         v += n*bs2;
387         y += bs;
388       }
389     }
390   }
391   PLogFlops(2*bs2*a->nz - m);
392   return 0;
393 }
394 
395 static int MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,int *nz,int *nza,int *mem)
396 {
397   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
398   if (nz)  *nz  = a->bs*a->bs*a->nz;
399   if (nza) *nza = a->maxnz;
400   if (mem) *mem = (int)A->mem;
401   return 0;
402 }
403 
404 extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*);
405 extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double);
406 extern int MatSolve_SeqBAIJ(Mat,Vec,Vec);
407 extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*);
408 extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*);
409 extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*);
410 extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*);
411 extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*);
412 extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*);
413 
414 static int MatGetSize_SeqBAIJ(Mat A,int *m,int *n)
415 {
416   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
417   *m = a->m; *n = a->n;
418   return 0;
419 }
420 
421 static int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n)
422 {
423   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
424   *m = 0; *n = a->m;
425   return 0;
426 }
427 
428 static int MatNorm_SeqBAIJ(Mat A,NormType type,double *norm)
429 {
430   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
431   Scalar      *v = a->a;
432   double      sum = 0.0;
433   int         i;
434 
435   if (type == NORM_FROBENIUS) {
436     for (i=0; i<a->nz; i++ ) {
437 #if defined(PETSC_COMPLEX)
438       sum += real(conj(*v)*(*v)); v++;
439 #else
440       sum += (*v)*(*v); v++;
441 #endif
442     }
443     *norm = sqrt(sum);
444   }
445   else {
446     SETERRQ(1,"MatNorm_SeqBAIJ:No support for this norm yet");
447   }
448   return 0;
449 }
450 
451 /*
452      note: This can only work for identity for row and col. It would
453    be good to check this and otherwise generate an error.
454 */
455 static int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,double efill,int fill)
456 {
457   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
458   Mat         outA;
459   int         ierr;
460 
461   if (fill != 0) SETERRQ(1,"MatILUFactor_SeqBAIJ:Only fill=0 supported");
462 
463   outA          = inA;
464   inA->factor   = FACTOR_LU;
465   a->row        = row;
466   a->col        = col;
467 
468   a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work);
469 
470   if (!a->diag) {
471     ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr);
472   }
473   ierr = MatLUFactorNumeric(inA,&outA); CHKERRQ(ierr);
474   return 0;
475 }
476 
477 static int MatScale_SeqBAIJ(Scalar *alpha,Mat inA)
478 {
479   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
480   int         one = 1, totalnz = a->bs*a->bs*a->nz;
481   BLscal_( &totalnz, alpha, a->a, &one );
482   PLogFlops(totalnz);
483   return 0;
484 }
485 
486 int MatPrintHelp_SeqBAIJ(Mat A)
487 {
488   static int called = 0;
489 
490   if (called) return 0; else called = 1;
491   return 0;
492 }
493 /* -------------------------------------------------------------------*/
494 static struct _MatOps MatOps = {0,
495        0,0,
496        MatMult_SeqBAIJ,0,
497        0,0,
498        MatSolve_SeqBAIJ,0,
499        0,0,
500        MatLUFactor_SeqBAIJ,0,
501        0,
502        0,
503        MatGetInfo_SeqBAIJ,0,
504        0,0,MatNorm_SeqBAIJ,
505        0,0,
506        0,
507        MatSetOption_SeqBAIJ,MatZeroEntries_SeqBAIJ,0,
508        MatGetReordering_SeqBAIJ,
509        MatLUFactorSymbolic_SeqBAIJ,MatLUFactorNumeric_SeqBAIJ_N,0,0,
510        MatGetSize_SeqBAIJ,MatGetSize_SeqBAIJ,MatGetOwnershipRange_SeqBAIJ,
511        MatILUFactorSymbolic_SeqBAIJ,0,
512        0,0,/*MatConvert_SeqBAIJ*/ 0,
513        0,0,
514        MatConvertSameType_SeqBAIJ,0,0,
515        MatILUFactor_SeqBAIJ,0,0,
516        0,0,
517        0,0,
518        MatPrintHelp_SeqBAIJ,MatScale_SeqBAIJ,
519        0};
520 
521 /*@C
522    MatCreateSeqBAIJ - Creates a sparse matrix in AIJ (compressed row) format
523    (the default parallel PETSc format).  For good matrix assembly performance
524    the user should preallocate the matrix storage by setting the parameter nz
525    (or nzz).  By setting these parameters accurately, performance can be
526    increased by more than a factor of 50.
527 
528    Input Parameters:
529 .  comm - MPI communicator, set to MPI_COMM_SELF
530 .  bs - size of block
531 .  m - number of rows
532 .  n - number of columns
533 .  nz - number of block nonzeros per block row (same for all rows)
534 .  nzz - number of block nonzeros per block row or PETSC_NULL
535          (possibly different for each row)
536 
537    Output Parameter:
538 .  A - the matrix
539 
540    Notes:
541    The BAIJ format, is fully compatible with standard Fortran 77
542    storage.  That is, the stored row and column indices can begin at
543    either one (as in Fortran) or zero.  See the users manual for details.
544 
545    Specify the preallocated storage with either nz or nnz (not both).
546    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
547    allocation.  For additional details, see the users manual chapter on
548    matrices and the file $(PETSC_DIR)/Performance.
549 
550 .seealso: MatCreate(), MatCreateMPIAIJ(), MatSetValues()
551 @*/
552 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A)
553 {
554   Mat         B;
555   Mat_SeqBAIJ *b;
556   int         i,len,ierr, flg,mbs = m/bs;
557 
558   if (mbs*bs != m)
559     SETERRQ(1,"MatCreateSeqBAIJ:Number rows must be divisible by blocksize");
560 
561   *A      = 0;
562   PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATSEQBAIJ,comm);
563   PLogObjectCreate(B);
564   B->data             = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b);
565   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
566   switch (bs) {
567     case 1:
568       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
569       break;
570     case 2:
571       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
572       break;
573     case 3:
574       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
575       break;
576     case 4:
577       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
578       break;
579     case 5:
580       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
581       break;
582   }
583   B->destroy          = MatDestroy_SeqBAIJ;
584   B->view             = MatView_SeqBAIJ;
585   B->factor           = 0;
586   B->lupivotthreshold = 1.0;
587   b->row              = 0;
588   b->col              = 0;
589   b->reallocs         = 0;
590 
591   b->m       = m;
592   b->mbs     = mbs;
593   b->n       = n;
594   b->imax    = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax);
595   if (nnz == PETSC_NULL) {
596     if (nz == PETSC_DEFAULT) nz = 5;
597     else if (nz <= 0)        nz = 1;
598     for ( i=0; i<mbs; i++ ) b->imax[i] = nz;
599     nz = nz*mbs;
600   }
601   else {
602     nz = 0;
603     for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];}
604   }
605 
606   /* allocate the matrix space */
607   len   = nz*sizeof(int) + nz*bs*bs*sizeof(Scalar) + (b->m+1)*sizeof(int);
608   b->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a);
609   PetscMemzero(b->a,nz*bs*bs*sizeof(Scalar));
610   b->j  = (int *) (b->a + nz*bs*bs);
611   PetscMemzero(b->j,nz*sizeof(int));
612   b->i  = b->j + nz;
613   b->singlemalloc = 1;
614 
615   b->i[0] = 0;
616   for (i=1; i<mbs+1; i++) {
617     b->i[i] = b->i[i-1] + b->imax[i-1];
618   }
619 
620   /* b->ilen will count nonzeros in each block row so far. */
621   b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int));
622   PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ));
623   for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;}
624 
625   b->bs               = bs;
626   b->mbs              = mbs;
627   b->nz               = 0;
628   b->maxnz            = nz;
629   b->sorted           = 0;
630   b->roworiented      = 1;
631   b->nonew            = 0;
632   b->diag             = 0;
633   b->solve_work       = 0;
634   b->mult_work        = 0;
635   b->spptr            = 0;
636 
637   *A = B;
638   ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr);
639   if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); }
640   return 0;
641 }
642 
643 int MatConvertSameType_SeqBAIJ(Mat A,Mat *B,int cpvalues)
644 {
645   Mat         C;
646   Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data;
647   int         i,len, mbs = a->mbs, bs = a->bs,nz = a->nz;
648 
649   if (a->i[mbs] != nz) SETERRQ(1,"MatConvertSameType_SeqBAIJ:Corrupt matrix");
650 
651   *B = 0;
652   PetscHeaderCreate(C,_Mat,MAT_COOKIE,MATSEQBAIJ,A->comm);
653   PLogObjectCreate(C);
654   C->data       = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c);
655   PetscMemcpy(&C->ops,&A->ops,sizeof(struct _MatOps));
656   C->destroy    = MatDestroy_SeqBAIJ;
657   C->view       = MatView_SeqBAIJ;
658   C->factor     = A->factor;
659   c->row        = 0;
660   c->col        = 0;
661   C->assembled  = PETSC_TRUE;
662 
663   c->m          = a->m;
664   c->n          = a->n;
665   c->bs         = a->bs;
666   c->mbs        = a->mbs;
667   c->nbs        = a->nbs;
668 
669   c->imax       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax);
670   c->ilen       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen);
671   for ( i=0; i<mbs; i++ ) {
672     c->imax[i] = a->imax[i];
673     c->ilen[i] = a->ilen[i];
674   }
675 
676   /* allocate the matrix space */
677   c->singlemalloc = 1;
678   len   = (mbs+1)*sizeof(int) + nz*(bs*bs*sizeof(Scalar) + sizeof(int));
679   c->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(c->a);
680   c->j  = (int *) (c->a + nz*bs*bs);
681   c->i  = c->j + nz;
682   PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));
683   if (mbs > 0) {
684     PetscMemcpy(c->j,a->j,nz*sizeof(int));
685     if (cpvalues == COPY_VALUES) {
686       PetscMemcpy(c->a,a->a,bs*bs*nz*sizeof(Scalar));
687     }
688   }
689 
690   PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ));
691   c->sorted      = a->sorted;
692   c->roworiented = a->roworiented;
693   c->nonew       = a->nonew;
694 
695   if (a->diag) {
696     c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag);
697     PLogObjectMemory(C,(mbs+1)*sizeof(int));
698     for ( i=0; i<mbs; i++ ) {
699       c->diag[i] = a->diag[i];
700     }
701   }
702   else c->diag          = 0;
703   c->nz                 = a->nz;
704   c->maxnz              = a->maxnz;
705   c->solve_work         = 0;
706   c->spptr              = 0;      /* Dangerous -I'm throwing away a->spptr */
707   c->mult_work          = 0;
708   *B = C;
709   return 0;
710 }
711 
712 int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A)
713 {
714   Mat_SeqBAIJ  *a;
715   Mat          B;
716   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg;
717   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
718   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
719   int          *masked, nmask,tmp,ishift,bs2;
720   Scalar       *aa;
721   MPI_Comm     comm = ((PetscObject) viewer)->comm;
722 
723   ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,&flg);CHKERRQ(ierr);
724   bs2  = bs*bs;
725 
726   MPI_Comm_size(comm,&size);
727   if (size > 1) SETERRQ(1,"MatLoad_SeqBAIJ:view must have one processor");
728   ierr = ViewerFileGetDescriptor_Private(viewer,&fd); CHKERRQ(ierr);
729   ierr = SYRead(fd,header,4,SYINT); CHKERRQ(ierr);
730   if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_SeqBAIJ:not Mat object");
731   M = header[1]; N = header[2]; nz = header[3];
732 
733   if (M != N) SETERRQ(1,"MatLoad_SeqBAIJ:Can only do square matrices");
734 
735   /*
736      This code adds extra rows to make sure the number of rows is
737     divisible by the blocksize
738   */
739   mbs        = M/bs;
740   extra_rows = bs - M + bs*(mbs);
741   if (extra_rows == bs) extra_rows = 0;
742   else                  mbs++;
743   if (extra_rows) {
744     PLogInfo(0,"MatLoad_SeqBAIJ:Padding loading matrix to match blocksize");
745   }
746 
747   /* read in row lengths */
748   rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths);
749   ierr = SYRead(fd,rowlengths,M,SYINT); CHKERRQ(ierr);
750   for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
751 
752   /* read in column indices */
753   jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) ); CHKPTRQ(jj);
754   ierr = SYRead(fd,jj,nz,SYINT); CHKERRQ(ierr);
755   for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i;
756 
757   /* loop over row lengths determining block row lengths */
758   browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths);
759   PetscMemzero(browlengths,mbs*sizeof(int));
760   mask   = (int *) PetscMalloc( 2*mbs*sizeof(int) ); CHKPTRQ(mask);
761   PetscMemzero(mask,mbs*sizeof(int));
762   masked = mask + mbs;
763   rowcount = 0; nzcount = 0;
764   for ( i=0; i<mbs; i++ ) {
765     nmask = 0;
766     for ( j=0; j<bs; j++ ) {
767       kmax = rowlengths[rowcount];
768       for ( k=0; k<kmax; k++ ) {
769         tmp = jj[nzcount++]/bs;
770         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
771       }
772       rowcount++;
773     }
774     browlengths[i] += nmask;
775     /* zero out the mask elements we set */
776     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
777   }
778 
779   /* create our matrix */
780   ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);
781          CHKERRQ(ierr);
782   B = *A;
783   a = (Mat_SeqBAIJ *) B->data;
784 
785   /* set matrix "i" values */
786   a->i[0] = 0;
787   for ( i=1; i<= mbs; i++ ) {
788     a->i[i]      = a->i[i-1] + browlengths[i-1];
789     a->ilen[i-1] = browlengths[i-1];
790   }
791   a->nz = 0;
792   for ( i=0; i<mbs; i++ ) a->nz += browlengths[i];
793 
794   /* read in nonzero values */
795   aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa);
796   ierr = SYRead(fd,aa,nz,SYSCALAR); CHKERRQ(ierr);
797   for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0;
798 
799   /* set "a" and "j" values into matrix */
800   nzcount = 0; jcount = 0;
801   for ( i=0; i<mbs; i++ ) {
802     nzcountb = nzcount;
803     nmask    = 0;
804     for ( j=0; j<bs; j++ ) {
805       kmax = rowlengths[i*bs+j];
806       for ( k=0; k<kmax; k++ ) {
807         tmp = jj[nzcount++]/bs;
808 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
809       }
810       rowcount++;
811     }
812     /* sort the masked values */
813     SYIsort(nmask,masked);
814 
815     /* set "j" values into matrix */
816     maskcount = 1;
817     for ( j=0; j<nmask; j++ ) {
818       a->j[jcount++]  = masked[j];
819       mask[masked[j]] = maskcount++;
820     }
821     /* set "a" values into matrix */
822     ishift = bs2*a->i[i];
823     for ( j=0; j<bs; j++ ) {
824       kmax = rowlengths[i*bs+j];
825       for ( k=0; k<kmax; k++ ) {
826         tmp    = jj[nzcountb]/bs ;
827         block  = mask[tmp] - 1;
828         point  = jj[nzcountb] - bs*tmp;
829         idx    = ishift + bs2*block + j + bs*point;
830         a->a[idx] = aa[nzcountb++];
831       }
832     }
833     /* zero out the mask elements we set */
834     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
835   }
836   if (jcount != a->nz) SETERRQ(1,"MatLoad_SeqBAIJ:Error bad binary matrix");
837 
838   PetscFree(rowlengths);
839   PetscFree(browlengths);
840   PetscFree(aa);
841   PetscFree(jj);
842   PetscFree(mask);
843 
844   B->assembled = PETSC_TRUE;
845 
846   ierr = OptionsHasName(PETSC_NULL,"-mat_view_info",&flg); CHKERRQ(ierr);
847   if (flg) {
848     Viewer tviewer;
849     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
850     ierr = ViewerFileSetFormat(tviewer,FILE_FORMAT_INFO,0);CHKERRQ(ierr);
851     ierr = MatView(B,tviewer); CHKERRQ(ierr);
852     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
853   }
854   ierr = OptionsHasName(PETSC_NULL,"-mat_view_info_detailed",&flg);CHKERRQ(ierr);
855   if (flg) {
856     Viewer tviewer;
857     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
858     ierr = ViewerFileSetFormat(tviewer,FILE_FORMAT_INFO_DETAILED,0);CHKERRQ(ierr);
859     ierr = MatView(B,tviewer); CHKERRQ(ierr);
860     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
861   }
862   ierr = OptionsHasName(PETSC_NULL,"-mat_view",&flg); CHKERRQ(ierr);
863   if (flg) {
864     Viewer tviewer;
865     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
866     ierr = MatView(B,tviewer); CHKERRQ(ierr);
867     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
868   }
869   ierr = OptionsHasName(PETSC_NULL,"-mat_view_matlab",&flg); CHKERRQ(ierr);
870   if (flg) {
871     Viewer tviewer;
872     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
873     ierr = ViewerFileSetFormat(tviewer,FILE_FORMAT_MATLAB,"M");CHKERRQ(ierr);
874     ierr = MatView(B,tviewer); CHKERRQ(ierr);
875     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
876   }
877   ierr = OptionsHasName(PETSC_NULL,"-mat_view_draw",&flg); CHKERRQ(ierr);
878   if (flg) {
879     Viewer tviewer;
880     ierr = ViewerDrawOpenX(B->comm,0,0,0,0,300,300,&tviewer); CHKERRQ(ierr);
881     ierr = MatView(B,(Viewer)tviewer); CHKERRQ(ierr);
882     ierr = ViewerFlush(tviewer); CHKERRQ(ierr);
883     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
884   }
885   return 0;
886 }
887 
888 
889 
890