xref: /petsc/src/mat/impls/baij/seq/baij.c (revision f63b844a08011cbceb00a6ecbb68fd67c9b34e11)
1 
2 #ifndef lint
3 static char vcid[] = "$Id: baij.c,v 1.16 1996/03/23 20:43:02 bsmith Exp balay $";
4 #endif
5 
6 /*
7     Defines the basic matrix operations for the BAIJ (compressed row)
8   matrix storage format.
9 */
10 #include "baij.h"
11 #include "vec/vecimpl.h"
12 #include "inline/spops.h"
13 #include "petsc.h"
14 
15 extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);
16 
17 static int MatGetReordering_SeqBAIJ(Mat A,MatOrdering type,IS *rperm,IS *cperm)
18 {
19   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
20   int         ierr, *ia, *ja,n = a->mbs,*idx,i,ishift,oshift;
21 
22   /*
23      this is tacky: In the future when we have written special factorization
24      and solve routines for the identity permutation we should use a
25      stride index set instead of the general one.
26   */
27   if (type  == ORDER_NATURAL) {
28     idx = (int *) PetscMalloc( n*sizeof(int) ); CHKPTRQ(idx);
29     for ( i=0; i<n; i++ ) idx[i] = i;
30     ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,rperm); CHKERRQ(ierr);
31     ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,cperm); CHKERRQ(ierr);
32     PetscFree(idx);
33     ISSetPermutation(*rperm);
34     ISSetPermutation(*cperm);
35     ISSetIdentity(*rperm);
36     ISSetIdentity(*cperm);
37     return 0;
38   }
39 
40   MatReorderingRegisterAll();
41   ishift = a->indexshift;
42   oshift = -MatReorderingIndexShift[(int)type];
43   if (MatReorderingRequiresSymmetric[(int)type]) {
44     ierr = MatToSymmetricIJ_SeqAIJ(a->n,a->i,a->j,ishift,oshift,&ia,&ja);
45     CHKERRQ(ierr);
46     ierr = MatGetReordering_IJ(a->n,ia,ja,type,rperm,cperm); CHKERRQ(ierr);
47     PetscFree(ia); PetscFree(ja);
48   } else {
49     if (ishift == oshift) {
50       ierr = MatGetReordering_IJ(a->n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr);
51     }
52     else if (ishift == -1) {
53       /* temporarily subtract 1 from i and j indices */
54       int nz = a->i[a->n] - 1;
55       for ( i=0; i<nz; i++ ) a->j[i]--;
56       for ( i=0; i<a->n+1; i++ ) a->i[i]--;
57       ierr = MatGetReordering_IJ(a->n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr);
58       for ( i=0; i<nz; i++ ) a->j[i]++;
59       for ( i=0; i<a->n+1; i++ ) a->i[i]++;
60     } else {
61       /* temporarily add 1 to i and j indices */
62       int nz = a->i[a->n] - 1;
63       for ( i=0; i<nz; i++ ) a->j[i]++;
64       for ( i=0; i<a->n+1; i++ ) a->i[i]++;
65       ierr = MatGetReordering_IJ(a->n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr);
66       for ( i=0; i<nz; i++ ) a->j[i]--;
67       for ( i=0; i<a->n+1; i++ ) a->i[i]--;
68     }
69   }
70   return 0;
71 }
72 
73 /*
74      Adds diagonal pointers to sparse matrix structure.
75 */
76 
77 int MatMarkDiag_SeqBAIJ(Mat A)
78 {
79   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
80   int         i,j, *diag, m = a->mbs;
81 
82   diag = (int *) PetscMalloc( (m+1)*sizeof(int)); CHKPTRQ(diag);
83   PLogObjectMemory(A,(m+1)*sizeof(int));
84   for ( i=0; i<m; i++ ) {
85     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
86       if (a->j[j] == i) {
87         diag[i] = j;
88         break;
89       }
90     }
91   }
92   a->diag = diag;
93   return 0;
94 }
95 
96 #include "draw.h"
97 #include "pinclude/pviewer.h"
98 #include "sys.h"
99 
100 static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer)
101 {
102   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
103   int         i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l;
104   Scalar      *aa;
105 
106   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
107   col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens);
108   col_lens[0] = MAT_COOKIE;
109   col_lens[1] = a->m;
110   col_lens[2] = a->n;
111   col_lens[3] = a->nz*bs*bs;
112 
113   /* store lengths of each row and write (including header) to file */
114   count = 0;
115   for ( i=0; i<a->mbs; i++ ) {
116     for ( j=0; j<bs; j++ ) {
117       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
118     }
119   }
120   ierr = PetscBinaryWrite(fd,col_lens,4+a->m,BINARY_INT,1); CHKERRQ(ierr);
121   PetscFree(col_lens);
122 
123   /* store column indices (zero start index) */
124   jj = (int *) PetscMalloc( a->nz*bs*bs*sizeof(int) ); CHKPTRQ(jj);
125   count = 0;
126   for ( i=0; i<a->mbs; i++ ) {
127     for ( j=0; j<bs; j++ ) {
128       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
129         for ( l=0; l<bs; l++ ) {
130           jj[count++] = bs*a->j[k] + l;
131         }
132       }
133     }
134   }
135   ierr = PetscBinaryWrite(fd,jj,bs*bs*a->nz,BINARY_INT,0); CHKERRQ(ierr);
136   PetscFree(jj);
137 
138   /* store nonzero values */
139   aa = (Scalar *) PetscMalloc(a->nz*bs*bs*sizeof(Scalar)); CHKPTRQ(aa);
140   count = 0;
141   for ( i=0; i<a->mbs; i++ ) {
142     for ( j=0; j<bs; j++ ) {
143       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
144         for ( l=0; l<bs; l++ ) {
145           aa[count++] = a->a[bs*bs*k + l*bs + j];
146         }
147       }
148     }
149   }
150   ierr = PetscBinaryWrite(fd,aa,bs*bs*a->nz,BINARY_SCALAR,0); CHKERRQ(ierr);
151   PetscFree(aa);
152   return 0;
153 }
154 
155 static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer)
156 {
157   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
158   int         ierr, i,j,format,bs = a->bs,k,l;
159   FILE        *fd;
160   char        *outputname;
161 
162   ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
163   ierr = ViewerFileGetOutputname_Private(viewer,&outputname);CHKERRQ(ierr);
164   ierr = ViewerGetFormat(viewer,&format);
165   if (format == ASCII_FORMAT_INFO) {
166     /* no need to print additional information */ ;
167   }
168   else if (format == ASCII_FORMAT_MATLAB) {
169     SETERRQ(1,"MatView_SeqBAIJ_ASCII:Matlab format not supported");
170   }
171   else {
172     for ( i=0; i<a->mbs; i++ ) {
173       for ( j=0; j<bs; j++ ) {
174         fprintf(fd,"row %d:",i*bs+j);
175         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
176           for ( l=0; l<bs; l++ ) {
177 #if defined(PETSC_COMPLEX)
178           if (imag(a->a[bs*bs*k + l*bs + j]) != 0.0) {
179             fprintf(fd," %d %g + %g i",bs*a->j[k]+l,
180               real(a->a[bs*bs*k + l*bs + j]),imag(a->a[bs*bs*k + l*bs + j]));
181           }
182           else {
183             fprintf(fd," %d %g",bs*a->j[k]+l,real(a->a[bs*bs*k + l*bs + j]));
184           }
185 #else
186             fprintf(fd," %d %g",bs*a->j[k]+l,a->a[bs*bs*k + l*bs + j]);
187 #endif
188           }
189         }
190         fprintf(fd,"\n");
191       }
192     }
193   }
194   fflush(fd);
195   return 0;
196 }
197 
198 static int MatView_SeqBAIJ(PetscObject obj,Viewer viewer)
199 {
200   Mat         A = (Mat) obj;
201   ViewerType  vtype;
202   int         ierr;
203 
204   if (!viewer) {
205     viewer = STDOUT_VIEWER_SELF;
206   }
207 
208   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
209   if (vtype == MATLAB_VIEWER) {
210     SETERRQ(1,"MatView_SeqBAIJ:Matlab viewer not supported");
211   }
212   else if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER){
213     return MatView_SeqBAIJ_ASCII(A,viewer);
214   }
215   else if (vtype == BINARY_FILE_VIEWER) {
216     return MatView_SeqBAIJ_Binary(A,viewer);
217   }
218   else if (vtype == DRAW_VIEWER) {
219     SETERRQ(1,"MatView_SeqBAIJ:Draw viewer not supported");
220   }
221   return 0;
222 }
223 
224 
225 static int MatZeroEntries_SeqBAIJ(Mat A)
226 {
227   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
228   PetscMemzero(a->a,a->bs*a->bs*a->i[a->mbs]*sizeof(Scalar));
229   return 0;
230 }
231 
232 int MatDestroy_SeqBAIJ(PetscObject obj)
233 {
234   Mat        A  = (Mat) obj;
235   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
236 
237 #if defined(PETSC_LOG)
238   PLogObjectState(obj,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz);
239 #endif
240   PetscFree(a->a);
241   if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);}
242   if (a->diag) PetscFree(a->diag);
243   if (a->ilen) PetscFree(a->ilen);
244   if (a->imax) PetscFree(a->imax);
245   if (a->solve_work) PetscFree(a->solve_work);
246   if (a->mult_work) PetscFree(a->mult_work);
247   PetscFree(a);
248   PLogObjectDestroy(A);
249   PetscHeaderDestroy(A);
250   return 0;
251 }
252 
253 static int MatSetOption_SeqBAIJ(Mat A,MatOption op)
254 {
255   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
256   if      (op == ROW_ORIENTED)              a->roworiented = 1;
257   else if (op == COLUMN_ORIENTED)           a->roworiented = 0;
258   else if (op == COLUMNS_SORTED)            a->sorted      = 1;
259   else if (op == NO_NEW_NONZERO_LOCATIONS)  a->nonew       = 1;
260   else if (op == YES_NEW_NONZERO_LOCATIONS) a->nonew       = 0;
261   else if (op == ROWS_SORTED ||
262            op == SYMMETRIC_MATRIX ||
263            op == STRUCTURALLY_SYMMETRIC_MATRIX ||
264            op == YES_NEW_DIAGONALS)
265     PLogInfo(A,"Info:MatSetOption_SeqBAIJ:Option ignored\n");
266   else if (op == NO_NEW_DIAGONALS)
267     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:NO_NEW_DIAGONALS");}
268   else
269     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:unknown option");}
270   return 0;
271 }
272 
273 
274 /* -------------------------------------------------------*/
275 /* Should check that shapes of vectors and matrices match */
276 /* -------------------------------------------------------*/
277 #include "pinclude/plapack.h"
278 
279 static int MatMult_SeqBAIJ(Mat A,Vec xx,Vec yy)
280 {
281   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
282   Scalar          *xg,*yg;
283   register Scalar *x, *y, *v, sum,*xb, sum1,sum2,sum3,sum4,sum5;
284   register Scalar x1,x2,x3,x4,x5;
285   int             mbs = a->mbs, m = a->m, i, *idx,*ii;
286   int             bs = a->bs,j,n,bs2 = bs*bs;
287 
288   VecGetArray(xx,&xg); x = xg;  VecGetArray(yy,&yg); y = yg;
289   PetscMemzero(y,m*sizeof(Scalar));
290   x     = x;
291   idx   = a->j;
292   v     = a->a;
293   ii    = a->i;
294 
295   switch (bs) {
296     case 1:
297      for ( i=0; i<m; i++ ) {
298        n    = ii[1] - ii[0]; ii++;
299        sum  = 0.0;
300        while (n--) sum += *v++ * x[*idx++];
301        y[i] = sum;
302       }
303       break;
304     case 2:
305       for ( i=0; i<mbs; i++ ) {
306         n  = ii[1] - ii[0]; ii++;
307         sum1 = 0.0; sum2 = 0.0;
308         for ( j=0; j<n; j++ ) {
309           xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
310           sum1 += v[0]*x1 + v[2]*x2;
311           sum2 += v[1]*x1 + v[3]*x2;
312           v += 4;
313         }
314         y[0] += sum1; y[1] += sum2;
315         y += 2;
316       }
317       break;
318     case 3:
319       for ( i=0; i<mbs; i++ ) {
320         n  = ii[1] - ii[0]; ii++;
321         sum1 = 0.0; sum2 = 0.0; sum3 = 0.0;
322         for ( j=0; j<n; j++ ) {
323           xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
324           sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
325           sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
326           sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
327           v += 9;
328         }
329         y[0] += sum1; y[1] += sum2; y[2] += sum3;
330         y += 3;
331       }
332       break;
333     case 4:
334       for ( i=0; i<mbs; i++ ) {
335         n  = ii[1] - ii[0]; ii++;
336         sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0;
337         for ( j=0; j<n; j++ ) {
338           xb = x + 4*(*idx++);
339           x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
340           sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
341           sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
342           sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
343           sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
344           v += 16;
345         }
346         y[0] += sum1; y[1] += sum2; y[2] += sum3; y[3] += sum4;
347         y += 4;
348       }
349       break;
350     case 5:
351       for ( i=0; i<mbs; i++ ) {
352         n  = ii[1] - ii[0]; ii++;
353         sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0;
354         for ( j=0; j<n; j++ ) {
355           xb = x + 5*(*idx++);
356           x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
357           sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
358           sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
359           sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
360           sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
361           sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
362           v += 25;
363         }
364         y[0] += sum1; y[1] += sum2; y[2] += sum3; y[3] += sum4; y[4] += sum5;
365         y += 5;
366       }
367       break;
368       /* block sizes larger then 5 by 5 are handled by BLAS */
369     default: {
370       int  _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
371       if (!a->mult_work) {
372         a->mult_work = (Scalar *) PetscMalloc(a->m*sizeof(Scalar));
373         CHKPTRQ(a->mult_work);
374       }
375       work = a->mult_work;
376       for ( i=0; i<mbs; i++ ) {
377         n     = ii[1] - ii[0]; ii++;
378         ncols = n*bs;
379         workt = work;
380         for ( j=0; j<n; j++ ) {
381           xb = x + bs*(*idx++);
382           for ( k=0; k<bs; k++ ) workt[k] = xb[k];
383           workt += bs;
384         }
385         LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,y,&_One);
386         v += n*bs2;
387         y += bs;
388       }
389     }
390   }
391   PLogFlops(2*bs2*a->nz - m);
392   return 0;
393 }
394 
395 static int MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,int *nz,int *nza,int *mem)
396 {
397   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
398   if (nz)  *nz  = a->bs*a->bs*a->nz;
399   if (nza) *nza = a->maxnz;
400   if (mem) *mem = (int)A->mem;
401   return 0;
402 }
403 
404 static int MatEqual_SeqBAIJ(Mat A,Mat B, PetscTruth* flg)
405 {
406   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data;
407 
408   if (B->type !=MATSEQBAIJ)SETERRQ(1,"MatEqual_SeqBAIJ:Matrices must be same type");
409 
410   /* If the  matrix/block dimensions are not equal, or no of nonzeros or shift */
411   if ((a->m != b->m) || (a->n !=b->n) || (a->bs != b->bs)||
412       (a->nz != b->nz)||(a->indexshift != b->indexshift)) {
413     *flg = PETSC_FALSE; return 0;
414   }
415 
416   /* if the a->i are the same */
417   if (PetscMemcmp(a->i,b->i, (a->mbs+1)*sizeof(int))) {
418     *flg = PETSC_FALSE; return 0;
419   }
420 
421   /* if a->j are the same */
422   if (PetscMemcmp(a->j,b->j,(a->nz)*sizeof(int))) {
423     *flg = PETSC_FALSE; return 0;
424   }
425 
426   /* if a->a are the same */
427   if (PetscMemcmp(a->a, b->a,(a->nz)*(a->bs)*(a->bs)*sizeof(Scalar))) {
428     *flg = PETSC_FALSE; return 0;
429   }
430   *flg = PETSC_TRUE;
431   return 0;
432 
433 }
434 
435 static int MatGetDiagonal_SeqBAIJ(Mat A,Vec v)
436 {
437   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
438   int        i,j, n,shift = a->indexshift;
439   Scalar     *x, zero = 0.0;
440 
441   VecSet(&zero,v);
442   VecGetArray(v,&x); VecGetLocalSize(v,&n);
443   if (n != a->m) SETERRQ(1,"MatGetDiagonal_SeqAIJ:Nonconforming matrix and vector");
444   for ( i=0; i<a->mbs; i++ ) {
445     for ( j=a->i[i]+shift; j<a->i[i+1]+shift; j++ ) {
446       if (a->j[j]+shift == i) {
447         x[i] = a->a[j];
448         break;
449       }
450     }
451   }
452   return 0;
453 }
454 
455 extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*);
456 extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double);
457 extern int MatSolve_SeqBAIJ(Mat,Vec,Vec);
458 extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*);
459 extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*);
460 extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*);
461 extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*);
462 extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*);
463 extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*);
464 
465 static int MatGetSize_SeqBAIJ(Mat A,int *m,int *n)
466 {
467   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
468   *m = a->m; *n = a->n;
469   return 0;
470 }
471 
472 static int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n)
473 {
474   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
475   *m = 0; *n = a->m;
476   return 0;
477 }
478 
479 static int MatNorm_SeqBAIJ(Mat A,NormType type,double *norm)
480 {
481   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
482   Scalar      *v = a->a;
483   double      sum = 0.0;
484   int         i;
485 
486   if (type == NORM_FROBENIUS) {
487     for (i=0; i<a->nz; i++ ) {
488 #if defined(PETSC_COMPLEX)
489       sum += real(conj(*v)*(*v)); v++;
490 #else
491       sum += (*v)*(*v); v++;
492 #endif
493     }
494     *norm = sqrt(sum);
495   }
496   else {
497     SETERRQ(1,"MatNorm_SeqBAIJ:No support for this norm yet");
498   }
499   return 0;
500 }
501 
502 /*
503      note: This can only work for identity for row and col. It would
504    be good to check this and otherwise generate an error.
505 */
506 static int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,double efill,int fill)
507 {
508   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
509   Mat         outA;
510   int         ierr;
511 
512   if (fill != 0) SETERRQ(1,"MatILUFactor_SeqBAIJ:Only fill=0 supported");
513 
514   outA          = inA;
515   inA->factor   = FACTOR_LU;
516   a->row        = row;
517   a->col        = col;
518 
519   a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work);
520 
521   if (!a->diag) {
522     ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr);
523   }
524   ierr = MatLUFactorNumeric(inA,&outA); CHKERRQ(ierr);
525   return 0;
526 }
527 
528 static int MatScale_SeqBAIJ(Scalar *alpha,Mat inA)
529 {
530   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
531   int         one = 1, totalnz = a->bs*a->bs*a->nz;
532   BLscal_( &totalnz, alpha, a->a, &one );
533   PLogFlops(totalnz);
534   return 0;
535 }
536 
537 int MatPrintHelp_SeqBAIJ(Mat A)
538 {
539   static int called = 0;
540 
541   if (called) return 0; else called = 1;
542   return 0;
543 }
544 /* -------------------------------------------------------------------*/
545 static struct _MatOps MatOps = {0,
546        0,0,
547        MatMult_SeqBAIJ,0,
548        0,0,
549        MatSolve_SeqBAIJ,0,
550        0,0,
551        MatLUFactor_SeqBAIJ,0,
552        0,
553        0,
554        MatGetInfo_SeqBAIJ,0,
555        0,0,MatNorm_SeqBAIJ,
556        0,0,
557        0,
558        MatSetOption_SeqBAIJ,MatZeroEntries_SeqBAIJ,0,
559        MatGetReordering_SeqBAIJ,
560        MatLUFactorSymbolic_SeqBAIJ,MatLUFactorNumeric_SeqBAIJ_N,0,0,
561        MatGetSize_SeqBAIJ,MatGetSize_SeqBAIJ,MatGetOwnershipRange_SeqBAIJ,
562        MatILUFactorSymbolic_SeqBAIJ,0,
563        0,0,/*  MatConvert_SeqBAIJ  */ 0,
564        0,0,
565        MatConvertSameType_SeqBAIJ,0,0,
566        MatILUFactor_SeqBAIJ,0,0,
567        0,0,
568        0,0,
569        MatPrintHelp_SeqBAIJ,MatScale_SeqBAIJ,
570        0};
571 
572 /*@C
573    MatCreateSeqBAIJ - Creates a sparse matrix in AIJ (compressed row) format
574    (the default parallel PETSc format).  For good matrix assembly performance
575    the user should preallocate the matrix storage by setting the parameter nz
576    (or nzz).  By setting these parameters accurately, performance can be
577    increased by more than a factor of 50.
578 
579    Input Parameters:
580 .  comm - MPI communicator, set to MPI_COMM_SELF
581 .  bs - size of block
582 .  m - number of rows
583 .  n - number of columns
584 .  nz - number of block nonzeros per block row (same for all rows)
585 .  nzz - number of block nonzeros per block row or PETSC_NULL
586          (possibly different for each row)
587 
588    Output Parameter:
589 .  A - the matrix
590 
591    Notes:
592    The BAIJ format, is fully compatible with standard Fortran 77
593    storage.  That is, the stored row and column indices can begin at
594    either one (as in Fortran) or zero.  See the users manual for details.
595 
596    Specify the preallocated storage with either nz or nnz (not both).
597    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
598    allocation.  For additional details, see the users manual chapter on
599    matrices and the file $(PETSC_DIR)/Performance.
600 
601 .seealso: MatCreate(), MatCreateMPIAIJ(), MatSetValues()
602 @*/
603 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A)
604 {
605   Mat         B;
606   Mat_SeqBAIJ *b;
607   int         i,len,ierr, flg,mbs = m/bs;
608 
609   if (mbs*bs != m)
610     SETERRQ(1,"MatCreateSeqBAIJ:Number rows must be divisible by blocksize");
611 
612   *A      = 0;
613   PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATSEQBAIJ,comm);
614   PLogObjectCreate(B);
615   B->data             = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b);
616   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
617   switch (bs) {
618     case 1:
619       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
620       break;
621     case 2:
622       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
623       break;
624     case 3:
625       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
626       break;
627     case 4:
628       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
629       break;
630     case 5:
631       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
632       break;
633   }
634   B->destroy          = MatDestroy_SeqBAIJ;
635   B->view             = MatView_SeqBAIJ;
636   B->factor           = 0;
637   B->lupivotthreshold = 1.0;
638   b->row              = 0;
639   b->col              = 0;
640   b->reallocs         = 0;
641 
642   b->m       = m;
643   b->mbs     = mbs;
644   b->n       = n;
645   b->imax    = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax);
646   if (nnz == PETSC_NULL) {
647     if (nz == PETSC_DEFAULT) nz = 5;
648     else if (nz <= 0)        nz = 1;
649     for ( i=0; i<mbs; i++ ) b->imax[i] = nz;
650     nz = nz*mbs;
651   }
652   else {
653     nz = 0;
654     for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];}
655   }
656 
657   /* allocate the matrix space */
658   len   = nz*sizeof(int) + nz*bs*bs*sizeof(Scalar) + (b->m+1)*sizeof(int);
659   b->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a);
660   PetscMemzero(b->a,nz*bs*bs*sizeof(Scalar));
661   b->j  = (int *) (b->a + nz*bs*bs);
662   PetscMemzero(b->j,nz*sizeof(int));
663   b->i  = b->j + nz;
664   b->singlemalloc = 1;
665 
666   b->i[0] = 0;
667   for (i=1; i<mbs+1; i++) {
668     b->i[i] = b->i[i-1] + b->imax[i-1];
669   }
670 
671   /* b->ilen will count nonzeros in each block row so far. */
672   b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int));
673   PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ));
674   for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;}
675 
676   b->bs               = bs;
677   b->mbs              = mbs;
678   b->nz               = 0;
679   b->maxnz            = nz;
680   b->sorted           = 0;
681   b->roworiented      = 1;
682   b->nonew            = 0;
683   b->diag             = 0;
684   b->solve_work       = 0;
685   b->mult_work        = 0;
686   b->spptr            = 0;
687 
688   *A = B;
689   ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr);
690   if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); }
691   return 0;
692 }
693 
694 int MatConvertSameType_SeqBAIJ(Mat A,Mat *B,int cpvalues)
695 {
696   Mat         C;
697   Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data;
698   int         i,len, mbs = a->mbs, bs = a->bs,nz = a->nz;
699 
700   if (a->i[mbs] != nz) SETERRQ(1,"MatConvertSameType_SeqBAIJ:Corrupt matrix");
701 
702   *B = 0;
703   PetscHeaderCreate(C,_Mat,MAT_COOKIE,MATSEQBAIJ,A->comm);
704   PLogObjectCreate(C);
705   C->data       = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c);
706   PetscMemcpy(&C->ops,&A->ops,sizeof(struct _MatOps));
707   C->destroy    = MatDestroy_SeqBAIJ;
708   C->view       = MatView_SeqBAIJ;
709   C->factor     = A->factor;
710   c->row        = 0;
711   c->col        = 0;
712   C->assembled  = PETSC_TRUE;
713 
714   c->m          = a->m;
715   c->n          = a->n;
716   c->bs         = a->bs;
717   c->mbs        = a->mbs;
718   c->nbs        = a->nbs;
719 
720   c->imax       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax);
721   c->ilen       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen);
722   for ( i=0; i<mbs; i++ ) {
723     c->imax[i] = a->imax[i];
724     c->ilen[i] = a->ilen[i];
725   }
726 
727   /* allocate the matrix space */
728   c->singlemalloc = 1;
729   len   = (mbs+1)*sizeof(int) + nz*(bs*bs*sizeof(Scalar) + sizeof(int));
730   c->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(c->a);
731   c->j  = (int *) (c->a + nz*bs*bs);
732   c->i  = c->j + nz;
733   PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));
734   if (mbs > 0) {
735     PetscMemcpy(c->j,a->j,nz*sizeof(int));
736     if (cpvalues == COPY_VALUES) {
737       PetscMemcpy(c->a,a->a,bs*bs*nz*sizeof(Scalar));
738     }
739   }
740 
741   PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ));
742   c->sorted      = a->sorted;
743   c->roworiented = a->roworiented;
744   c->nonew       = a->nonew;
745 
746   if (a->diag) {
747     c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag);
748     PLogObjectMemory(C,(mbs+1)*sizeof(int));
749     for ( i=0; i<mbs; i++ ) {
750       c->diag[i] = a->diag[i];
751     }
752   }
753   else c->diag          = 0;
754   c->nz                 = a->nz;
755   c->maxnz              = a->maxnz;
756   c->solve_work         = 0;
757   c->spptr              = 0;      /* Dangerous -I'm throwing away a->spptr */
758   c->mult_work          = 0;
759   *B = C;
760   return 0;
761 }
762 
763 int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A)
764 {
765   Mat_SeqBAIJ  *a;
766   Mat          B;
767   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg;
768   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
769   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
770   int          *masked, nmask,tmp,ishift,bs2;
771   Scalar       *aa;
772   MPI_Comm     comm = ((PetscObject) viewer)->comm;
773 
774   ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,&flg);CHKERRQ(ierr);
775   bs2  = bs*bs;
776 
777   MPI_Comm_size(comm,&size);
778   if (size > 1) SETERRQ(1,"MatLoad_SeqBAIJ:view must have one processor");
779   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
780   ierr = PetscBinaryRead(fd,header,4,BINARY_INT); CHKERRQ(ierr);
781   if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_SeqBAIJ:not Mat object");
782   M = header[1]; N = header[2]; nz = header[3];
783 
784   if (M != N) SETERRQ(1,"MatLoad_SeqBAIJ:Can only do square matrices");
785 
786   /*
787      This code adds extra rows to make sure the number of rows is
788     divisible by the blocksize
789   */
790   mbs        = M/bs;
791   extra_rows = bs - M + bs*(mbs);
792   if (extra_rows == bs) extra_rows = 0;
793   else                  mbs++;
794   if (extra_rows) {
795     PLogInfo(0,"MatLoad_SeqBAIJ:Padding loading matrix to match blocksize");
796   }
797 
798   /* read in row lengths */
799   rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths);
800   ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
801   for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
802 
803   /* read in column indices */
804   jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) ); CHKPTRQ(jj);
805   ierr = PetscBinaryRead(fd,jj,nz,BINARY_INT); CHKERRQ(ierr);
806   for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i;
807 
808   /* loop over row lengths determining block row lengths */
809   browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths);
810   PetscMemzero(browlengths,mbs*sizeof(int));
811   mask   = (int *) PetscMalloc( 2*mbs*sizeof(int) ); CHKPTRQ(mask);
812   PetscMemzero(mask,mbs*sizeof(int));
813   masked = mask + mbs;
814   rowcount = 0; nzcount = 0;
815   for ( i=0; i<mbs; i++ ) {
816     nmask = 0;
817     for ( j=0; j<bs; j++ ) {
818       kmax = rowlengths[rowcount];
819       for ( k=0; k<kmax; k++ ) {
820         tmp = jj[nzcount++]/bs;
821         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
822       }
823       rowcount++;
824     }
825     browlengths[i] += nmask;
826     /* zero out the mask elements we set */
827     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
828   }
829 
830   /* create our matrix */
831   ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);
832          CHKERRQ(ierr);
833   B = *A;
834   a = (Mat_SeqBAIJ *) B->data;
835 
836   /* set matrix "i" values */
837   a->i[0] = 0;
838   for ( i=1; i<= mbs; i++ ) {
839     a->i[i]      = a->i[i-1] + browlengths[i-1];
840     a->ilen[i-1] = browlengths[i-1];
841   }
842   a->nz = 0;
843   for ( i=0; i<mbs; i++ ) a->nz += browlengths[i];
844 
845   /* read in nonzero values */
846   aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa);
847   ierr = PetscBinaryRead(fd,aa,nz,BINARY_SCALAR); CHKERRQ(ierr);
848   for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0;
849 
850   /* set "a" and "j" values into matrix */
851   nzcount = 0; jcount = 0;
852   for ( i=0; i<mbs; i++ ) {
853     nzcountb = nzcount;
854     nmask    = 0;
855     for ( j=0; j<bs; j++ ) {
856       kmax = rowlengths[i*bs+j];
857       for ( k=0; k<kmax; k++ ) {
858         tmp = jj[nzcount++]/bs;
859 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
860       }
861       rowcount++;
862     }
863     /* sort the masked values */
864     PetscSortInt(nmask,masked);
865 
866     /* set "j" values into matrix */
867     maskcount = 1;
868     for ( j=0; j<nmask; j++ ) {
869       a->j[jcount++]  = masked[j];
870       mask[masked[j]] = maskcount++;
871     }
872     /* set "a" values into matrix */
873     ishift = bs2*a->i[i];
874     for ( j=0; j<bs; j++ ) {
875       kmax = rowlengths[i*bs+j];
876       for ( k=0; k<kmax; k++ ) {
877         tmp    = jj[nzcountb]/bs ;
878         block  = mask[tmp] - 1;
879         point  = jj[nzcountb] - bs*tmp;
880         idx    = ishift + bs2*block + j + bs*point;
881         a->a[idx] = aa[nzcountb++];
882       }
883     }
884     /* zero out the mask elements we set */
885     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
886   }
887   if (jcount != a->nz) SETERRQ(1,"MatLoad_SeqBAIJ:Error bad binary matrix");
888 
889   PetscFree(rowlengths);
890   PetscFree(browlengths);
891   PetscFree(aa);
892   PetscFree(jj);
893   PetscFree(mask);
894 
895   B->assembled = PETSC_TRUE;
896 
897   ierr = OptionsHasName(PETSC_NULL,"-mat_view_info",&flg); CHKERRQ(ierr);
898   if (flg) {
899     Viewer tviewer;
900     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
901     ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_INFO,0);CHKERRQ(ierr);
902     ierr = MatView(B,tviewer); CHKERRQ(ierr);
903     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
904   }
905   ierr = OptionsHasName(PETSC_NULL,"-mat_view_info_detailed",&flg);CHKERRQ(ierr);
906   if (flg) {
907     Viewer tviewer;
908     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
909     ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_INFO_DETAILED,0);CHKERRQ(ierr);
910     ierr = MatView(B,tviewer); CHKERRQ(ierr);
911     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
912   }
913   ierr = OptionsHasName(PETSC_NULL,"-mat_view",&flg); CHKERRQ(ierr);
914   if (flg) {
915     Viewer tviewer;
916     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
917     ierr = MatView(B,tviewer); CHKERRQ(ierr);
918     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
919   }
920   ierr = OptionsHasName(PETSC_NULL,"-mat_view_matlab",&flg); CHKERRQ(ierr);
921   if (flg) {
922     Viewer tviewer;
923     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
924     ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_MATLAB,"M");CHKERRQ(ierr);
925     ierr = MatView(B,tviewer); CHKERRQ(ierr);
926     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
927   }
928   ierr = OptionsHasName(PETSC_NULL,"-mat_view_draw",&flg); CHKERRQ(ierr);
929   if (flg) {
930     Viewer tviewer;
931     ierr = ViewerDrawOpenX(B->comm,0,0,0,0,300,300,&tviewer); CHKERRQ(ierr);
932     ierr = MatView(B,(Viewer)tviewer); CHKERRQ(ierr);
933     ierr = ViewerFlush(tviewer); CHKERRQ(ierr);
934     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
935   }
936   return 0;
937 }
938 
939 
940 
941