xref: /petsc/src/mat/impls/baij/seq/baij.c (revision bcd2baec2099d24a3a9d1d6033b0e7e45ccc83b9)
1 
2 #ifndef lint
3 static char vcid[] = "$Id: baij.c,v 1.9 1996/03/04 05:16:18 bsmith Exp bsmith $";
4 #endif
5 
6 /*
7     Defines the basic matrix operations for the BAIJ (compressed row)
8   matrix storage format.
9 */
10 #include "baij.h"
11 #include "vec/vecimpl.h"
12 #include "inline/spops.h"
13 #include "petsc.h"
14 
15 extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);
16 
17 static int MatGetReordering_SeqBAIJ(Mat A,MatOrdering type,IS *rperm,IS *cperm)
18 {
19   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
20   int         ierr, *ia, *ja,n = a->mbs,*idx,i,ishift,oshift;
21 
22   /*
23      this is tacky: In the future when we have written special factorization
24      and solve routines for the identity permutation we should use a
25      stride index set instead of the general one.
26   */
27   if (type  == ORDER_NATURAL) {
28     idx = (int *) PetscMalloc( n*sizeof(int) ); CHKPTRQ(idx);
29     for ( i=0; i<n; i++ ) idx[i] = i;
30     ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,rperm); CHKERRQ(ierr);
31     ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,cperm); CHKERRQ(ierr);
32     PetscFree(idx);
33     ISSetPermutation(*rperm);
34     ISSetPermutation(*cperm);
35     ISSetIdentity(*rperm);
36     ISSetIdentity(*cperm);
37     return 0;
38   }
39 
40   MatReorderingRegisterAll();
41   ishift = a->indexshift;
42   oshift = -MatReorderingIndexShift[(int)type];
43   if (MatReorderingRequiresSymmetric[(int)type]) {
44     ierr = MatToSymmetricIJ_SeqAIJ(a->n,a->i,a->j,ishift,oshift,&ia,&ja);
45     CHKERRQ(ierr);
46     ierr = MatGetReordering_IJ(a->n,ia,ja,type,rperm,cperm); CHKERRQ(ierr);
47     PetscFree(ia); PetscFree(ja);
48   } else {
49     if (ishift == oshift) {
50       ierr = MatGetReordering_IJ(a->n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr);
51     }
52     else if (ishift == -1) {
53       /* temporarily subtract 1 from i and j indices */
54       int nz = a->i[a->n] - 1;
55       for ( i=0; i<nz; i++ ) a->j[i]--;
56       for ( i=0; i<a->n+1; i++ ) a->i[i]--;
57       ierr = MatGetReordering_IJ(a->n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr);
58       for ( i=0; i<nz; i++ ) a->j[i]++;
59       for ( i=0; i<a->n+1; i++ ) a->i[i]++;
60     } else {
61       /* temporarily add 1 to i and j indices */
62       int nz = a->i[a->n] - 1;
63       for ( i=0; i<nz; i++ ) a->j[i]++;
64       for ( i=0; i<a->n+1; i++ ) a->i[i]++;
65       ierr = MatGetReordering_IJ(a->n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr);
66       for ( i=0; i<nz; i++ ) a->j[i]--;
67       for ( i=0; i<a->n+1; i++ ) a->i[i]--;
68     }
69   }
70   return 0;
71 }
72 
73 /*
74      Adds diagonal pointers to sparse matrix structure.
75 */
76 
77 int MatMarkDiag_SeqBAIJ(Mat A)
78 {
79   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
80   int         i,j, *diag, m = a->mbs;
81 
82   diag = (int *) PetscMalloc( (m+1)*sizeof(int)); CHKPTRQ(diag);
83   PLogObjectMemory(A,(m+1)*sizeof(int));
84   for ( i=0; i<m; i++ ) {
85     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
86       if (a->j[j] == i) {
87         diag[i] = j;
88         break;
89       }
90     }
91   }
92   a->diag = diag;
93   return 0;
94 }
95 
96 #include "draw.h"
97 #include "pinclude/pviewer.h"
98 #include "sysio.h"
99 
100 static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer)
101 {
102   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
103   int         i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l;
104   Scalar      *aa;
105 
106   ierr = ViewerFileGetDescriptor_Private(viewer,&fd); CHKERRQ(ierr);
107   col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens);
108   col_lens[0] = MAT_COOKIE;
109   col_lens[1] = a->m;
110   col_lens[2] = a->n;
111   col_lens[3] = a->nz*bs*bs;
112 
113   /* store lengths of each row and write (including header) to file */
114   count = 0;
115   for ( i=0; i<a->mbs; i++ ) {
116     for ( j=0; j<bs; j++ ) {
117       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
118     }
119   }
120   ierr = SYWrite(fd,col_lens,4+a->m,SYINT,1); CHKERRQ(ierr);
121   PetscFree(col_lens);
122 
123   /* store column indices (zero start index) */
124   jj = (int *) PetscMalloc( a->nz*bs*bs*sizeof(int) ); CHKPTRQ(jj);
125   count = 0;
126   for ( i=0; i<a->mbs; i++ ) {
127     for ( j=0; j<bs; j++ ) {
128       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
129         for ( l=0; l<bs; l++ ) {
130           jj[count++] = bs*a->j[k] + l;
131         }
132       }
133     }
134   }
135   ierr = SYWrite(fd,jj,bs*bs*a->nz,SYINT,0); CHKERRQ(ierr);
136   PetscFree(jj);
137 
138   /* store nonzero values */
139   aa = (Scalar *) PetscMalloc(a->nz*bs*bs*sizeof(Scalar)); CHKPTRQ(aa);
140   count = 0;
141   for ( i=0; i<a->mbs; i++ ) {
142     for ( j=0; j<bs; j++ ) {
143       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
144         for ( l=0; l<bs; l++ ) {
145           aa[count++] = a->a[bs*bs*k + l*bs + j];
146         }
147       }
148     }
149   }
150   ierr = SYWrite(fd,aa,bs*bs*a->nz,SYSCALAR,0); CHKERRQ(ierr);
151   PetscFree(aa);
152   return 0;
153 }
154 
155 static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer)
156 {
157   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
158   int         ierr, i,j,format,bs = a->bs,k,l;
159   FILE        *fd;
160   char        *outputname;
161 
162   ierr = ViewerFileGetPointer(viewer,&fd); CHKERRQ(ierr);
163   ierr = ViewerFileGetOutputname_Private(viewer,&outputname);CHKERRQ(ierr);
164   ierr = ViewerFileGetFormat_Private(viewer,&format);
165   if (format == FILE_FORMAT_INFO) {
166     /* no need to print additional information */ ;
167   }
168   else if (format == FILE_FORMAT_MATLAB) {
169     SETERRQ(1,"MatView_SeqBAIJ_ASCII:Matlab format not supported");
170   }
171   else {
172     for ( i=0; i<a->mbs; i++ ) {
173       for ( j=0; j<bs; j++ ) {
174         fprintf(fd,"row %d:",i*bs+j);
175         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
176           for ( l=0; l<bs; l++ ) {
177 #if defined(PETSC_COMPLEX)
178           if (imag(a->a[bs*bs*k + l*bs + j]) != 0.0) {
179             fprintf(fd," %d %g + %g i",bs*a->j[k]+l,
180               real(a->a[bs*bs*k + l*bs + j]),imag(a->a[bs*bs*k + l*bs + j]));
181           }
182           else {
183             fprintf(fd," %d %g",bs*a->j[k]+l,real(a->a[bs*bs*k + l*bs + j]));
184           }
185 #else
186             fprintf(fd," %d %g",bs*a->j[k]+l,a->a[bs*bs*k + l*bs + j]);
187 #endif
188           }
189         }
190         fprintf(fd,"\n");
191       }
192     }
193   }
194   fflush(fd);
195   return 0;
196 }
197 
198 static int MatView_SeqBAIJ(PetscObject obj,Viewer viewer)
199 {
200   Mat         A = (Mat) obj;
201   PetscObject vobj = (PetscObject) viewer;
202 
203   if (!viewer) {
204     viewer = STDOUT_VIEWER_SELF; vobj = (PetscObject) viewer;
205   }
206   if (vobj->cookie == VIEWER_COOKIE) {
207     if (vobj->type == MATLAB_VIEWER) {
208       SETERRQ(1,"MatView_SeqBAIJ:Matlab viewer not supported");
209     }
210     else if (vobj->type == ASCII_FILE_VIEWER||vobj->type == ASCII_FILES_VIEWER){
211       return MatView_SeqBAIJ_ASCII(A,viewer);
212     }
213     else if (vobj->type == BINARY_FILE_VIEWER) {
214       return MatView_SeqBAIJ_Binary(A,viewer);
215     }
216   }
217   else if (vobj->cookie == DRAW_COOKIE) {
218     if (vobj->type == NULLWINDOW) return 0;
219     SETERRQ(1,"MatView_SeqBAIJ:Draw viewer not supported");
220   }
221   return 0;
222 }
223 
224 
225 static int MatZeroEntries_SeqBAIJ(Mat A)
226 {
227   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
228   PetscMemzero(a->a,a->bs*a->bs*a->i[a->mbs]*sizeof(Scalar));
229   return 0;
230 }
231 
232 int MatDestroy_SeqBAIJ(PetscObject obj)
233 {
234   Mat        A  = (Mat) obj;
235   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
236 
237 #if defined(PETSC_LOG)
238   PLogObjectState(obj,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz);
239 #endif
240   PetscFree(a->a);
241   if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);}
242   if (a->diag) PetscFree(a->diag);
243   if (a->ilen) PetscFree(a->ilen);
244   if (a->imax) PetscFree(a->imax);
245   if (a->solve_work) PetscFree(a->solve_work);
246   if (a->mult_work) PetscFree(a->mult_work);
247   PetscFree(a);
248   PLogObjectDestroy(A);
249   PetscHeaderDestroy(A);
250   return 0;
251 }
252 
253 static int MatSetOption_SeqBAIJ(Mat A,MatOption op)
254 {
255   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
256   if      (op == ROW_ORIENTED)              a->roworiented = 1;
257   else if (op == COLUMN_ORIENTED)           a->roworiented = 0;
258   else if (op == COLUMNS_SORTED)            a->sorted      = 1;
259   else if (op == NO_NEW_NONZERO_LOCATIONS)  a->nonew       = 1;
260   else if (op == YES_NEW_NONZERO_LOCATIONS) a->nonew       = 0;
261   else if (op == ROWS_SORTED ||
262            op == SYMMETRIC_MATRIX ||
263            op == STRUCTURALLY_SYMMETRIC_MATRIX ||
264            op == YES_NEW_DIAGONALS)
265     PLogInfo((PetscObject)A,"Info:MatSetOption_SeqBAIJ:Option ignored\n");
266   else if (op == NO_NEW_DIAGONALS)
267     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:NO_NEW_DIAGONALS");}
268   else
269     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:unknown option");}
270   return 0;
271 }
272 
273 
274 /* -------------------------------------------------------*/
275 /* Should check that shapes of vectors and matrices match */
276 /* -------------------------------------------------------*/
277 #include "pinclude/plapack.h"
278 
279 static int MatMult_SeqBAIJ(Mat A,Vec xx,Vec yy)
280 {
281   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
282   Scalar          *xg,*yg;
283   register Scalar *x, *y, *v, sum,*xb, sum1,sum2,sum3,sum4,sum5;
284   register Scalar x1,x2,x3,x4,x5;
285   int             mbs = a->mbs, m = a->m, i, *idx,*ii;
286   int             bs = a->bs,j,n,bs2 = bs*bs;
287 
288   VecGetArray(xx,&xg); x = xg;  VecGetArray(yy,&yg); y = yg;
289   PetscMemzero(y,m*sizeof(Scalar));
290   x     = x;
291   idx   = a->j;
292   v     = a->a;
293   ii    = a->i;
294 
295   switch (bs) {
296     case 1:
297      for ( i=0; i<m; i++ ) {
298        n    = ii[1] - ii[0]; ii++;
299        sum  = 0.0;
300        while (n--) sum += *v++ * x[*idx++];
301        y[i] = sum;
302       }
303       break;
304     case 2:
305       for ( i=0; i<mbs; i++ ) {
306         n  = ii[1] - ii[0]; ii++;
307         sum1 = 0.0; sum2 = 0.0;
308         for ( j=0; j<n; j++ ) {
309           xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
310           sum1 += v[0]*x1 + v[2]*x2;
311           sum2 += v[1]*x1 + v[3]*x2;
312           v += 4;
313         }
314         y[0] += sum1; y[1] += sum2;
315         y += 2;
316       }
317       break;
318     case 3:
319       for ( i=0; i<mbs; i++ ) {
320         n  = ii[1] - ii[0]; ii++;
321         sum1 = 0.0; sum2 = 0.0; sum3 = 0.0;
322         for ( j=0; j<n; j++ ) {
323           xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
324           sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
325           sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
326           sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
327           v += 9;
328         }
329         y[0] += sum1; y[1] += sum2; y[2] += sum3;
330         y += 3;
331       }
332       break;
333     case 4:
334       for ( i=0; i<mbs; i++ ) {
335         n  = ii[1] - ii[0]; ii++;
336         sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0;
337         for ( j=0; j<n; j++ ) {
338           xb = x + 4*(*idx++);
339           x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
340           sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
341           sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
342           sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
343           sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
344           v += 16;
345         }
346         y[0] += sum1; y[1] += sum2; y[2] += sum3; y[3] += sum4;
347         y += 4;
348       }
349       break;
350     case 5:
351       for ( i=0; i<mbs; i++ ) {
352         n  = ii[1] - ii[0]; ii++;
353         sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0;
354         for ( j=0; j<n; j++ ) {
355           xb = x + 5*(*idx++);
356           x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
357           sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
358           sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
359           sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
360           sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
361           sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
362           v += 25;
363         }
364         y[0] += sum1; y[1] += sum2; y[2] += sum3; y[3] += sum4; y[4] += sum5;
365         y += 5;
366       }
367       break;
368       /* block sizes larger then 5 by 5 are handled by BLAS */
369     default: {
370       int  _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
371       if (!a->mult_work) {
372         a->mult_work = (Scalar *) PetscMalloc(a->m*sizeof(Scalar));
373         CHKPTRQ(a->mult_work);
374       }
375       work = a->mult_work;
376       for ( i=0; i<mbs; i++ ) {
377         n     = ii[1] - ii[0]; ii++;
378         ncols = n*bs;
379         workt = work;
380         for ( j=0; j<n; j++ ) {
381           xb = x + bs*(*idx++);
382           for ( k=0; k<bs; k++ ) workt[k] = xb[k];
383           workt += bs;
384         }
385         LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,y,&_One);
386         v += n*bs2;
387         y += bs;
388       }
389     }
390   }
391   PLogFlops(2*bs2*a->nz - m);
392   return 0;
393 }
394 
395 static int MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,int *nz,int *nza,int *mem)
396 {
397   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
398   if (nz)  *nz  = a->bs*a->bs*a->nz;
399   if (nza) *nza = a->maxnz;
400   if (mem) *mem = (int)A->mem;
401   return 0;
402 }
403 
404 extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*);
405 extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double);
406 extern int MatSolve_SeqBAIJ(Mat,Vec,Vec);
407 extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*);
408 extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*);
409 extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*);
410 extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*);
411 extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*);
412 extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*);
413 
414 static int MatGetSize_SeqBAIJ(Mat A,int *m,int *n)
415 {
416   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
417   *m = a->m; *n = a->n;
418   return 0;
419 }
420 
421 static int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n)
422 {
423   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
424   *m = 0; *n = a->m;
425   return 0;
426 }
427 
428 static int MatNorm_SeqBAIJ(Mat A,NormType type,double *norm)
429 {
430   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
431   Scalar      *v = a->a;
432   double      sum = 0.0;
433   int         i;
434 
435   if (type == NORM_FROBENIUS) {
436     for (i=0; i<a->nz; i++ ) {
437 #if defined(PETSC_COMPLEX)
438       sum += real(conj(*v)*(*v)); v++;
439 #else
440       sum += (*v)*(*v); v++;
441 #endif
442     }
443     *norm = sqrt(sum);
444   }
445   else {
446     SETERRQ(1,"MatNorm_SeqBAIJ:No support for this norm yet");
447   }
448   return 0;
449 }
450 
451 /*
452      note: This can only work for identity for row and col. It would
453    be good to check this and otherwise generate an error.
454 */
455 static int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,double efill,int fill)
456 {
457   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
458   Mat         outA;
459   int         ierr;
460 
461   if (fill != 0) SETERRQ(1,"MatILUFactor_SeqBAIJ:Only fill=0 supported");
462 
463   outA          = inA;
464   inA->factor   = FACTOR_LU;
465   a->row        = row;
466   a->col        = col;
467 
468   a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work);
469 
470   if (!a->diag) {
471     ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr);
472   }
473   ierr = MatLUFactorNumeric(inA,&outA); CHKERRQ(ierr);
474   return 0;
475 }
476 
477 static int MatScale_SeqBAIJ(Scalar *alpha,Mat inA)
478 {
479   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
480   int         one = 1, totalnz = a->bs*a->bs*a->nz;
481   BLscal_( &totalnz, alpha, a->a, &one );
482   PLogFlops(totalnz);
483   return 0;
484 }
485 
486 int MatPrintHelp_SeqBAIJ(Mat A)
487 {
488   static int called = 0;
489 
490   if (called) return 0; else called = 1;
491   return 0;
492 }
493 /* -------------------------------------------------------------------*/
494 static struct _MatOps MatOps = {0,
495        0,0,
496        MatMult_SeqBAIJ,0,
497        0,0,
498        MatSolve_SeqBAIJ,0,
499        0,0,
500        MatLUFactor_SeqBAIJ,0,
501        0,
502        0,
503        MatGetInfo_SeqBAIJ,0,
504        0,0,MatNorm_SeqBAIJ,
505        0,0,
506        0,
507        MatSetOption_SeqBAIJ,MatZeroEntries_SeqBAIJ,0,
508        MatGetReordering_SeqBAIJ,
509        MatLUFactorSymbolic_SeqBAIJ,MatLUFactorNumeric_SeqBAIJ_N,0,0,
510        MatGetSize_SeqBAIJ,MatGetSize_SeqBAIJ,MatGetOwnershipRange_SeqBAIJ,
511        MatILUFactorSymbolic_SeqBAIJ,0,
512        0,0,/*MatConvert_SeqBAIJ*/ 0,
513        0,0,
514        MatConvertSameType_SeqBAIJ,0,0,
515        MatILUFactor_SeqBAIJ,0,0,
516        0,0,
517        0,0,
518        MatPrintHelp_SeqBAIJ,MatScale_SeqBAIJ,
519        0};
520 
521 /*@C
522    MatCreateSeqBAIJ - Creates a sparse matrix in AIJ (compressed row) format
523    (the default parallel PETSc format).  For good matrix assembly performance
524    the user should preallocate the matrix storage by setting the parameter nz
525    (or nzz).  By setting these parameters accurately, performance can be
526    increased by more than a factor of 50.
527 
528    Input Parameters:
529 .  comm - MPI communicator, set to MPI_COMM_SELF
530 .  bs - size of block
531 .  m - number of rows
532 .  n - number of columns
533 .  nz - number of block nonzeros per block row (same for all rows)
534 .  nzz - number of block nonzeros per block row or PETSC_NULL
535          (possibly different for each row)
536 
537    Output Parameter:
538 .  A - the matrix
539 
540    Notes:
541    The BAIJ format, is fully compatible with standard Fortran 77
542    storage.  That is, the stored row and column indices can begin at
543    either one (as in Fortran) or zero.  See the users manual for details.
544 
545    Specify the preallocated storage with either nz or nnz (not both).
546    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
547    allocation.  For additional details, see the users manual chapter on
548    matrices and the file $(PETSC_DIR)/Performance.
549 
550 .seealso: MatCreate(), MatCreateMPIAIJ(), MatSetValues()
551 @*/
552 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A)
553 {
554   Mat         B;
555   Mat_SeqBAIJ *b;
556   int         i,len,ierr, flg,mbs = m/bs;
557 
558   if (mbs*bs != m)
559     SETERRQ(1,"MatCreateSeqBAIJ:Number rows must be divisible by blocksize");
560 
561   *A      = 0;
562   PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATSEQBAIJ,comm);
563   PLogObjectCreate(B);
564   B->data             = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b);
565   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
566   switch (bs) {
567     case 1:
568       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
569       break;
570     case 2:
571       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
572       break;
573     case 3:
574       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
575       break;
576     case 4:
577       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
578       break;
579     case 5:
580       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
581       break;
582   }
583   B->destroy          = MatDestroy_SeqBAIJ;
584   B->view             = MatView_SeqBAIJ;
585   B->factor           = 0;
586   B->lupivotthreshold = 1.0;
587   b->row              = 0;
588   b->col              = 0;
589   b->reallocs         = 0;
590 
591   b->m       = m;
592   b->mbs     = mbs;
593   b->n       = n;
594   b->imax    = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax);
595   if (nnz == PETSC_NULL) {
596     if (nz == PETSC_DEFAULT) nz = 5;
597     else if (nz <= 0)        nz = 1;
598     for ( i=0; i<mbs; i++ ) b->imax[i] = nz;
599     nz = nz*mbs;
600   }
601   else {
602     nz = 0;
603     for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];}
604   }
605 
606   /* allocate the matrix space */
607   len   = nz*sizeof(int) + nz*bs*bs*sizeof(Scalar) + (b->m+1)*sizeof(int);
608   b->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a);
609   PetscMemzero(b->a,nz*bs*bs*sizeof(Scalar));
610   b->j  = (int *) (b->a + nz*bs*bs);
611   PetscMemzero(b->j,nz*sizeof(int));
612   b->i  = b->j + nz;
613   b->singlemalloc = 1;
614 
615   b->i[0] = 0;
616   for (i=1; i<mbs+1; i++) {
617     b->i[i] = b->i[i-1] + b->imax[i-1];
618   }
619 
620   /* b->ilen will count nonzeros in each block row so far. */
621   b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int));
622   PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ));
623   for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;}
624 
625   b->bs               = bs;
626   b->mbs              = mbs;
627   b->nz               = 0;
628   b->maxnz            = nz;
629   b->sorted           = 0;
630   b->roworiented      = 1;
631   b->nonew            = 0;
632   b->diag             = 0;
633   b->solve_work       = 0;
634   b->mult_work        = 0;
635   b->spptr            = 0;
636 
637   *A = B;
638   ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr);
639   if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); }
640   return 0;
641 }
642 
643 int MatConvertSameType_SeqBAIJ(Mat A,Mat *B,int cpvalues)
644 {
645   Mat         C;
646   Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data;
647   int         i,len, mbs = a->mbs, bs = a->bs,nz = a->nz;
648 
649   if (a->i[mbs] != nz) SETERRQ(1,"MatConvertSameType_SeqBAIJ:Corrupt matrix");
650 
651   *B = 0;
652   PetscHeaderCreate(C,_Mat,MAT_COOKIE,MATSEQBAIJ,A->comm);
653   PLogObjectCreate(C);
654   C->data       = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c);
655   PetscMemcpy(&C->ops,&A->ops,sizeof(struct _MatOps));
656   C->destroy    = MatDestroy_SeqBAIJ;
657   C->view       = MatView_SeqBAIJ;
658   C->factor     = A->factor;
659   c->row        = 0;
660   c->col        = 0;
661   C->assembled  = PETSC_TRUE;
662 
663   c->m          = a->m;
664   c->n          = a->n;
665   c->bs         = a->bs;
666   c->mbs        = a->mbs;
667   c->nbs        = a->nbs;
668 
669   c->imax       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax);
670   c->ilen       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen);
671   for ( i=0; i<mbs; i++ ) {
672     c->imax[i] = a->imax[i];
673     c->ilen[i] = a->ilen[i];
674   }
675 
676   /* allocate the matrix space */
677   c->singlemalloc = 1;
678   len   = (mbs+1)*sizeof(int) + nz*(bs*bs*sizeof(Scalar) + sizeof(int));
679   c->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(c->a);
680   c->j  = (int *) (c->a + nz*bs*bs);
681   c->i  = c->j + nz;
682   PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));
683   if (mbs > 0) {
684     PetscMemcpy(c->j,a->j,nz*sizeof(int));
685     if (cpvalues == COPY_VALUES) {
686       PetscMemcpy(c->a,a->a,bs*bs*nz*sizeof(Scalar));
687     }
688   }
689 
690   PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ));
691   c->sorted      = a->sorted;
692   c->roworiented = a->roworiented;
693   c->nonew       = a->nonew;
694 
695   if (a->diag) {
696     c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag);
697     PLogObjectMemory(C,(mbs+1)*sizeof(int));
698     for ( i=0; i<mbs; i++ ) {
699       c->diag[i] = a->diag[i];
700     }
701   }
702   else c->diag          = 0;
703   c->nz                 = a->nz;
704   c->maxnz              = a->maxnz;
705   c->solve_work         = 0;
706   c->spptr              = 0;      /* Dangerous -I'm throwing away a->spptr */
707   c->mult_work          = 0;
708   *B = C;
709   return 0;
710 }
711 
712 int MatLoad_SeqBAIJ(Viewer bview,MatType type,Mat *A)
713 {
714   Mat_SeqBAIJ  *a;
715   Mat          B;
716   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg;
717   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
718   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
719   int          *masked, nmask,tmp,ishift,bs2;
720   Scalar       *aa;
721   PetscObject  vobj = (PetscObject) bview;
722   MPI_Comm     comm = vobj->comm;
723 
724   ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,&flg);CHKERRQ(ierr);
725   bs2  = bs*bs;
726 
727   MPI_Comm_size(comm,&size);
728   if (size > 1) SETERRQ(1,"MatLoad_SeqBAIJ:view must have one processor");
729   ierr = ViewerFileGetDescriptor_Private(bview,&fd); CHKERRQ(ierr);
730   ierr = SYRead(fd,header,4,SYINT); CHKERRQ(ierr);
731   if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_SeqBAIJ:not Mat object");
732   M = header[1]; N = header[2]; nz = header[3];
733 
734   if (M != N) SETERRQ(1,"MatLoad_SeqBAIJ:Can only do square matrices");
735 
736   /*
737      This code adds extra rows to make sure the number of rows is
738     divisible by the blocksize
739   */
740   mbs        = M/bs;
741   extra_rows = bs - M + bs*(mbs);
742   if (extra_rows == bs) extra_rows = 0;
743   else                  mbs++;
744   if (extra_rows) {
745     PLogInfo(0,"MatLoad_SeqBAIJ:Padding loading matrix to match blocksize");
746   }
747 
748   /* read in row lengths */
749   rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths);
750   ierr = SYRead(fd,rowlengths,M,SYINT); CHKERRQ(ierr);
751   for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
752 
753   /* read in column indices */
754   jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) ); CHKPTRQ(jj);
755   ierr = SYRead(fd,jj,nz,SYINT); CHKERRQ(ierr);
756   for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i;
757 
758   /* loop over row lengths determining block row lengths */
759   browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths);
760   PetscMemzero(browlengths,mbs*sizeof(int));
761   mask   = (int *) PetscMalloc( 2*mbs*sizeof(int) ); CHKPTRQ(mask);
762   PetscMemzero(mask,mbs*sizeof(int));
763   masked = mask + mbs;
764   rowcount = 0; nzcount = 0;
765   for ( i=0; i<mbs; i++ ) {
766     nmask = 0;
767     for ( j=0; j<bs; j++ ) {
768       kmax = rowlengths[rowcount];
769       for ( k=0; k<kmax; k++ ) {
770         tmp = jj[nzcount++]/bs;
771         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
772       }
773       rowcount++;
774     }
775     browlengths[i] += nmask;
776     /* zero out the mask elements we set */
777     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
778   }
779 
780   /* create our matrix */
781   ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);
782          CHKERRQ(ierr);
783   B = *A;
784   a = (Mat_SeqBAIJ *) B->data;
785 
786   /* set matrix "i" values */
787   a->i[0] = 0;
788   for ( i=1; i<= mbs; i++ ) {
789     a->i[i]      = a->i[i-1] + browlengths[i-1];
790     a->ilen[i-1] = browlengths[i-1];
791   }
792   a->nz = 0;
793   for ( i=0; i<mbs; i++ ) a->nz += browlengths[i];
794 
795   /* read in nonzero values */
796   aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa);
797   ierr = SYRead(fd,aa,nz,SYSCALAR); CHKERRQ(ierr);
798   for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0;
799 
800   /* set "a" and "j" values into matrix */
801   nzcount = 0; jcount = 0;
802   for ( i=0; i<mbs; i++ ) {
803     nzcountb = nzcount;
804     nmask    = 0;
805     for ( j=0; j<bs; j++ ) {
806       kmax = rowlengths[i*bs+j];
807       for ( k=0; k<kmax; k++ ) {
808         tmp = jj[nzcount++]/bs;
809 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
810       }
811       rowcount++;
812     }
813     /* sort the masked values */
814     SYIsort(nmask,masked);
815 
816     /* set "j" values into matrix */
817     maskcount = 1;
818     for ( j=0; j<nmask; j++ ) {
819       a->j[jcount++]  = masked[j];
820       mask[masked[j]] = maskcount++;
821     }
822     /* set "a" values into matrix */
823     ishift = bs2*a->i[i];
824     for ( j=0; j<bs; j++ ) {
825       kmax = rowlengths[i*bs+j];
826       for ( k=0; k<kmax; k++ ) {
827         tmp    = jj[nzcountb]/bs ;
828         block  = mask[tmp] - 1;
829         point  = jj[nzcountb] - bs*tmp;
830         idx    = ishift + bs2*block + j + bs*point;
831         a->a[idx] = aa[nzcountb++];
832       }
833     }
834     /* zero out the mask elements we set */
835     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
836   }
837   if (jcount != a->nz) SETERRQ(1,"MatLoad_SeqBAIJ:Error bad binary matrix");
838 
839   PetscFree(rowlengths);
840   PetscFree(browlengths);
841   PetscFree(aa);
842   PetscFree(jj);
843   PetscFree(mask);
844 
845   B->assembled = PETSC_TRUE;
846 
847   ierr = OptionsHasName(PETSC_NULL,"-mat_view_info",&flg); CHKERRQ(ierr);
848   if (flg) {
849     Viewer viewer;
850     ierr = ViewerFileOpenASCII(B->comm,"stdout",&viewer);CHKERRQ(ierr);
851     ierr = ViewerFileSetFormat(viewer,FILE_FORMAT_INFO,0);CHKERRQ(ierr);
852     ierr = MatView(B,viewer); CHKERRQ(ierr);
853     ierr = ViewerDestroy(viewer); CHKERRQ(ierr);
854   }
855   ierr = OptionsHasName(PETSC_NULL,"-mat_view_info_detailed",&flg);CHKERRQ(ierr);
856   if (flg) {
857     Viewer viewer;
858     ierr = ViewerFileOpenASCII(B->comm,"stdout",&viewer);CHKERRQ(ierr);
859     ierr = ViewerFileSetFormat(viewer,FILE_FORMAT_INFO_DETAILED,0);CHKERRQ(ierr);
860     ierr = MatView(B,viewer); CHKERRQ(ierr);
861     ierr = ViewerDestroy(viewer); CHKERRQ(ierr);
862   }
863   ierr = OptionsHasName(PETSC_NULL,"-mat_view",&flg); CHKERRQ(ierr);
864   if (flg) {
865     Viewer viewer;
866     ierr = ViewerFileOpenASCII(B->comm,"stdout",&viewer);CHKERRQ(ierr);
867     ierr = MatView(B,viewer); CHKERRQ(ierr);
868     ierr = ViewerDestroy(viewer); CHKERRQ(ierr);
869   }
870   ierr = OptionsHasName(PETSC_NULL,"-mat_view_matlab",&flg); CHKERRQ(ierr);
871   if (flg) {
872     Viewer viewer;
873     ierr = ViewerFileOpenASCII(B->comm,"stdout",&viewer);CHKERRQ(ierr);
874     ierr = ViewerFileSetFormat(viewer,FILE_FORMAT_MATLAB,"M");CHKERRQ(ierr);
875     ierr = MatView(B,viewer); CHKERRQ(ierr);
876     ierr = ViewerDestroy(viewer); CHKERRQ(ierr);
877   }
878   ierr = OptionsHasName(PETSC_NULL,"-mat_view_draw",&flg); CHKERRQ(ierr);
879   if (flg) {
880     Draw    win;
881     ierr = DrawOpenX(B->comm,0,0,0,0,300,300,&win); CHKERRQ(ierr);
882     ierr = MatView(B,(Viewer)win); CHKERRQ(ierr);
883     ierr = DrawSyncFlush(win); CHKERRQ(ierr);
884     ierr = DrawDestroy(win); CHKERRQ(ierr);
885   }
886   return 0;
887 }
888 
889 
890 
891