xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 608f96eb4f80b150d49ed2fa0fafd4acdd2a6f3a)
1 
2 #ifndef lint
3 static char vcid[] = "$Id: baij.c,v 1.44 1996/05/02 15:42:40 balay Exp balay $";
4 #endif
5 
6 /*
7     Defines the basic matrix operations for the BAIJ (compressed row)
8   matrix storage format.
9 */
10 #include "baij.h"
11 #include "src/vec/vecimpl.h"
12 #include "src/inline/spops.h"
13 #include "petsc.h"
14 
15 extern   int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);
16 
17 static int MatGetReordering_SeqBAIJ(Mat A,MatOrdering type,IS *rperm,IS *cperm)
18 {
19   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
20   int         ierr, *ia, *ja,n = a->mbs,*idx,i,ishift,oshift;
21 
22   /*
23      this is tacky: In the future when we have written special factorization
24      and solve routines for the identity permutation we should use a
25      stride index set instead of the general one.
26   */
27   if (type  == ORDER_NATURAL) {
28     idx = (int *) PetscMalloc( n*sizeof(int) ); CHKPTRQ(idx);
29     for ( i=0; i<n; i++ ) idx[i] = i;
30     ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,rperm); CHKERRQ(ierr);
31     ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,cperm); CHKERRQ(ierr);
32     PetscFree(idx);
33     ISSetPermutation(*rperm);
34     ISSetPermutation(*cperm);
35     ISSetIdentity(*rperm);
36     ISSetIdentity(*cperm);
37     return 0;
38   }
39 
40   MatReorderingRegisterAll();
41   ishift = 0;
42   oshift = -MatReorderingIndexShift[(int)type];
43   if (MatReorderingRequiresSymmetric[(int)type]) {
44     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,ishift,oshift,&ia,&ja);CHKERRQ(ierr);
45     ierr = MatGetReordering_IJ(n,ia,ja,type,rperm,cperm); CHKERRQ(ierr);
46     PetscFree(ia); PetscFree(ja);
47   } else {
48     if (ishift == oshift) {
49       ierr = MatGetReordering_IJ(n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr);
50     }
51     else if (ishift == -1) {
52       /* temporarily subtract 1 from i and j indices */
53       int nz = a->i[n] - 1;
54       for ( i=0; i<nz; i++ ) a->j[i]--;
55       for ( i=0; i<n+1; i++ ) a->i[i]--;
56       ierr = MatGetReordering_IJ(n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr);
57       for ( i=0; i<nz; i++ ) a->j[i]++;
58       for ( i=0; i<n+1; i++ ) a->i[i]++;
59     } else {
60       /* temporarily add 1 to i and j indices */
61       int nz = a->i[n] - 1;
62       for ( i=0; i<nz; i++ ) a->j[i]++;
63       for ( i=0; i<n+1; i++ ) a->i[i]++;
64       ierr = MatGetReordering_IJ(n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr);
65       for ( i=0; i<nz; i++ ) a->j[i]--;
66       for ( i=0; i<n+1; i++ ) a->i[i]--;
67     }
68   }
69   return 0;
70 }
71 
72 /*
73      Adds diagonal pointers to sparse matrix structure.
74 */
75 
76 int MatMarkDiag_SeqBAIJ(Mat A)
77 {
78   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
79   int         i,j, *diag, m = a->mbs;
80 
81   diag = (int *) PetscMalloc( (m+1)*sizeof(int)); CHKPTRQ(diag);
82   PLogObjectMemory(A,(m+1)*sizeof(int));
83   for ( i=0; i<m; i++ ) {
84     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
85       if (a->j[j] == i) {
86         diag[i] = j;
87         break;
88       }
89     }
90   }
91   a->diag = diag;
92   return 0;
93 }
94 
95 #include "draw.h"
96 #include "pinclude/pviewer.h"
97 #include "sys.h"
98 
99 static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer)
100 {
101   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
102   int         i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l,bs2=a->bs2;
103   Scalar      *aa;
104 
105   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
106   col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens);
107   col_lens[0] = MAT_COOKIE;
108   col_lens[1] = a->m;
109   col_lens[2] = a->n;
110   col_lens[3] = a->nz*bs2;
111 
112   /* store lengths of each row and write (including header) to file */
113   count = 0;
114   for ( i=0; i<a->mbs; i++ ) {
115     for ( j=0; j<bs; j++ ) {
116       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
117     }
118   }
119   ierr = PetscBinaryWrite(fd,col_lens,4+a->m,BINARY_INT,1); CHKERRQ(ierr);
120   PetscFree(col_lens);
121 
122   /* store column indices (zero start index) */
123   jj = (int *) PetscMalloc( a->nz*bs2*sizeof(int) ); CHKPTRQ(jj);
124   count = 0;
125   for ( i=0; i<a->mbs; i++ ) {
126     for ( j=0; j<bs; j++ ) {
127       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
128         for ( l=0; l<bs; l++ ) {
129           jj[count++] = bs*a->j[k] + l;
130         }
131       }
132     }
133   }
134   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,BINARY_INT,0); CHKERRQ(ierr);
135   PetscFree(jj);
136 
137   /* store nonzero values */
138   aa = (Scalar *) PetscMalloc(a->nz*bs2*sizeof(Scalar)); CHKPTRQ(aa);
139   count = 0;
140   for ( i=0; i<a->mbs; i++ ) {
141     for ( j=0; j<bs; j++ ) {
142       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
143         for ( l=0; l<bs; l++ ) {
144           aa[count++] = a->a[bs2*k + l*bs + j];
145         }
146       }
147     }
148   }
149   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,BINARY_SCALAR,0); CHKERRQ(ierr);
150   PetscFree(aa);
151   return 0;
152 }
153 
154 static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer)
155 {
156   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
157   int         ierr, i,j,format,bs = a->bs,k,l,bs2=a->bs2;
158   FILE        *fd;
159   char        *outputname;
160 
161   ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
162   ierr = ViewerFileGetOutputname_Private(viewer,&outputname);CHKERRQ(ierr);
163   ierr = ViewerGetFormat(viewer,&format);
164   if (format == ASCII_FORMAT_INFO || format == ASCII_FORMAT_INFO_DETAILED) {
165     fprintf(fd,"  block size is %d\n",bs);
166   }
167   else if (format == ASCII_FORMAT_MATLAB) {
168     SETERRQ(1,"MatView_SeqBAIJ_ASCII:Matlab format not supported");
169   }
170   else if (format == ASCII_FORMAT_COMMON) {
171     for ( i=0; i<a->mbs; i++ ) {
172       for ( j=0; j<bs; j++ ) {
173         fprintf(fd,"row %d:",i*bs+j);
174         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
175           for ( l=0; l<bs; l++ ) {
176 #if defined(PETSC_COMPLEX)
177           if (imag(a->a[bs2*k + l*bs + j]) != 0.0 && real(a->a[bs2*k + l*bs + j]) != 0.0)
178             fprintf(fd," %d %g + %g i",bs*a->j[k]+l,
179               real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j]));
180           else if (real(a->a[bs2*k + l*bs + j]) != 0.0)
181             fprintf(fd," %d %g",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j]));
182 #else
183           if (a->a[bs2*k + l*bs + j] != 0.0)
184             fprintf(fd," %d %g",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
185 #endif
186           }
187         }
188         fprintf(fd,"\n");
189       }
190     }
191   }
192   else {
193     for ( i=0; i<a->mbs; i++ ) {
194       for ( j=0; j<bs; j++ ) {
195         fprintf(fd,"row %d:",i*bs+j);
196         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
197           for ( l=0; l<bs; l++ ) {
198 #if defined(PETSC_COMPLEX)
199           if (imag(a->a[bs2*k + l*bs + j]) != 0.0) {
200             fprintf(fd," %d %g + %g i",bs*a->j[k]+l,
201               real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j]));
202           }
203           else {
204             fprintf(fd," %d %g",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j]));
205           }
206 #else
207             fprintf(fd," %d %g",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
208 #endif
209           }
210         }
211         fprintf(fd,"\n");
212       }
213     }
214   }
215   fflush(fd);
216   return 0;
217 }
218 
219 static int MatView_SeqBAIJ(PetscObject obj,Viewer viewer)
220 {
221   Mat         A = (Mat) obj;
222   ViewerType  vtype;
223   int         ierr;
224 
225   if (!viewer) {
226     viewer = STDOUT_VIEWER_SELF;
227   }
228 
229   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
230   if (vtype == MATLAB_VIEWER) {
231     SETERRQ(1,"MatView_SeqBAIJ:Matlab viewer not supported");
232   }
233   else if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER){
234     return MatView_SeqBAIJ_ASCII(A,viewer);
235   }
236   else if (vtype == BINARY_FILE_VIEWER) {
237     return MatView_SeqBAIJ_Binary(A,viewer);
238   }
239   else if (vtype == DRAW_VIEWER) {
240     SETERRQ(1,"MatView_SeqBAIJ:Draw viewer not supported");
241   }
242   return 0;
243 }
244 
245 #define CHUNKSIZE  10
246 
247 /* This version has row oriented v  */
248 static int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is)
249 {
250   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
251   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
252   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented;
253   int         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
254   int          ridx,cidx,bs2=a->bs2;
255   Scalar      *ap,value,*aa=a->a,*bap;
256 
257   for ( k=0; k<m; k++ ) { /* loop over added rows */
258     row  = im[k]; brow = row/bs;
259     if (row < 0) SETERRQ(1,"MatSetValues_SeqBAIJ:Negative row");
260     if (row >= a->m) SETERRQ(1,"MatSetValues_SeqBAIJ:Row too large");
261     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
262     rmax = imax[brow]; nrow = ailen[brow];
263     low = 0;
264     for ( l=0; l<n; l++ ) { /* loop over added columns */
265       if (in[l] < 0) SETERRQ(1,"MatSetValues_SeqBAIJ:Negative column");
266       if (in[l] >= a->n) SETERRQ(1,"MatSetValues_SeqBAIJ:Column too large");
267       col = in[l]; bcol = col/bs;
268       ridx = row % bs; cidx = col % bs;
269       if (roworiented) {
270         value = *v++;
271       }
272       else {
273         value = v[k + l*m];
274       }
275       if (!sorted) low = 0; high = nrow;
276       while (high-low > 5) {
277         t = (low+high)/2;
278         if (rp[t] > bcol) high = t;
279         else             low  = t;
280       }
281       for ( i=low; i<high; i++ ) {
282         if (rp[i] > bcol) break;
283         if (rp[i] == bcol) {
284           bap  = ap +  bs2*i + bs*cidx + ridx;
285           if (is == ADD_VALUES) *bap += value;
286           else                  *bap  = value;
287           goto noinsert;
288         }
289       }
290       if (nonew) goto noinsert;
291       if (nrow >= rmax) {
292         /* there is no extra room in row, therefore enlarge */
293         int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
294         Scalar *new_a;
295 
296         /* malloc new storage space */
297         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int);
298         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a);
299         new_j   = (int *) (new_a + bs2*new_nz);
300         new_i   = new_j + new_nz;
301 
302         /* copy over old data into new slots */
303         for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];}
304         for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;}
305         PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));
306         len = (new_nz - CHUNKSIZE - ai[brow] - nrow);
307         PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,
308                                                            len*sizeof(int));
309         PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar));
310         PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));
311         PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),
312                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar));
313         /* free up old matrix storage */
314         PetscFree(a->a);
315         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);}
316         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
317         a->singlemalloc = 1;
318 
319         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
320         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
321         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar)));
322         a->maxnz += CHUNKSIZE;
323         a->reallocs++;
324         a->nz++;
325       }
326       N = nrow++ - 1;
327       /* shift up all the later entries in this row */
328       for ( ii=N; ii>=i; ii-- ) {
329         rp[ii+1] = rp[ii];
330          PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar));
331       }
332       if (N>=i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar));
333       rp[i] = bcol;
334       ap[bs2*i + bs*cidx + ridx] = value;
335       noinsert:;
336       low = i;
337     }
338     ailen[brow] = nrow;
339   }
340   return 0;
341 }
342 
343 static int MatGetSize_SeqBAIJ(Mat A,int *m,int *n)
344 {
345   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
346   *m = a->m; *n = a->n;
347   return 0;
348 }
349 
350 static int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n)
351 {
352   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
353   *m = 0; *n = a->m;
354   return 0;
355 }
356 
357 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
358 {
359   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
360   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
361   Scalar      *aa,*v_i,*aa_i;
362 
363   bs  = a->bs;
364   ai  = a->i;
365   aj  = a->j;
366   aa  = a->a;
367   bs2 = a->bs2;
368 
369   if (row < 0 || row >= a->m) SETERRQ(1,"MatGetRow_SeqBAIJ:Row out of range");
370 
371   bn  = row/bs;   /* Block number */
372   bp  = row % bs; /* Block Position */
373   M   = ai[bn+1] - ai[bn];
374   *nz = bs*M;
375 
376   if (v) {
377     *v = 0;
378     if (*nz) {
379       *v = (Scalar *) PetscMalloc( (*nz)*sizeof(Scalar) ); CHKPTRQ(*v);
380       for ( i=0; i<M; i++ ) { /* for each block in the block row */
381         v_i  = *v + i*bs;
382         aa_i = aa + bs2*(ai[bn] + i);
383         for ( j=bp,k=0; j<bs2; j+=bs,k++ ) {v_i[k] = aa_i[j];}
384       }
385     }
386   }
387 
388   if (idx) {
389     *idx = 0;
390     if (*nz) {
391       *idx = (int *) PetscMalloc( (*nz)*sizeof(int) ); CHKPTRQ(*idx);
392       for ( i=0; i<M; i++ ) { /* for each block in the block row */
393         idx_i = *idx + i*bs;
394         itmp  = bs*aj[ai[bn] + i];
395         for ( j=0; j<bs; j++ ) {idx_i[j] = itmp++;}
396       }
397     }
398   }
399   return 0;
400 }
401 
402 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
403 {
404   if (idx) {if (*idx) PetscFree(*idx);}
405   if (v)   {if (*v)   PetscFree(*v);}
406   return 0;
407 }
408 
409 static int MatTranspose_SeqBAIJ(Mat A,Mat *B)
410 {
411   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
412   Mat         C;
413   int         i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
414   int         *rows,*cols,bs2=a->bs2;
415   Scalar      *array=a->a;
416 
417   if (B==PETSC_NULL && mbs!=nbs)
418     SETERRQ(1,"MatTranspose_SeqBAIJ:Square matrix only for in-place");
419   col = (int *) PetscMalloc((1+nbs)*sizeof(int)); CHKPTRQ(col);
420   PetscMemzero(col,(1+nbs)*sizeof(int));
421 
422   for ( i=0; i<ai[mbs]; i++ ) col[aj[i]] += 1;
423   ierr = MatCreateSeqBAIJ(A->comm,bs,a->n,a->m,PETSC_NULL,col,&C); CHKERRQ(ierr);
424   PetscFree(col);
425   rows = (int *) PetscMalloc(2*bs*sizeof(int)); CHKPTRQ(rows);
426   cols = rows + bs;
427   for ( i=0; i<mbs; i++ ) {
428     cols[0] = i*bs;
429     for (k=1; k<bs; k++ ) cols[k] = cols[k-1] + 1;
430     len = ai[i+1] - ai[i];
431     for ( j=0; j<len; j++ ) {
432       rows[0] = (*aj++)*bs;
433       for (k=1; k<bs; k++ ) rows[k] = rows[k-1] + 1;
434       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES); CHKERRQ(ierr);
435       array += bs2;
436     }
437   }
438   PetscFree(rows);
439 
440   ierr = MatAssemblyBegin(C,FINAL_ASSEMBLY); CHKERRQ(ierr);
441   ierr = MatAssemblyEnd(C,FINAL_ASSEMBLY); CHKERRQ(ierr);
442 
443   if (B != PETSC_NULL) {
444     *B = C;
445   } else {
446     /* This isn't really an in-place transpose */
447     PetscFree(a->a);
448     if (!a->singlemalloc) {PetscFree(a->i); PetscFree(a->j);}
449     if (a->diag) PetscFree(a->diag);
450     if (a->ilen) PetscFree(a->ilen);
451     if (a->imax) PetscFree(a->imax);
452     if (a->solve_work) PetscFree(a->solve_work);
453     PetscFree(a);
454     PetscMemcpy(A,C,sizeof(struct _Mat));
455     PetscHeaderDestroy(C);
456   }
457   return 0;
458 }
459 
460 
461 static int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
462 {
463   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
464   int        fshift = 0,i,j,*ai = a->i, *aj = a->j, *imax = a->imax;
465   int        m = a->m,*ip, N, *ailen = a->ilen;
466   int        mbs = a->mbs, bs2 = a->bs2;
467   Scalar     *aa = a->a, *ap;
468 
469   if (mode == FLUSH_ASSEMBLY) return 0;
470 
471   for ( i=1; i<mbs; i++ ) {
472     /* move each row back by the amount of empty slots (fshift) before it*/
473     fshift += imax[i-1] - ailen[i-1];
474     if (fshift) {
475       ip = aj + ai[i]; ap = aa + bs2*ai[i];
476       N = ailen[i];
477       for ( j=0; j<N; j++ ) {
478         ip[j-fshift] = ip[j];
479         PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(Scalar));
480       }
481     }
482     ai[i] = ai[i-1] + ailen[i-1];
483   }
484   if (mbs) {
485     fshift += imax[mbs-1] - ailen[mbs-1];
486     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
487   }
488   /* reset ilen and imax for each row */
489   for ( i=0; i<mbs; i++ ) {
490     ailen[i] = imax[i] = ai[i+1] - ai[i];
491   }
492   a->nz = ai[mbs];
493 
494   /* diagonals may have moved, so kill the diagonal pointers */
495   if (fshift && a->diag) {
496     PetscFree(a->diag);
497     PLogObjectMemory(A,-(m+1)*sizeof(int));
498     a->diag = 0;
499   }
500   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ: Unneed storage space(blocks) %d used %d, rows %d, block size %d\n", fshift*bs2,a->nz*bs2,m,a->bs);
501   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ: Number of mallocs during MatSetValues %d\n",
502            a->reallocs);
503   return 0;
504 }
505 
506 static int MatZeroEntries_SeqBAIJ(Mat A)
507 {
508   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
509   PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(Scalar));
510   return 0;
511 }
512 
513 int MatDestroy_SeqBAIJ(PetscObject obj)
514 {
515   Mat         A  = (Mat) obj;
516   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
517 
518 #if defined(PETSC_LOG)
519   PLogObjectState(obj,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz);
520 #endif
521   PetscFree(a->a);
522   if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);}
523   if (a->diag) PetscFree(a->diag);
524   if (a->ilen) PetscFree(a->ilen);
525   if (a->imax) PetscFree(a->imax);
526   if (a->solve_work) PetscFree(a->solve_work);
527   if (a->mult_work) PetscFree(a->mult_work);
528   PetscFree(a);
529   PLogObjectDestroy(A);
530   PetscHeaderDestroy(A);
531   return 0;
532 }
533 
534 static int MatSetOption_SeqBAIJ(Mat A,MatOption op)
535 {
536   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
537   if      (op == ROW_ORIENTED)              a->roworiented = 1;
538   else if (op == COLUMN_ORIENTED)           a->roworiented = 0;
539   else if (op == COLUMNS_SORTED)            a->sorted      = 1;
540   else if (op == NO_NEW_NONZERO_LOCATIONS)  a->nonew       = 1;
541   else if (op == YES_NEW_NONZERO_LOCATIONS) a->nonew       = 0;
542   else if (op == ROWS_SORTED ||
543            op == SYMMETRIC_MATRIX ||
544            op == STRUCTURALLY_SYMMETRIC_MATRIX ||
545            op == YES_NEW_DIAGONALS)
546     PLogInfo(A,"Info:MatSetOption_SeqBAIJ:Option ignored\n");
547   else if (op == NO_NEW_DIAGONALS)
548     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:NO_NEW_DIAGONALS");}
549   else
550     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:unknown option");}
551   return 0;
552 }
553 
554 
555 /* -------------------------------------------------------*/
556 /* Should check that shapes of vectors and matrices match */
557 /* -------------------------------------------------------*/
558 #include "pinclude/plapack.h"
559 
560 static int MatMult_SeqBAIJ(Mat A,Vec xx,Vec zz)
561 {
562   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
563   Scalar          *xg,*zg;
564   register Scalar *x,*z,*v,sum,*xb,sum1,sum2,sum3,sum4,sum5;
565   register Scalar x1,x2,x3,x4,x5;
566   int             mbs=a->mbs,i,*idx,*ii;
567   int             bs=a->bs,j,n,bs2=a->bs2,ierr;
568 
569   ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg;
570   ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg;
571 
572   idx   = a->j;
573   v     = a->a;
574   ii    = a->i;
575 
576   switch (bs) {
577   case 1:
578     for ( i=0; i<mbs; i++ ) {
579       n    = ii[1] - ii[0]; ii++;
580       sum  = 0.0;
581       while (n--) sum += *v++ * x[*idx++];
582       z[i] = sum;
583     }
584     break;
585   case 2:
586     for ( i=0; i<mbs; i++ ) {
587       n  = ii[1] - ii[0]; ii++;
588       sum1 = 0.0; sum2 = 0.0;
589       for ( j=0; j<n; j++ ) {
590         xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
591         sum1 += v[0]*x1 + v[2]*x2;
592         sum2 += v[1]*x1 + v[3]*x2;
593         v += 4;
594       }
595       z[0] = sum1; z[1] = sum2;
596       z += 2;
597     }
598     break;
599   case 3:
600     for ( i=0; i<mbs; i++ ) {
601       n  = ii[1] - ii[0]; ii++;
602       sum1 = 0.0; sum2 = 0.0; sum3 = 0.0;
603       for ( j=0; j<n; j++ ) {
604         xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
605         sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
606         sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
607         sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
608         v += 9;
609       }
610       z[0] = sum1; z[1] = sum2; z[2] = sum3;
611       z += 3;
612     }
613     break;
614   case 4:
615     for ( i=0; i<mbs; i++ ) {
616       n  = ii[1] - ii[0]; ii++;
617       sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0;
618       for ( j=0; j<n; j++ ) {
619         xb = x + 4*(*idx++);
620         x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
621         sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
622         sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
623         sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
624         sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
625         v += 16;
626       }
627       z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4;
628       z += 4;
629     }
630     break;
631   case 5:
632     for ( i=0; i<mbs; i++ ) {
633       n  = ii[1] - ii[0]; ii++;
634       sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0;
635       for ( j=0; j<n; j++ ) {
636         xb = x + 5*(*idx++);
637         x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
638         sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
639         sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
640         sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
641         sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
642         sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
643         v += 25;
644       }
645       z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5;
646       z += 5;
647     }
648     break;
649     /* block sizes larger then 5 by 5 are handled by BLAS */
650   default: {
651       int  _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt, _DZero = 0.0;
652       if (!a->mult_work) {
653         k = PetscMax(a->m,a->n);
654         a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));
655         CHKPTRQ(a->mult_work);
656       }
657       work = a->mult_work;
658       for ( i=0; i<mbs; i++ ) {
659         n     = ii[1] - ii[0]; ii++;
660         ncols = n*bs;
661         workt = work;
662         for ( j=0; j<n; j++ ) {
663           xb = x + bs*(*idx++);
664           for ( k=0; k<bs; k++ ) workt[k] = xb[k];
665           workt += bs;
666         }
667         LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DZero,z,&_One);
668         v += n*bs2;
669         z += bs;
670       }
671     }
672   }
673   ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr);
674   ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr);
675   PLogFlops(2*a->nz*bs2 - a->m);
676   return 0;
677 }
678 
679 static int MatMultTrans_SeqBAIJ(Mat A,Vec xx,Vec zz)
680 {
681   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
682   Scalar          *xg,*zg,*zb;
683   register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5;
684   int             mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n;
685   int             bs=a->bs,j,n,bs2=a->bs2,*ib,ierr;
686 
687 
688   ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg;
689   ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg;
690   PetscMemzero(z,N*sizeof(Scalar));
691 
692   idx   = a->j;
693   v     = a->a;
694   ii    = a->i;
695 
696   switch (bs) {
697   case 1:
698     for ( i=0; i<mbs; i++ ) {
699       n  = ii[1] - ii[0]; ii++;
700       xb = x + i; x1 = xb[0];
701       ib = idx + ai[i];
702       for ( j=0; j<n; j++ ) {
703         rval    = ib[j];
704         z[rval] += *v++ * x1;
705       }
706     }
707     break;
708   case 2:
709     for ( i=0; i<mbs; i++ ) {
710       n  = ii[1] - ii[0]; ii++;
711       xb = x + 2*i; x1 = xb[0]; x2 = xb[1];
712       ib = idx + ai[i];
713       for ( j=0; j<n; j++ ) {
714         rval      = ib[j]*2;
715         z[rval++] += v[0]*x1 + v[1]*x2;
716         z[rval++] += v[2]*x1 + v[3]*x2;
717         v += 4;
718       }
719     }
720     break;
721   case 3:
722     for ( i=0; i<mbs; i++ ) {
723       n  = ii[1] - ii[0]; ii++;
724       xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
725       ib = idx + ai[i];
726       for ( j=0; j<n; j++ ) {
727         rval      = ib[j]*3;
728         z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3;
729         z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3;
730         z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3;
731         v += 9;
732       }
733     }
734     break;
735   case 4:
736     for ( i=0; i<mbs; i++ ) {
737       n  = ii[1] - ii[0]; ii++;
738       xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
739       ib = idx + ai[i];
740       for ( j=0; j<n; j++ ) {
741         rval      = ib[j]*4;
742         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4;
743         z[rval++] +=  v[4]*x1 +  v[5]*x2 +  v[6]*x3 +  v[7]*x4;
744         z[rval++] +=  v[8]*x1 +  v[9]*x2 + v[10]*x3 + v[11]*x4;
745         z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
746         v += 16;
747       }
748     }
749     break;
750   case 5:
751     for ( i=0; i<mbs; i++ ) {
752       n  = ii[1] - ii[0]; ii++;
753       xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
754       x4 = xb[3];   x5 = xb[4];
755       ib = idx + ai[i];
756       for ( j=0; j<n; j++ ) {
757         rval      = ib[j]*5;
758         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 +  v[4]*x5;
759         z[rval++] +=  v[5]*x1 +  v[6]*x2 +  v[7]*x3 +  v[8]*x4 +  v[9]*x5;
760         z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5;
761         z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5;
762         z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5;
763         v += 25;
764       }
765     }
766     break;
767       /* block sizes larger then 5 by 5 are handled by BLAS */
768     default: {
769       int  _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
770       if (!a->mult_work) {
771         k = PetscMax(a->m,a->n);
772         a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));
773         CHKPTRQ(a->mult_work);
774       }
775       work = a->mult_work;
776       for ( i=0; i<mbs; i++ ) {
777         n     = ii[1] - ii[0]; ii++;
778         ncols = n*bs;
779         PetscMemzero(work,ncols*sizeof(Scalar));
780         LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One);
781         v += n*bs2;
782         x += bs;
783         workt = work;
784         for ( j=0; j<n; j++ ) {
785           zb = z + bs*(*idx++);
786           for ( k=0; k<bs; k++ ) zb[k] += workt[k] ;
787           workt += bs;
788         }
789       }
790     }
791   }
792   ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr);
793   ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr);
794   return 0;
795 }
796 
797 static int MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,int *nz,int *nza,int *mem)
798 {
799   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
800   if (nz)  *nz  = a->bs2*a->nz;
801   if (nza) *nza = a->maxnz;
802   if (mem) *mem = (int)A->mem;
803   return 0;
804 }
805 
806 static int MatEqual_SeqBAIJ(Mat A,Mat B, PetscTruth* flg)
807 {
808   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data;
809 
810   if (B->type !=MATSEQBAIJ)SETERRQ(1,"MatEqual_SeqBAIJ:Matrices must be same type");
811 
812   /* If the  matrix/block dimensions are not equal, or no of nonzeros or shift */
813   if ((a->m != b->m) || (a->n !=b->n) || (a->bs != b->bs)||
814       (a->nz != b->nz)) {
815     *flg = PETSC_FALSE; return 0;
816   }
817 
818   /* if the a->i are the same */
819   if (PetscMemcmp(a->i,b->i, (a->mbs+1)*sizeof(int))) {
820     *flg = PETSC_FALSE; return 0;
821   }
822 
823   /* if a->j are the same */
824   if (PetscMemcmp(a->j,b->j,(a->nz)*sizeof(int))) {
825     *flg = PETSC_FALSE; return 0;
826   }
827 
828   /* if a->a are the same */
829   if (PetscMemcmp(a->a, b->a,(a->nz)*(a->bs)*(a->bs)*sizeof(Scalar))) {
830     *flg = PETSC_FALSE; return 0;
831   }
832   *flg = PETSC_TRUE;
833   return 0;
834 
835 }
836 
837 static int MatGetDiagonal_SeqBAIJ(Mat A,Vec v)
838 {
839   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
840   int         i,j,k,n,row,bs,*ai,*aj,ambs,bs2;
841   Scalar      *x, zero = 0.0,*aa,*aa_j;
842 
843   bs   = a->bs;
844   aa   = a->a;
845   ai   = a->i;
846   aj   = a->j;
847   ambs = a->mbs;
848   bs2  = a->bs2;
849 
850   VecSet(&zero,v);
851   VecGetArray(v,&x); VecGetLocalSize(v,&n);
852   if (n != a->m) SETERRQ(1,"MatGetDiagonal_SeqBAIJ:Nonconforming matrix and vector");
853   for ( i=0; i<ambs; i++ ) {
854     for ( j=ai[i]; j<ai[i+1]; j++ ) {
855       if (aj[j] == i) {
856         row  = i*bs;
857         aa_j = aa+j*bs2;
858         for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k];
859         break;
860       }
861     }
862   }
863   return 0;
864 }
865 
866 static int MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr)
867 {
868   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
869   Scalar      *l,*r,x,*v,*aa,*li,*ri;
870   int         i,j,k,lm,rn,M,m,n,*ai,*aj,mbs,tmp,bs,bs2;
871 
872   ai  = a->i;
873   aj  = a->j;
874   aa  = a->a;
875   m   = a->m;
876   n   = a->n;
877   bs  = a->bs;
878   mbs = a->mbs;
879   bs2 = a->bs2;
880   if (ll) {
881     VecGetArray(ll,&l); VecGetSize(ll,&lm);
882     if (lm != m) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Left scaling vector wrong length");
883     for ( i=0; i<mbs; i++ ) { /* for each block row */
884       M  = ai[i+1] - ai[i];
885       li = l + i*bs;
886       v  = aa + bs2*ai[i];
887       for ( j=0; j<M; j++ ) { /* for each block */
888         for ( k=0; k<bs2; k++ ) {
889           (*v++) *= li[k%bs];
890         }
891       }
892     }
893   }
894 
895   if (rr) {
896     VecGetArray(rr,&r); VecGetSize(rr,&rn);
897     if (rn != n) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Right scaling vector wrong length");
898     for ( i=0; i<mbs; i++ ) { /* for each block row */
899       M  = ai[i+1] - ai[i];
900       v  = aa + bs2*ai[i];
901       for ( j=0; j<M; j++ ) { /* for each block */
902         ri = r + bs*aj[ai[i]+j];
903         for ( k=0; k<bs; k++ ) {
904           x = ri[k];
905           for ( tmp=0; tmp<bs; tmp++ ) (*v++) *= x;
906         }
907       }
908     }
909   }
910   return 0;
911 }
912 
913 
914 extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*);
915 extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double);
916 extern int MatIncreaseOverlap_SeqBAIJ(Mat,int,IS*,int);
917 extern int MatGetSubMatrix_SeqBAIJ(Mat,IS,IS,MatGetSubMatrixCall,Mat*);
918 extern int MatGetSubMatrices_SeqBAIJ(Mat,int,IS*,IS*,MatGetSubMatrixCall,Mat**);
919 
920 extern int MatSolve_SeqBAIJ_N(Mat,Vec,Vec);
921 extern int MatSolve_SeqBAIJ_1(Mat,Vec,Vec);
922 extern int MatSolve_SeqBAIJ_2(Mat,Vec,Vec);
923 extern int MatSolve_SeqBAIJ_3(Mat,Vec,Vec);
924 extern int MatSolve_SeqBAIJ_4(Mat,Vec,Vec);
925 extern int MatSolve_SeqBAIJ_5(Mat,Vec,Vec);
926 
927 extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*);
928 extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*);
929 extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*);
930 extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*);
931 extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*);
932 extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*);
933 
934 static int MatNorm_SeqBAIJ(Mat A,NormType type,double *norm)
935 {
936   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
937   Scalar      *v = a->a;
938   double      sum = 0.0;
939   int         i,nz=a->nz,bs2=a->bs2;
940 
941   if (type == NORM_FROBENIUS) {
942     for (i=0; i< bs2*nz; i++ ) {
943 #if defined(PETSC_COMPLEX)
944       sum += real(conj(*v)*(*v)); v++;
945 #else
946       sum += (*v)*(*v); v++;
947 #endif
948     }
949     *norm = sqrt(sum);
950   }
951   else {
952     SETERRQ(1,"MatNorm_SeqBAIJ:No support for this norm yet");
953   }
954   return 0;
955 }
956 
957 /*
958      note: This can only work for identity for row and col. It would
959    be good to check this and otherwise generate an error.
960 */
961 static int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,double efill,int fill)
962 {
963   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
964   Mat         outA;
965   int         ierr;
966 
967   if (fill != 0) SETERRQ(1,"MatILUFactor_SeqBAIJ:Only fill=0 supported");
968 
969   outA          = inA;
970   inA->factor   = FACTOR_LU;
971   a->row        = row;
972   a->col        = col;
973 
974   a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work);
975 
976   if (!a->diag) {
977     ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr);
978   }
979   ierr = MatLUFactorNumeric(inA,&outA); CHKERRQ(ierr);
980   return 0;
981 }
982 
983 static int MatScale_SeqBAIJ(Scalar *alpha,Mat inA)
984 {
985   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
986   int         one = 1, totalnz = a->bs2*a->nz;
987   BLscal_( &totalnz, alpha, a->a, &one );
988   PLogFlops(totalnz);
989   return 0;
990 }
991 
992 static int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v)
993 {
994   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
995   int        *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j;
996   int        *ai = a->i, *ailen = a->ilen;
997   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
998   Scalar     *ap, *aa = a->a, zero = 0.0;
999 
1000   for ( k=0; k<m; k++ ) { /* loop over rows */
1001     row  = im[k]; brow = row/bs;
1002     if (row < 0) SETERRQ(1,"MatGetValues_SeqBAIJ:Negative row");
1003     if (row >= a->m) SETERRQ(1,"MatGetValues_SeqBAIJ:Row too large");
1004     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1005     nrow = ailen[brow];
1006     for ( l=0; l<n; l++ ) { /* loop over columns */
1007       if (in[l] < 0) SETERRQ(1,"MatGetValues_SeqBAIJ:Negative column");
1008       if (in[l] >= a->n) SETERRQ(1,"MatGetValues_SeqBAIJ:Column too large");
1009       col  = in[l] ;
1010       bcol = col/bs;
1011       cidx = col%bs;
1012       ridx = row%bs;
1013       high = nrow;
1014       low  = 0; /* assume unsorted */
1015       while (high-low > 5) {
1016         t = (low+high)/2;
1017         if (rp[t] > bcol) high = t;
1018         else             low  = t;
1019       }
1020       for ( i=low; i<high; i++ ) {
1021         if (rp[i] > bcol) break;
1022         if (rp[i] == bcol) {
1023           *v++ = ap[bs2*i+bs*cidx+ridx];
1024           goto finished;
1025         }
1026       }
1027       *v++ = zero;
1028       finished:;
1029     }
1030   }
1031   return 0;
1032 }
1033 
1034 /* -------------------------------------------------------------------*/
1035 static struct _MatOps MatOps = {MatSetValues_SeqBAIJ,
1036        MatGetRow_SeqBAIJ,MatRestoreRow_SeqBAIJ,
1037        MatMult_SeqBAIJ,0,
1038        MatMultTrans_SeqBAIJ,0,
1039        MatSolve_SeqBAIJ_N,0,
1040        0,0,
1041        MatLUFactor_SeqBAIJ,0,
1042        0,
1043        MatTranspose_SeqBAIJ,
1044        MatGetInfo_SeqBAIJ,MatEqual_SeqBAIJ,
1045        MatGetDiagonal_SeqBAIJ,MatDiagonalScale_SeqBAIJ,MatNorm_SeqBAIJ,
1046        0,MatAssemblyEnd_SeqBAIJ,
1047        0,
1048        MatSetOption_SeqBAIJ,MatZeroEntries_SeqBAIJ,0,
1049        MatGetReordering_SeqBAIJ,
1050        MatLUFactorSymbolic_SeqBAIJ,MatLUFactorNumeric_SeqBAIJ_N,0,0,
1051        MatGetSize_SeqBAIJ,MatGetSize_SeqBAIJ,MatGetOwnershipRange_SeqBAIJ,
1052        MatILUFactorSymbolic_SeqBAIJ,0,
1053        0,0,/*  MatConvert_SeqBAIJ  */ 0,
1054        MatGetSubMatrix_SeqBAIJ,0,
1055        MatConvertSameType_SeqBAIJ,0,0,
1056        MatILUFactor_SeqBAIJ,0,0,
1057        MatGetSubMatrices_SeqBAIJ,MatIncreaseOverlap_SeqBAIJ,
1058        MatGetValues_SeqBAIJ,0,
1059        0,MatScale_SeqBAIJ,
1060        0};
1061 
1062 /*@C
1063    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
1064    compressed row) format.  For good matrix assembly performance the
1065    user should preallocate the matrix storage by setting the parameter nz
1066    (or nzz).  By setting these parameters accurately, performance can be
1067    increased by more than a factor of 50.
1068 
1069    Input Parameters:
1070 .  comm - MPI communicator, set to MPI_COMM_SELF
1071 .  bs - size of block
1072 .  m - number of rows
1073 .  n - number of columns
1074 .  nz - number of block nonzeros per block row (same for all rows)
1075 .  nzz - number of block nonzeros per block row or PETSC_NULL
1076          (possibly different for each row)
1077 
1078    Output Parameter:
1079 .  A - the matrix
1080 
1081    Notes:
1082    The block AIJ format is fully compatible with standard Fortran 77
1083    storage.  That is, the stored row and column indices can begin at
1084    either one (as in Fortran) or zero.  See the users' manual for details.
1085 
1086    Specify the preallocated storage with either nz or nnz (not both).
1087    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1088    allocation.  For additional details, see the users manual chapter on
1089    matrices and the file $(PETSC_DIR)/Performance.
1090 
1091 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
1092 @*/
1093 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A)
1094 {
1095   Mat         B;
1096   Mat_SeqBAIJ *b;
1097   int         i,len,ierr,flg,mbs=m/bs,nbs=n/bs,bs2=bs*bs;
1098 
1099   if (mbs*bs!=m || nbs*bs!=n)
1100     SETERRQ(1,"MatCreateSeqBAIJ:Number rows, cols must be divisible by blocksize");
1101 
1102   *A = 0;
1103   PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATSEQBAIJ,comm);
1104   PLogObjectCreate(B);
1105   B->data = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b);
1106   PetscMemzero(b,sizeof(Mat_SeqBAIJ));
1107   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
1108   ierr = OptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg); CHKERRQ(ierr);
1109   if (!flg) {
1110     switch (bs) {
1111       case 1:
1112         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
1113         B->ops.solve           = MatSolve_SeqBAIJ_1;
1114         break;
1115       case 2:
1116         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
1117         B->ops.solve           = MatSolve_SeqBAIJ_2;
1118         break;
1119       case 3:
1120         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
1121         B->ops.solve           = MatSolve_SeqBAIJ_3;
1122         break;
1123       case 4:
1124         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
1125         B->ops.solve           = MatSolve_SeqBAIJ_4;
1126         break;
1127       case 5:
1128         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
1129         B->ops.solve           = MatSolve_SeqBAIJ_5;
1130         break;
1131     }
1132   }
1133   B->destroy          = MatDestroy_SeqBAIJ;
1134   B->view             = MatView_SeqBAIJ;
1135   B->factor           = 0;
1136   B->lupivotthreshold = 1.0;
1137   b->row              = 0;
1138   b->col              = 0;
1139   b->reallocs         = 0;
1140 
1141   b->m       = m; B->m = m; B->M = m;
1142   b->n       = n; B->n = n; B->N = n;
1143   b->mbs     = mbs;
1144   b->nbs     = nbs;
1145   b->imax    = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax);
1146   if (nnz == PETSC_NULL) {
1147     if (nz == PETSC_DEFAULT) nz = 5;
1148     else if (nz <= 0)        nz = 1;
1149     for ( i=0; i<mbs; i++ ) b->imax[i] = nz;
1150     nz = nz*mbs;
1151   }
1152   else {
1153     nz = 0;
1154     for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];}
1155   }
1156 
1157   /* allocate the matrix space */
1158   len   = nz*sizeof(int) + nz*bs2*sizeof(Scalar) + (b->m+1)*sizeof(int);
1159   b->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a);
1160   PetscMemzero(b->a,nz*bs2*sizeof(Scalar));
1161   b->j  = (int *) (b->a + nz*bs2);
1162   PetscMemzero(b->j,nz*sizeof(int));
1163   b->i  = b->j + nz;
1164   b->singlemalloc = 1;
1165 
1166   b->i[0] = 0;
1167   for (i=1; i<mbs+1; i++) {
1168     b->i[i] = b->i[i-1] + b->imax[i-1];
1169   }
1170 
1171   /* b->ilen will count nonzeros in each block row so far. */
1172   b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int));
1173   PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ));
1174   for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;}
1175 
1176   b->bs               = bs;
1177   b->bs2              = bs2;
1178   b->mbs              = mbs;
1179   b->nz               = 0;
1180   b->maxnz            = nz;
1181   b->sorted           = 0;
1182   b->roworiented      = 1;
1183   b->nonew            = 0;
1184   b->diag             = 0;
1185   b->solve_work       = 0;
1186   b->mult_work        = 0;
1187   b->spptr            = 0;
1188   *A = B;
1189   ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr);
1190   if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); }
1191   return 0;
1192 }
1193 
1194 int MatConvertSameType_SeqBAIJ(Mat A,Mat *B,int cpvalues)
1195 {
1196   Mat         C;
1197   Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data;
1198   int         i,len, mbs = a->mbs,nz = a->nz,bs2 =a->bs2;
1199 
1200   if (a->i[mbs] != nz) SETERRQ(1,"MatConvertSameType_SeqBAIJ:Corrupt matrix");
1201 
1202   *B = 0;
1203   PetscHeaderCreate(C,_Mat,MAT_COOKIE,MATSEQBAIJ,A->comm);
1204   PLogObjectCreate(C);
1205   C->data       = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c);
1206   PetscMemcpy(&C->ops,&A->ops,sizeof(struct _MatOps));
1207   C->destroy    = MatDestroy_SeqBAIJ;
1208   C->view       = MatView_SeqBAIJ;
1209   C->factor     = A->factor;
1210   c->row        = 0;
1211   c->col        = 0;
1212   C->assembled  = PETSC_TRUE;
1213 
1214   c->m = C->m   = a->m;
1215   c->n = C->n   = a->n;
1216   C->M          = a->m;
1217   C->N          = a->n;
1218 
1219   c->bs         = a->bs;
1220   c->bs2        = a->bs2;
1221   c->mbs        = a->mbs;
1222   c->nbs        = a->nbs;
1223 
1224   c->imax       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax);
1225   c->ilen       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen);
1226   for ( i=0; i<mbs; i++ ) {
1227     c->imax[i] = a->imax[i];
1228     c->ilen[i] = a->ilen[i];
1229   }
1230 
1231   /* allocate the matrix space */
1232   c->singlemalloc = 1;
1233   len   = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(Scalar) + sizeof(int));
1234   c->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(c->a);
1235   c->j  = (int *) (c->a + nz*bs2);
1236   c->i  = c->j + nz;
1237   PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));
1238   if (mbs > 0) {
1239     PetscMemcpy(c->j,a->j,nz*sizeof(int));
1240     if (cpvalues == COPY_VALUES) {
1241       PetscMemcpy(c->a,a->a,bs2*nz*sizeof(Scalar));
1242     }
1243   }
1244 
1245   PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ));
1246   c->sorted      = a->sorted;
1247   c->roworiented = a->roworiented;
1248   c->nonew       = a->nonew;
1249 
1250   if (a->diag) {
1251     c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag);
1252     PLogObjectMemory(C,(mbs+1)*sizeof(int));
1253     for ( i=0; i<mbs; i++ ) {
1254       c->diag[i] = a->diag[i];
1255     }
1256   }
1257   else c->diag          = 0;
1258   c->nz                 = a->nz;
1259   c->maxnz              = a->maxnz;
1260   c->solve_work         = 0;
1261   c->spptr              = 0;      /* Dangerous -I'm throwing away a->spptr */
1262   c->mult_work          = 0;
1263   *B = C;
1264   return 0;
1265 }
1266 
1267 int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A)
1268 {
1269   Mat_SeqBAIJ  *a;
1270   Mat          B;
1271   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg;
1272   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
1273   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
1274   int          *masked, nmask,tmp,bs2,ishift;
1275   Scalar       *aa;
1276   MPI_Comm     comm = ((PetscObject) viewer)->comm;
1277 
1278   ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr);
1279   bs2  = bs*bs;
1280 
1281   MPI_Comm_size(comm,&size);
1282   if (size > 1) SETERRQ(1,"MatLoad_SeqBAIJ:view must have one processor");
1283   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
1284   ierr = PetscBinaryRead(fd,header,4,BINARY_INT); CHKERRQ(ierr);
1285   if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_SeqBAIJ:not Mat object");
1286   M = header[1]; N = header[2]; nz = header[3];
1287 
1288   if (M != N) SETERRQ(1,"MatLoad_SeqBAIJ:Can only do square matrices");
1289 
1290   /*
1291      This code adds extra rows to make sure the number of rows is
1292     divisible by the blocksize
1293   */
1294   mbs        = M/bs;
1295   extra_rows = bs - M + bs*(mbs);
1296   if (extra_rows == bs) extra_rows = 0;
1297   else                  mbs++;
1298   if (extra_rows) {
1299     PLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize");
1300   }
1301 
1302   /* read in row lengths */
1303   rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths);
1304   ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
1305   for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
1306 
1307   /* read in column indices */
1308   jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) ); CHKPTRQ(jj);
1309   ierr = PetscBinaryRead(fd,jj,nz,BINARY_INT); CHKERRQ(ierr);
1310   for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i;
1311 
1312   /* loop over row lengths determining block row lengths */
1313   browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths);
1314   PetscMemzero(browlengths,mbs*sizeof(int));
1315   mask   = (int *) PetscMalloc( 2*mbs*sizeof(int) ); CHKPTRQ(mask);
1316   PetscMemzero(mask,mbs*sizeof(int));
1317   masked = mask + mbs;
1318   rowcount = 0; nzcount = 0;
1319   for ( i=0; i<mbs; i++ ) {
1320     nmask = 0;
1321     for ( j=0; j<bs; j++ ) {
1322       kmax = rowlengths[rowcount];
1323       for ( k=0; k<kmax; k++ ) {
1324         tmp = jj[nzcount++]/bs;
1325         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
1326       }
1327       rowcount++;
1328     }
1329     browlengths[i] += nmask;
1330     /* zero out the mask elements we set */
1331     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
1332   }
1333 
1334   /* create our matrix */
1335   ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);
1336          CHKERRQ(ierr);
1337   B = *A;
1338   a = (Mat_SeqBAIJ *) B->data;
1339 
1340   /* set matrix "i" values */
1341   a->i[0] = 0;
1342   for ( i=1; i<= mbs; i++ ) {
1343     a->i[i]      = a->i[i-1] + browlengths[i-1];
1344     a->ilen[i-1] = browlengths[i-1];
1345   }
1346   a->nz         = 0;
1347   for ( i=0; i<mbs; i++ ) a->nz += browlengths[i];
1348 
1349   /* read in nonzero values */
1350   aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa);
1351   ierr = PetscBinaryRead(fd,aa,nz,BINARY_SCALAR); CHKERRQ(ierr);
1352   for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0;
1353 
1354   /* set "a" and "j" values into matrix */
1355   nzcount = 0; jcount = 0;
1356   for ( i=0; i<mbs; i++ ) {
1357     nzcountb = nzcount;
1358     nmask    = 0;
1359     for ( j=0; j<bs; j++ ) {
1360       kmax = rowlengths[i*bs+j];
1361       for ( k=0; k<kmax; k++ ) {
1362         tmp = jj[nzcount++]/bs;
1363 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
1364       }
1365       rowcount++;
1366     }
1367     /* sort the masked values */
1368     PetscSortInt(nmask,masked);
1369 
1370     /* set "j" values into matrix */
1371     maskcount = 1;
1372     for ( j=0; j<nmask; j++ ) {
1373       a->j[jcount++]  = masked[j];
1374       mask[masked[j]] = maskcount++;
1375     }
1376     /* set "a" values into matrix */
1377     ishift = bs2*a->i[i];
1378     for ( j=0; j<bs; j++ ) {
1379       kmax = rowlengths[i*bs+j];
1380       for ( k=0; k<kmax; k++ ) {
1381         tmp    = jj[nzcountb]/bs ;
1382         block  = mask[tmp] - 1;
1383         point  = jj[nzcountb] - bs*tmp;
1384         idx    = ishift + bs2*block + j + bs*point;
1385         a->a[idx] = aa[nzcountb++];
1386       }
1387     }
1388     /* zero out the mask elements we set */
1389     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
1390   }
1391   if (jcount != a->nz) SETERRQ(1,"MatLoad_SeqBAIJ:Error bad binary matrix");
1392 
1393   PetscFree(rowlengths);
1394   PetscFree(browlengths);
1395   PetscFree(aa);
1396   PetscFree(jj);
1397   PetscFree(mask);
1398 
1399   B->assembled = PETSC_TRUE;
1400 
1401   ierr = OptionsHasName(PETSC_NULL,"-mat_view_info",&flg); CHKERRQ(ierr);
1402   if (flg) {
1403     Viewer tviewer;
1404     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
1405     ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_INFO,0);CHKERRQ(ierr);
1406     ierr = MatView(B,tviewer); CHKERRQ(ierr);
1407     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
1408   }
1409   ierr = OptionsHasName(PETSC_NULL,"-mat_view_info_detailed",&flg);CHKERRQ(ierr);
1410   if (flg) {
1411     Viewer tviewer;
1412     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
1413     ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_INFO_DETAILED,0);CHKERRQ(ierr);
1414     ierr = MatView(B,tviewer); CHKERRQ(ierr);
1415     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
1416   }
1417   ierr = OptionsHasName(PETSC_NULL,"-mat_view",&flg); CHKERRQ(ierr);
1418   if (flg) {
1419     Viewer tviewer;
1420     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
1421     ierr = MatView(B,tviewer); CHKERRQ(ierr);
1422     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
1423   }
1424   ierr = OptionsHasName(PETSC_NULL,"-mat_view_matlab",&flg); CHKERRQ(ierr);
1425   if (flg) {
1426     Viewer tviewer;
1427     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
1428     ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_MATLAB,"M");CHKERRQ(ierr);
1429     ierr = MatView(B,tviewer); CHKERRQ(ierr);
1430     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
1431   }
1432   ierr = OptionsHasName(PETSC_NULL,"-mat_view_draw",&flg); CHKERRQ(ierr);
1433   if (flg) {
1434     Viewer tviewer;
1435     ierr = ViewerDrawOpenX(B->comm,0,0,0,0,300,300,&tviewer); CHKERRQ(ierr);
1436     ierr = MatView(B,(Viewer)tviewer); CHKERRQ(ierr);
1437     ierr = ViewerFlush(tviewer); CHKERRQ(ierr);
1438     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
1439   }
1440   return 0;
1441 }
1442 
1443 
1444 
1445