xref: /petsc/src/mat/impls/baij/seq/baij.c (revision ab94cc7af4fcd5c63a93b94bdec4707d4e0807cd)
1 
2 #ifndef lint
3 static char vcid[] = "$Id: baij.c,v 1.38 1996/04/15 16:21:01 balay Exp balay $";
4 #endif
5 
6 /*
7     Defines the basic matrix operations for the BAIJ (compressed row)
8   matrix storage format.
9 */
10 #include "baij.h"
11 #include "vec/vecimpl.h"
12 #include "inline/spops.h"
13 #include "petsc.h"
14 
15 extern   int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);
16 
17 static int MatGetReordering_SeqBAIJ(Mat A,MatOrdering type,IS *rperm,IS *cperm)
18 {
19   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
20   int         ierr, *ia, *ja,n = a->mbs,*idx,i,ishift,oshift;
21 
22   /*
23      this is tacky: In the future when we have written special factorization
24      and solve routines for the identity permutation we should use a
25      stride index set instead of the general one.
26   */
27   if (type  == ORDER_NATURAL) {
28     idx = (int *) PetscMalloc( n*sizeof(int) ); CHKPTRQ(idx);
29     for ( i=0; i<n; i++ ) idx[i] = i;
30     ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,rperm); CHKERRQ(ierr);
31     ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,cperm); CHKERRQ(ierr);
32     PetscFree(idx);
33     ISSetPermutation(*rperm);
34     ISSetPermutation(*cperm);
35     ISSetIdentity(*rperm);
36     ISSetIdentity(*cperm);
37     return 0;
38   }
39 
40   MatReorderingRegisterAll();
41   ishift = 0;
42   oshift = -MatReorderingIndexShift[(int)type];
43   if (MatReorderingRequiresSymmetric[(int)type]) {
44     ierr = MatToSymmetricIJ_SeqAIJ(a->n,a->i,a->j,ishift,oshift,&ia,&ja);
45     CHKERRQ(ierr);
46     ierr = MatGetReordering_IJ(a->n,ia,ja,type,rperm,cperm); CHKERRQ(ierr);
47     PetscFree(ia); PetscFree(ja);
48   } else {
49     if (ishift == oshift) {
50       ierr = MatGetReordering_IJ(a->n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr);
51     }
52     else if (ishift == -1) {
53       /* temporarily subtract 1 from i and j indices */
54       int nz = a->i[a->n] - 1;
55       for ( i=0; i<nz; i++ ) a->j[i]--;
56       for ( i=0; i<a->n+1; i++ ) a->i[i]--;
57       ierr = MatGetReordering_IJ(a->n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr);
58       for ( i=0; i<nz; i++ ) a->j[i]++;
59       for ( i=0; i<a->n+1; i++ ) a->i[i]++;
60     } else {
61       /* temporarily add 1 to i and j indices */
62       int nz = a->i[a->n] - 1;
63       for ( i=0; i<nz; i++ ) a->j[i]++;
64       for ( i=0; i<a->n+1; i++ ) a->i[i]++;
65       ierr = MatGetReordering_IJ(a->n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr);
66       for ( i=0; i<nz; i++ ) a->j[i]--;
67       for ( i=0; i<a->n+1; i++ ) a->i[i]--;
68     }
69   }
70   return 0;
71 }
72 
73 /*
74      Adds diagonal pointers to sparse matrix structure.
75 */
76 
77 int MatMarkDiag_SeqBAIJ(Mat A)
78 {
79   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
80   int         i,j, *diag, m = a->mbs;
81 
82   diag = (int *) PetscMalloc( (m+1)*sizeof(int)); CHKPTRQ(diag);
83   PLogObjectMemory(A,(m+1)*sizeof(int));
84   for ( i=0; i<m; i++ ) {
85     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
86       if (a->j[j] == i) {
87         diag[i] = j;
88         break;
89       }
90     }
91   }
92   a->diag = diag;
93   return 0;
94 }
95 
96 #include "draw.h"
97 #include "pinclude/pviewer.h"
98 #include "sys.h"
99 
100 static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer)
101 {
102   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
103   int         i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l,bs2=a->bs2;
104   Scalar      *aa;
105 
106   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
107   col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens);
108   col_lens[0] = MAT_COOKIE;
109   col_lens[1] = a->m;
110   col_lens[2] = a->n;
111   col_lens[3] = a->nz*bs2;
112 
113   /* store lengths of each row and write (including header) to file */
114   count = 0;
115   for ( i=0; i<a->mbs; i++ ) {
116     for ( j=0; j<bs; j++ ) {
117       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
118     }
119   }
120   ierr = PetscBinaryWrite(fd,col_lens,4+a->m,BINARY_INT,1); CHKERRQ(ierr);
121   PetscFree(col_lens);
122 
123   /* store column indices (zero start index) */
124   jj = (int *) PetscMalloc( a->nz*bs2*sizeof(int) ); CHKPTRQ(jj);
125   count = 0;
126   for ( i=0; i<a->mbs; i++ ) {
127     for ( j=0; j<bs; j++ ) {
128       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
129         for ( l=0; l<bs; l++ ) {
130           jj[count++] = bs*a->j[k] + l;
131         }
132       }
133     }
134   }
135   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,BINARY_INT,0); CHKERRQ(ierr);
136   PetscFree(jj);
137 
138   /* store nonzero values */
139   aa = (Scalar *) PetscMalloc(a->nz*bs2*sizeof(Scalar)); CHKPTRQ(aa);
140   count = 0;
141   for ( i=0; i<a->mbs; i++ ) {
142     for ( j=0; j<bs; j++ ) {
143       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
144         for ( l=0; l<bs; l++ ) {
145           aa[count++] = a->a[bs2*k + l*bs + j];
146         }
147       }
148     }
149   }
150   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,BINARY_SCALAR,0); CHKERRQ(ierr);
151   PetscFree(aa);
152   return 0;
153 }
154 
155 static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer)
156 {
157   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
158   int         ierr, i,j,format,bs = a->bs,k,l,bs2=a->bs2;
159   FILE        *fd;
160   char        *outputname;
161 
162   ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
163   ierr = ViewerFileGetOutputname_Private(viewer,&outputname);CHKERRQ(ierr);
164   ierr = ViewerGetFormat(viewer,&format);
165   if (format == ASCII_FORMAT_INFO || format == ASCII_FORMAT_INFO_DETAILED) {
166     fprintf(fd,"  block size is %d\n",bs);
167   }
168   else if (format == ASCII_FORMAT_MATLAB) {
169     SETERRQ(1,"MatView_SeqBAIJ_ASCII:Matlab format not supported");
170   }
171   else if (format == ASCII_FORMAT_COMMON) {
172     for ( i=0; i<a->mbs; i++ ) {
173       for ( j=0; j<bs; j++ ) {
174         fprintf(fd,"row %d:",i*bs+j);
175         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
176           for ( l=0; l<bs; l++ ) {
177 #if defined(PETSC_COMPLEX)
178           if (imag(a->a[bs2*k + l*bs + j]) != 0.0 && real(a->a[bs2*k + l*bs + j]) != 0.0)
179             fprintf(fd," %d %g + %g i",bs*a->j[k]+l,
180               real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j]));
181           else if (real(a->a[bs2*k + l*bs + j]) != 0.0)
182             fprintf(fd," %d %g",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j]));
183 #else
184           if (a->a[bs2*k + l*bs + j] != 0.0)
185             fprintf(fd," %d %g",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
186 #endif
187           }
188         }
189         fprintf(fd,"\n");
190       }
191     }
192   }
193   else {
194     for ( i=0; i<a->mbs; i++ ) {
195       for ( j=0; j<bs; j++ ) {
196         fprintf(fd,"row %d:",i*bs+j);
197         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
198           for ( l=0; l<bs; l++ ) {
199 #if defined(PETSC_COMPLEX)
200           if (imag(a->a[bs2*k + l*bs + j]) != 0.0) {
201             fprintf(fd," %d %g + %g i",bs*a->j[k]+l,
202               real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j]));
203           }
204           else {
205             fprintf(fd," %d %g",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j]));
206           }
207 #else
208             fprintf(fd," %d %g",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
209 #endif
210           }
211         }
212         fprintf(fd,"\n");
213       }
214     }
215   }
216   fflush(fd);
217   return 0;
218 }
219 
220 static int MatView_SeqBAIJ(PetscObject obj,Viewer viewer)
221 {
222   Mat         A = (Mat) obj;
223   ViewerType  vtype;
224   int         ierr;
225 
226   if (!viewer) {
227     viewer = STDOUT_VIEWER_SELF;
228   }
229 
230   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
231   if (vtype == MATLAB_VIEWER) {
232     SETERRQ(1,"MatView_SeqBAIJ:Matlab viewer not supported");
233   }
234   else if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER){
235     return MatView_SeqBAIJ_ASCII(A,viewer);
236   }
237   else if (vtype == BINARY_FILE_VIEWER) {
238     return MatView_SeqBAIJ_Binary(A,viewer);
239   }
240   else if (vtype == DRAW_VIEWER) {
241     SETERRQ(1,"MatView_SeqBAIJ:Draw viewer not supported");
242   }
243   return 0;
244 }
245 
246 #define CHUNKSIZE  10
247 
248 /* This version has row oriented v  */
249 static int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is)
250 {
251   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
252   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
253   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented;
254   int         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
255   int          ridx,cidx,bs2=a->bs2;
256   Scalar      *ap,value,*aa=a->a,*bap;
257 
258   for ( k=0; k<m; k++ ) { /* loop over added rows */
259     row  = im[k]; brow = row/bs;
260     if (row < 0) SETERRQ(1,"MatSetValues_SeqBAIJ:Negative row");
261     if (row >= a->m) SETERRQ(1,"MatSetValues_SeqBAIJ:Row too large");
262     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
263     rmax = imax[brow]; nrow = ailen[brow];
264     low = 0;
265     for ( l=0; l<n; l++ ) { /* loop over added columns */
266       if (in[l] < 0) SETERRQ(1,"MatSetValues_SeqBAIJ:Negative column");
267       if (in[l] >= a->n) SETERRQ(1,"MatSetValues_SeqBAIJ:Column too large");
268       col = in[l]; bcol = col/bs;
269       ridx = row % bs; cidx = col % bs;
270       if (roworiented) {
271         value = *v++;
272       }
273       else {
274         value = v[k + l*m];
275       }
276       if (!sorted) low = 0; high = nrow;
277       while (high-low > 5) {
278         t = (low+high)/2;
279         if (rp[t] > bcol) high = t;
280         else             low  = t;
281       }
282       for ( i=low; i<high; i++ ) {
283         if (rp[i] > bcol) break;
284         if (rp[i] == bcol) {
285           bap  = ap +  bs2*i + bs*cidx + ridx;
286           if (is == ADD_VALUES) *bap += value;
287           else                  *bap  = value;
288           goto noinsert;
289         }
290       }
291       if (nonew) goto noinsert;
292       if (nrow >= rmax) {
293         /* there is no extra room in row, therefore enlarge */
294         int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
295         Scalar *new_a;
296 
297         /* malloc new storage space */
298         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int);
299         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a);
300         new_j   = (int *) (new_a + bs2*new_nz);
301         new_i   = new_j + new_nz;
302 
303         /* copy over old data into new slots */
304         for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];}
305         for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;}
306         PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));
307         len = (new_nz - CHUNKSIZE - ai[brow] - nrow);
308         PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,
309                                                            len*sizeof(int));
310         PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar));
311         PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));
312         PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),
313                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar));
314         /* free up old matrix storage */
315         PetscFree(a->a);
316         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);}
317         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
318         a->singlemalloc = 1;
319 
320         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
321         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
322         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar)));
323         a->maxnz += CHUNKSIZE;
324         a->reallocs++;
325         a->nz++;
326       }
327       N = nrow++ - 1;
328       /* shift up all the later entries in this row */
329       for ( ii=N; ii>=i; ii-- ) {
330         rp[ii+1] = rp[ii];
331          PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar));
332       }
333       if (N>=i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar));
334       rp[i] = bcol;
335       ap[bs2*i + bs*cidx + ridx] = value;
336       noinsert:;
337       low = i;
338     }
339     ailen[brow] = nrow;
340   }
341   return 0;
342 }
343 
344 static int MatGetSize_SeqBAIJ(Mat A,int *m,int *n)
345 {
346   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
347   *m = a->m; *n = a->n;
348   return 0;
349 }
350 
351 static int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n)
352 {
353   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
354   *m = 0; *n = a->m;
355   return 0;
356 }
357 
358 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
359 {
360   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
361   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
362   Scalar      *aa,*v_i,*aa_i;
363 
364   bs  = a->bs;
365   ai  = a->i;
366   aj  = a->j;
367   aa  = a->a;
368   bs2 = a->bs2;
369 
370   if (row < 0 || row >= a->m) SETERRQ(1,"MatGetRow_SeqBAIJ:Row out of range");
371 
372   bn  = row/bs;   /* Block number */
373   bp  = row % bs; /* Block Position */
374   M   = ai[bn+1] - ai[bn];
375   *nz = bs*M;
376 
377   if (v) {
378     *v = 0;
379     if (*nz) {
380       *v = (Scalar *) PetscMalloc( (*nz)*sizeof(Scalar) ); CHKPTRQ(*v);
381       for ( i=0; i<M; i++ ) { /* for each block in the block row */
382         v_i  = *v + i*bs;
383         aa_i = aa + bs2*(ai[bn] + i);
384         for ( j=bp,k=0; j<bs2; j+=bs,k++ ) {v_i[k] = aa_i[j];}
385       }
386     }
387   }
388 
389   if (idx) {
390     *idx = 0;
391     if (*nz) {
392       *idx = (int *) PetscMalloc( (*nz)*sizeof(int) ); CHKPTRQ(*idx);
393       for ( i=0; i<M; i++ ) { /* for each block in the block row */
394         idx_i = *idx + i*bs;
395         itmp  = bs*aj[ai[bn] + i];
396         for ( j=0; j<bs; j++ ) {idx_i[j] = itmp++;}
397       }
398     }
399   }
400   return 0;
401 }
402 
403 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
404 {
405   if (idx) {if (*idx) PetscFree(*idx);}
406   if (v)   {if (*v)   PetscFree(*v);}
407   return 0;
408 }
409 
410 static int MatTranspose_SeqBAIJ(Mat A,Mat *B)
411 {
412   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
413   Mat         C;
414   int         i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
415   int         *rows,*cols,bs2=a->bs2;
416   Scalar      *array=a->a;
417 
418   if (B==PETSC_NULL && mbs!=nbs)
419     SETERRQ(1,"MatTranspose_SeqBAIJ:Square matrix only for in-place");
420   col = (int *) PetscMalloc((1+nbs)*sizeof(int)); CHKPTRQ(col);
421   PetscMemzero(col,(1+nbs)*sizeof(int));
422 
423   for ( i=0; i<ai[mbs]; i++ ) col[aj[i]] += 1;
424   ierr = MatCreateSeqBAIJ(A->comm,bs,a->n,a->m,PETSC_NULL,col,&C); CHKERRQ(ierr);
425   PetscFree(col);
426   rows = (int *) PetscMalloc(2*bs*sizeof(int)); CHKPTRQ(rows);
427   cols = rows + bs;
428   for ( i=0; i<mbs; i++ ) {
429     cols[0] = i*bs;
430     for (k=1; k<bs; k++ ) cols[k] = cols[k-1] + 1;
431     len = ai[i+1] - ai[i];
432     for ( j=0; j<len; j++ ) {
433       rows[0] = (*aj++)*bs;
434       for (k=1; k<bs; k++ ) rows[k] = rows[k-1] + 1;
435       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES); CHKERRQ(ierr);
436       array += bs2;
437     }
438   }
439   PetscFree(rows);
440 
441   ierr = MatAssemblyBegin(C,FINAL_ASSEMBLY); CHKERRQ(ierr);
442   ierr = MatAssemblyEnd(C,FINAL_ASSEMBLY); CHKERRQ(ierr);
443 
444   if (B != PETSC_NULL) {
445     *B = C;
446   } else {
447     /* This isn't really an in-place transpose */
448     PetscFree(a->a);
449     if (!a->singlemalloc) {PetscFree(a->i); PetscFree(a->j);}
450     if (a->diag) PetscFree(a->diag);
451     if (a->ilen) PetscFree(a->ilen);
452     if (a->imax) PetscFree(a->imax);
453     if (a->solve_work) PetscFree(a->solve_work);
454     PetscFree(a);
455     PetscMemcpy(A,C,sizeof(struct _Mat));
456     PetscHeaderDestroy(C);
457   }
458   return 0;
459 }
460 
461 
462 static int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
463 {
464   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
465   int        fshift = 0,i,j,*ai = a->i, *aj = a->j, *imax = a->imax;
466   int        m = a->m,*ip, N, *ailen = a->ilen;
467   int        mbs = a->mbs, bs2 = a->bs2;
468   Scalar     *aa = a->a, *ap;
469 
470   if (mode == FLUSH_ASSEMBLY) return 0;
471 
472   for ( i=1; i<mbs; i++ ) {
473     /* move each row back by the amount of empty slots (fshift) before it*/
474     fshift += imax[i-1] - ailen[i-1];
475     if (fshift) {
476       ip = aj + ai[i]; ap = aa + bs2*ai[i];
477       N = ailen[i];
478       for ( j=0; j<N; j++ ) {
479         ip[j-fshift] = ip[j];
480         PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(Scalar));
481       }
482     }
483     ai[i] = ai[i-1] + ailen[i-1];
484   }
485   if (mbs) {
486     fshift += imax[mbs-1] - ailen[mbs-1];
487     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
488   }
489   /* reset ilen and imax for each row */
490   for ( i=0; i<mbs; i++ ) {
491     ailen[i] = imax[i] = ai[i+1] - ai[i];
492   }
493   a->nz = ai[mbs];
494 
495   /* diagonals may have moved, so kill the diagonal pointers */
496   if (fshift && a->diag) {
497     PetscFree(a->diag);
498     PLogObjectMemory(A,-(m+1)*sizeof(int));
499     a->diag = 0;
500   }
501   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ: Unneed storage space(blocks) %d used %d, rows %d, block size %d\n", fshift*bs2,a->nz*bs2,m,a->bs);
502   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ: Number of mallocs during MatSetValues %d\n",
503            a->reallocs);
504   return 0;
505 }
506 
507 static int MatZeroEntries_SeqBAIJ(Mat A)
508 {
509   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
510   PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(Scalar));
511   return 0;
512 }
513 
514 int MatDestroy_SeqBAIJ(PetscObject obj)
515 {
516   Mat         A  = (Mat) obj;
517   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
518 
519 #if defined(PETSC_LOG)
520   PLogObjectState(obj,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz);
521 #endif
522   PetscFree(a->a);
523   if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);}
524   if (a->diag) PetscFree(a->diag);
525   if (a->ilen) PetscFree(a->ilen);
526   if (a->imax) PetscFree(a->imax);
527   if (a->solve_work) PetscFree(a->solve_work);
528   if (a->mult_work) PetscFree(a->mult_work);
529   PetscFree(a);
530   PLogObjectDestroy(A);
531   PetscHeaderDestroy(A);
532   return 0;
533 }
534 
535 static int MatSetOption_SeqBAIJ(Mat A,MatOption op)
536 {
537   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
538   if      (op == ROW_ORIENTED)              a->roworiented = 1;
539   else if (op == COLUMN_ORIENTED)           a->roworiented = 0;
540   else if (op == COLUMNS_SORTED)            a->sorted      = 1;
541   else if (op == NO_NEW_NONZERO_LOCATIONS)  a->nonew       = 1;
542   else if (op == YES_NEW_NONZERO_LOCATIONS) a->nonew       = 0;
543   else if (op == ROWS_SORTED ||
544            op == SYMMETRIC_MATRIX ||
545            op == STRUCTURALLY_SYMMETRIC_MATRIX ||
546            op == YES_NEW_DIAGONALS)
547     PLogInfo(A,"Info:MatSetOption_SeqBAIJ:Option ignored\n");
548   else if (op == NO_NEW_DIAGONALS)
549     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:NO_NEW_DIAGONALS");}
550   else
551     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:unknown option");}
552   return 0;
553 }
554 
555 
556 /* -------------------------------------------------------*/
557 /* Should check that shapes of vectors and matrices match */
558 /* -------------------------------------------------------*/
559 #include "pinclude/plapack.h"
560 
561 static int MatMultAdd_SeqBAIJ_Private(Mat A,Vec xx,Vec yy,Vec zz)
562 {
563   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
564   Scalar          *xg,*y,*zg;
565   register Scalar *x,*z,*v,sum,*xb,sum1,sum2,sum3,sum4,sum5;
566   register Scalar x1,x2,x3,x4,x5;
567   int             mbs=a->mbs,m=a->m,i,*idx,*ii;
568   int             bs=a->bs,j,n,bs2=a->bs2,ierr;
569 
570   ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg;
571   ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg;
572 
573   if (yy==PETSC_NULL) PetscMemzero(z,m*sizeof(Scalar)); /* MatMult() */
574   else if (zz!=yy){
575     ierr = VecGetArray(yy,&y); CHKERRQ(ierr);
576     PetscMemcpy(z,y,m*sizeof(Scalar));
577     ierr = VecRestoreArray(yy,&y); CHKERRQ(ierr);
578   }
579 
580   idx   = a->j;
581   v     = a->a;
582   ii    = a->i;
583 
584   switch (bs) {
585   case 1:
586     for ( i=0; i<mbs; i++ ) {
587       n    = ii[1] - ii[0]; ii++;
588       sum  = 0.0;
589       while (n--) sum += *v++ * x[*idx++];
590       z[i] += sum;
591     }
592     break;
593   case 2:
594     for ( i=0; i<mbs; i++ ) {
595       n  = ii[1] - ii[0]; ii++;
596       sum1 = 0.0; sum2 = 0.0;
597       for ( j=0; j<n; j++ ) {
598         xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
599         sum1 += v[0]*x1 + v[2]*x2;
600         sum2 += v[1]*x1 + v[3]*x2;
601         v += 4;
602       }
603       z[0] += sum1; z[1] += sum2;
604       z += 2;
605     }
606     break;
607   case 3:
608     for ( i=0; i<mbs; i++ ) {
609       n  = ii[1] - ii[0]; ii++;
610       sum1 = 0.0; sum2 = 0.0; sum3 = 0.0;
611       for ( j=0; j<n; j++ ) {
612         xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
613         sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
614         sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
615         sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
616         v += 9;
617       }
618       z[0] += sum1; z[1] += sum2; z[2] += sum3;
619       z += 3;
620     }
621     break;
622   case 4:
623     for ( i=0; i<mbs; i++ ) {
624       n  = ii[1] - ii[0]; ii++;
625       sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0;
626       for ( j=0; j<n; j++ ) {
627         xb = x + 4*(*idx++);
628         x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
629         sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
630         sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
631         sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
632         sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
633         v += 16;
634       }
635       z[0] += sum1; z[1] += sum2; z[2] += sum3; z[3] += sum4;
636       z += 4;
637     }
638     break;
639   case 5:
640     for ( i=0; i<mbs; i++ ) {
641       n  = ii[1] - ii[0]; ii++;
642       sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0;
643       for ( j=0; j<n; j++ ) {
644         xb = x + 5*(*idx++);
645         x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
646         sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
647         sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
648         sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
649         sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
650         sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
651         v += 25;
652       }
653       z[0] += sum1; z[1] += sum2; z[2] += sum3; z[3] += sum4; z[4] += sum5;
654       z += 5;
655     }
656     break;
657     /* block sizes larger then 5 by 5 are handled by BLAS */
658   default: {
659       int  _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
660       if (!a->mult_work) {
661         k = PetscMax(a->m,a->n);
662         a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));
663         CHKPTRQ(a->mult_work);
664       }
665       work = a->mult_work;
666       for ( i=0; i<mbs; i++ ) {
667         n     = ii[1] - ii[0]; ii++;
668         ncols = n*bs;
669         workt = work;
670         for ( j=0; j<n; j++ ) {
671           xb = x + bs*(*idx++);
672           for ( k=0; k<bs; k++ ) workt[k] = xb[k];
673           workt += bs;
674         }
675         LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,z,&_One);
676         v += n*bs2;
677         z += bs;
678       }
679     }
680   }
681   ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr);
682   ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr);
683   return 0;
684 }
685 
686 static int MatMult_SeqBAIJ(Mat A,Vec xx, Vec yy)
687 {
688   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
689   int         ierr;
690 
691   ierr = MatMultAdd_SeqBAIJ_Private(A,xx,PETSC_NULL,yy); CHKERRQ(ierr);
692   PLogFlops(2*(a->bs2)*(a->nz)-a->m);
693   return 0;
694 }
695 
696 static int MatMultAdd_SeqBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
697 {
698   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
699   int         ierr;
700 
701   ierr = MatMultAdd_SeqBAIJ_Private(A,xx,yy,zz); CHKERRQ(ierr);
702   PLogFlops(2*(a->bs2)*(a->nz));
703   return 0;
704 }
705 
706 static int MatMultTransAdd_SeqBAIJ_Private(Mat A,Vec xx,Vec yy,Vec zz)
707 {
708   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
709   Scalar          *xg,*y,*zg,*zb;
710   register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5;
711   int             mbs=a->mbs,N=a->n,i,*idx,*ii,*ai=a->i,rval;
712   int             bs=a->bs,j,n,bs2=a->bs2,*ib,ierr;
713 
714 
715   ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg;
716   ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg;
717 
718   if (yy==PETSC_NULL) PetscMemzero(z,N*sizeof(Scalar)); /* MatMultTrans() */
719   else if (zz!=yy){
720     ierr = VecGetArray(yy,&y); CHKERRQ(ierr);
721     PetscMemcpy(z,y,N*sizeof(Scalar));
722     ierr = VecRestoreArray(yy,&y); CHKERRQ(ierr);
723   }
724 
725   idx   = a->j;
726   v     = a->a;
727   ii    = a->i;
728 
729   switch (bs) {
730   case 1:
731     for ( i=0; i<mbs; i++ ) {
732       n  = ii[1] - ii[0]; ii++;
733       xb = x + i; x1 = xb[0];
734       ib = idx + ai[i];
735       for ( j=0; j<n; j++ ) {
736         z[ib[j]] += *v++ * x1;
737       }
738     }
739     break;
740   case 2:
741     for ( i=0; i<mbs; i++ ) {
742       n  = ii[1] - ii[0]; ii++;
743       xb = x + 2*i; x1 = xb[0]; x2 = xb[1];
744       ib = idx + ai[i];
745       for ( j=0; j<n; j++ ) {
746         rval = ib[j]*2;
747         z[rval++] += v[0]*x1 + v[1]*x2;
748         z[rval++] += v[2]*x1 + v[3]*x2;
749         v += 4;
750       }
751     }
752     break;
753   case 3:
754     for ( i=0; i<mbs; i++ ) {
755       n  = ii[1] - ii[0]; ii++;
756       xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
757       ib = idx + ai[i];
758       for ( j=0; j<n; j++ ) {
759         rval = ib[j]*3;
760         z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3;
761         z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3;
762         z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3;
763         v += 9;
764       }
765     }
766     break;
767   case 4:
768     for ( i=0; i<mbs; i++ ) {
769       n  = ii[1] - ii[0]; ii++;
770       xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
771       ib = idx + ai[i];
772       for ( j=0; j<n; j++ ) {
773         rval = ib[j]*4;
774         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4;
775         z[rval++] +=  v[4]*x1 +  v[5]*x2 +  v[6]*x3 +  v[7]*x4;
776         z[rval++] +=  v[8]*x1 +  v[9]*x2 + v[10]*x3 + v[11]*x4;
777         z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
778         v += 16;
779       }
780     }
781     break;
782   case 5:
783     for ( i=0; i<mbs; i++ ) {
784       n  = ii[1] - ii[0]; ii++;
785       xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
786       x4 = xb[3];   x5 = xb[4];
787       ib = idx + ai[i];
788       for ( j=0; j<n; j++ ) {
789         rval = ib[j]*5;
790         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 +  v[4]*x5;
791         z[rval++] +=  v[5]*x1 +  v[6]*x2 +  v[7]*x3 +  v[8]*x4 +  v[9]*x5;
792         z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5;
793         z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5;
794         z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5;
795         v += 25;
796       }
797     }
798     break;
799       /* block sizes larger then 5 by 5 are handled by BLAS */
800     default: {
801       int  _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
802       if (!a->mult_work) {
803         k = PetscMax(a->m,a->n);
804         a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));
805         CHKPTRQ(a->mult_work);
806       }
807       work = a->mult_work;
808       for ( i=0; i<mbs; i++ ) {
809         n     = ii[1] - ii[0]; ii++;
810         ncols = n*bs;
811         PetscMemzero(work,ncols*sizeof(Scalar));
812         LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One);
813         v += n*bs2;
814         x += bs;
815         workt = work;
816         for ( j=0; j<n; j++ ) {
817           zb = z + bs*(*idx++);
818           for ( k=0; k<bs; k++ ) zb[k] += workt[k] ;
819           workt += bs;
820         }
821       }
822     }
823   }
824   ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr);
825   ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr);
826   return 0;
827 }
828 
829 static int MatMultTrans_SeqBAIJ(Mat A,Vec xx, Vec yy)
830 {
831   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
832   int         ierr;
833 
834   ierr = MatMultTransAdd_SeqBAIJ_Private(A,xx,PETSC_NULL,yy); CHKERRQ(ierr);
835   PLogFlops(2*(a->bs2)*(a->nz)-a->n);
836   return 0;
837 }
838 
839 static int MatMultTransAdd_SeqBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
840 {
841   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
842   int         ierr;
843 
844   ierr = MatMultTransAdd_SeqBAIJ_Private(A,xx,yy,zz); CHKERRQ(ierr);
845   PLogFlops(2*(a->bs2)*(a->nz));
846   return 0;
847 }
848 
849 static int MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,int *nz,int *nza,int *mem)
850 {
851   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
852   if (nz)  *nz  = a->bs2*a->nz;
853   if (nza) *nza = a->maxnz;
854   if (mem) *mem = (int)A->mem;
855   return 0;
856 }
857 
858 static int MatEqual_SeqBAIJ(Mat A,Mat B, PetscTruth* flg)
859 {
860   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data;
861 
862   if (B->type !=MATSEQBAIJ)SETERRQ(1,"MatEqual_SeqBAIJ:Matrices must be same type");
863 
864   /* If the  matrix/block dimensions are not equal, or no of nonzeros or shift */
865   if ((a->m != b->m) || (a->n !=b->n) || (a->bs != b->bs)||
866       (a->nz != b->nz)) {
867     *flg = PETSC_FALSE; return 0;
868   }
869 
870   /* if the a->i are the same */
871   if (PetscMemcmp(a->i,b->i, (a->mbs+1)*sizeof(int))) {
872     *flg = PETSC_FALSE; return 0;
873   }
874 
875   /* if a->j are the same */
876   if (PetscMemcmp(a->j,b->j,(a->nz)*sizeof(int))) {
877     *flg = PETSC_FALSE; return 0;
878   }
879 
880   /* if a->a are the same */
881   if (PetscMemcmp(a->a, b->a,(a->nz)*(a->bs)*(a->bs)*sizeof(Scalar))) {
882     *flg = PETSC_FALSE; return 0;
883   }
884   *flg = PETSC_TRUE;
885   return 0;
886 
887 }
888 
889 static int MatGetDiagonal_SeqBAIJ(Mat A,Vec v)
890 {
891   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
892   int         i,j,k,n,row,bs,*ai,*aj,ambs,bs2;
893   Scalar      *x, zero = 0.0,*aa,*aa_j;
894 
895   bs   = a->bs;
896   aa   = a->a;
897   ai   = a->i;
898   aj   = a->j;
899   ambs = a->mbs;
900   bs2  = a->bs2;
901 
902   VecSet(&zero,v);
903   VecGetArray(v,&x); VecGetLocalSize(v,&n);
904   if (n != a->m) SETERRQ(1,"MatGetDiagonal_SeqBAIJ:Nonconforming matrix and vector");
905   for ( i=0; i<ambs; i++ ) {
906     for ( j=ai[i]; j<ai[i+1]; j++ ) {
907       if (aj[j] == i) {
908         row  = i*bs;
909         aa_j = aa+j*bs2;
910         for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k];
911         break;
912       }
913     }
914   }
915   return 0;
916 }
917 
918 static int MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr)
919 {
920   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
921   Scalar      *l,*r,x,*v,*aa,*li,*ri;
922   int         i,j,k,lm,rn,M,m,n,*ai,*aj,mbs,tmp,bs,bs2;
923 
924   ai  = a->i;
925   aj  = a->j;
926   aa  = a->a;
927   m   = a->m;
928   n   = a->n;
929   bs  = a->bs;
930   mbs = a->mbs;
931   bs2 = a->bs2;
932   if (ll) {
933     VecGetArray(ll,&l); VecGetSize(ll,&lm);
934     if (lm != m) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Left scaling vector wrong length");
935     for ( i=0; i<mbs; i++ ) { /* for each block row */
936       M  = ai[i+1] - ai[i];
937       li = l + i*bs;
938       v  = aa + bs2*ai[i];
939       for ( j=0; j<M; j++ ) { /* for each block */
940         for ( k=0; k<bs2; k++ ) {
941           (*v++) *= li[k%bs];
942         }
943       }
944     }
945   }
946 
947   if (rr) {
948     VecGetArray(rr,&r); VecGetSize(rr,&rn);
949     if (rn != n) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Right scaling vector wrong length");
950     for ( i=0; i<mbs; i++ ) { /* for each block row */
951       M  = ai[i+1] - ai[i];
952       v  = aa + bs2*ai[i];
953       for ( j=0; j<M; j++ ) { /* for each block */
954         ri = r + bs*aj[ai[i]+j];
955         for ( k=0; k<bs; k++ ) {
956           x = ri[k];
957           for ( tmp=0; tmp<bs; tmp++ ) (*v++) *= x;
958         }
959       }
960     }
961   }
962   return 0;
963 }
964 
965 
966 extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*);
967 extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double);
968 extern int MatSolve_SeqBAIJ(Mat,Vec,Vec);
969 extern int MatSolveAdd_SeqBAIJ(Mat,Vec,Vec,Vec);
970 extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*);
971 extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*);
972 extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*);
973 extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*);
974 extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*);
975 extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*);
976 
977 static int MatNorm_SeqBAIJ(Mat A,NormType type,double *norm)
978 {
979   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
980   Scalar      *v = a->a;
981   double      sum = 0.0;
982   int         i,nz=a->nz,bs2=a->bs2;
983 
984   if (type == NORM_FROBENIUS) {
985     for (i=0; i< bs2*nz; i++ ) {
986 #if defined(PETSC_COMPLEX)
987       sum += real(conj(*v)*(*v)); v++;
988 #else
989       sum += (*v)*(*v); v++;
990 #endif
991     }
992     *norm = sqrt(sum);
993   }
994   else {
995     SETERRQ(1,"MatNorm_SeqBAIJ:No support for this norm yet");
996   }
997   return 0;
998 }
999 
1000 /*
1001      note: This can only work for identity for row and col. It would
1002    be good to check this and otherwise generate an error.
1003 */
1004 static int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,double efill,int fill)
1005 {
1006   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
1007   Mat         outA;
1008   int         ierr;
1009 
1010   if (fill != 0) SETERRQ(1,"MatILUFactor_SeqBAIJ:Only fill=0 supported");
1011 
1012   outA          = inA;
1013   inA->factor   = FACTOR_LU;
1014   a->row        = row;
1015   a->col        = col;
1016 
1017   a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work);
1018 
1019   if (!a->diag) {
1020     ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr);
1021   }
1022   ierr = MatLUFactorNumeric(inA,&outA); CHKERRQ(ierr);
1023   return 0;
1024 }
1025 
1026 static int MatScale_SeqBAIJ(Scalar *alpha,Mat inA)
1027 {
1028   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
1029   int         one = 1, totalnz = a->bs2*a->nz;
1030   BLscal_( &totalnz, alpha, a->a, &one );
1031   PLogFlops(totalnz);
1032   return 0;
1033 }
1034 
1035 static int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v)
1036 {
1037   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1038   int        *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j;
1039   int        *ai = a->i, *ailen = a->ilen;
1040   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
1041   Scalar     *ap, *aa = a->a, zero = 0.0;
1042 
1043   for ( k=0; k<m; k++ ) { /* loop over rows */
1044     row  = im[k]; brow = row/bs;
1045     if (row < 0) SETERRQ(1,"MatGetValues_SeqBAIJ:Negative row");
1046     if (row >= a->m) SETERRQ(1,"MatGetValues_SeqBAIJ:Row too large");
1047     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1048     nrow = ailen[brow];
1049     for ( l=0; l<n; l++ ) { /* loop over columns */
1050       if (in[l] < 0) SETERRQ(1,"MatGetValues_SeqBAIJ:Negative column");
1051       if (in[l] >= a->n) SETERRQ(1,"MatGetValues_SeqBAIJ:Column too large");
1052       col  = in[l] ;
1053       bcol = col/bs;
1054       cidx = col%bs;
1055       ridx = row%bs;
1056       high = nrow;
1057       low  = 0; /* assume unsorted */
1058       while (high-low > 5) {
1059         t = (low+high)/2;
1060         if (rp[t] > bcol) high = t;
1061         else             low  = t;
1062       }
1063       for ( i=low; i<high; i++ ) {
1064         if (rp[i] > bcol) break;
1065         if (rp[i] == bcol) {
1066           *v++ = ap[bs2*i+bs*cidx+ridx];
1067           goto finished;
1068         }
1069       }
1070       *v++ = zero;
1071       finished:;
1072     }
1073   }
1074   return 0;
1075 }
1076 
1077 int MatPrintHelp_SeqBAIJ(Mat A)
1078 {
1079   static int called = 0;
1080 
1081   if (called) return 0; else called = 1;
1082   return 0;
1083 }
1084 /* -------------------------------------------------------------------*/
1085 static struct _MatOps MatOps = {MatSetValues_SeqBAIJ,
1086        MatGetRow_SeqBAIJ,MatRestoreRow_SeqBAIJ,
1087        MatMult_SeqBAIJ,MatMultAdd_SeqBAIJ,
1088        MatMultTrans_SeqBAIJ,MatMultTransAdd_SeqBAIJ,
1089        MatSolve_SeqBAIJ,MatSolveAdd_SeqBAIJ,
1090        0,0,
1091        MatLUFactor_SeqBAIJ,0,
1092        0,
1093        MatTranspose_SeqBAIJ,
1094        MatGetInfo_SeqBAIJ,MatEqual_SeqBAIJ,
1095        MatGetDiagonal_SeqBAIJ,MatDiagonalScale_SeqBAIJ,MatNorm_SeqBAIJ,
1096        0,MatAssemblyEnd_SeqBAIJ,
1097        0,
1098        MatSetOption_SeqBAIJ,MatZeroEntries_SeqBAIJ,0,
1099        MatGetReordering_SeqBAIJ,
1100        MatLUFactorSymbolic_SeqBAIJ,MatLUFactorNumeric_SeqBAIJ_N,0,0,
1101        MatGetSize_SeqBAIJ,MatGetSize_SeqBAIJ,MatGetOwnershipRange_SeqBAIJ,
1102        MatILUFactorSymbolic_SeqBAIJ,0,
1103        0,0,/*  MatConvert_SeqBAIJ  */ 0,
1104        0,0,
1105        MatConvertSameType_SeqBAIJ,0,0,
1106        MatILUFactor_SeqBAIJ,0,0,
1107        0,0,
1108        MatGetValues_SeqBAIJ,0,
1109        MatPrintHelp_SeqBAIJ,MatScale_SeqBAIJ,
1110        0};
1111 
1112 /*@C
1113    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
1114    compressed row) format.  For good matrix assembly performance the
1115    user should preallocate the matrix storage by setting the parameter nz
1116    (or nzz).  By setting these parameters accurately, performance can be
1117    increased by more than a factor of 50.
1118 
1119    Input Parameters:
1120 .  comm - MPI communicator, set to MPI_COMM_SELF
1121 .  bs - size of block
1122 .  m - number of rows
1123 .  n - number of columns
1124 .  nz - number of block nonzeros per block row (same for all rows)
1125 .  nzz - number of block nonzeros per block row or PETSC_NULL
1126          (possibly different for each row)
1127 
1128    Output Parameter:
1129 .  A - the matrix
1130 
1131    Notes:
1132    The block AIJ format is fully compatible with standard Fortran 77
1133    storage.  That is, the stored row and column indices can begin at
1134    either one (as in Fortran) or zero.  See the users' manual for details.
1135 
1136    Specify the preallocated storage with either nz or nnz (not both).
1137    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1138    allocation.  For additional details, see the users manual chapter on
1139    matrices and the file $(PETSC_DIR)/Performance.
1140 
1141 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
1142 @*/
1143 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A)
1144 {
1145   Mat         B;
1146   Mat_SeqBAIJ *b;
1147   int         i,len,ierr,flg,mbs=m/bs,nbs=n/bs,bs2=bs*bs;
1148 
1149   if (mbs*bs!=m || nbs*bs!=n)
1150     SETERRQ(1,"MatCreateSeqBAIJ:Number rows, cols must be divisible by blocksize");
1151 
1152   *A = 0;
1153   PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATSEQBAIJ,comm);
1154   PLogObjectCreate(B);
1155   B->data = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b);
1156   PetscMemzero(b,sizeof(Mat_SeqBAIJ));
1157   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
1158   switch (bs) {
1159     case 1:
1160       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
1161       break;
1162     case 2:
1163       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
1164       break;
1165     case 3:
1166       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
1167       break;
1168     case 4:
1169       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
1170       break;
1171     case 5:
1172       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
1173       break;
1174   }
1175   B->destroy          = MatDestroy_SeqBAIJ;
1176   B->view             = MatView_SeqBAIJ;
1177   B->factor           = 0;
1178   B->lupivotthreshold = 1.0;
1179   b->row              = 0;
1180   b->col              = 0;
1181   b->reallocs         = 0;
1182 
1183   b->m       = m; B->m = m; B->M = m;
1184   b->n       = n; B->n = n; B->N = n;
1185   b->mbs     = mbs;
1186   b->nbs     = nbs;
1187   b->imax    = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax);
1188   if (nnz == PETSC_NULL) {
1189     if (nz == PETSC_DEFAULT) nz = 5;
1190     else if (nz <= 0)        nz = 1;
1191     for ( i=0; i<mbs; i++ ) b->imax[i] = nz;
1192     nz = nz*mbs;
1193   }
1194   else {
1195     nz = 0;
1196     for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];}
1197   }
1198 
1199   /* allocate the matrix space */
1200   len   = nz*sizeof(int) + nz*bs2*sizeof(Scalar) + (b->m+1)*sizeof(int);
1201   b->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a);
1202   PetscMemzero(b->a,nz*bs2*sizeof(Scalar));
1203   b->j  = (int *) (b->a + nz*bs2);
1204   PetscMemzero(b->j,nz*sizeof(int));
1205   b->i  = b->j + nz;
1206   b->singlemalloc = 1;
1207 
1208   b->i[0] = 0;
1209   for (i=1; i<mbs+1; i++) {
1210     b->i[i] = b->i[i-1] + b->imax[i-1];
1211   }
1212 
1213   /* b->ilen will count nonzeros in each block row so far. */
1214   b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int));
1215   PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ));
1216   for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;}
1217 
1218   b->bs               = bs;
1219   b->bs2              = bs2;
1220   b->mbs              = mbs;
1221   b->nz               = 0;
1222   b->maxnz            = nz;
1223   b->sorted           = 0;
1224   b->roworiented      = 1;
1225   b->nonew            = 0;
1226   b->diag             = 0;
1227   b->solve_work       = 0;
1228   b->mult_work        = 0;
1229   b->spptr            = 0;
1230   *A = B;
1231   ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr);
1232   if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); }
1233   return 0;
1234 }
1235 
1236 int MatConvertSameType_SeqBAIJ(Mat A,Mat *B,int cpvalues)
1237 {
1238   Mat         C;
1239   Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data;
1240   int         i,len, mbs = a->mbs,nz = a->nz,bs2 =a->bs2;
1241 
1242   if (a->i[mbs] != nz) SETERRQ(1,"MatConvertSameType_SeqBAIJ:Corrupt matrix");
1243 
1244   *B = 0;
1245   PetscHeaderCreate(C,_Mat,MAT_COOKIE,MATSEQBAIJ,A->comm);
1246   PLogObjectCreate(C);
1247   C->data       = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c);
1248   PetscMemcpy(&C->ops,&A->ops,sizeof(struct _MatOps));
1249   C->destroy    = MatDestroy_SeqBAIJ;
1250   C->view       = MatView_SeqBAIJ;
1251   C->factor     = A->factor;
1252   c->row        = 0;
1253   c->col        = 0;
1254   C->assembled  = PETSC_TRUE;
1255 
1256   c->m = C->m   = a->m;
1257   c->n = C->n   = a->n;
1258   C->M          = a->m;
1259   C->N          = a->n;
1260 
1261   c->bs         = a->bs;
1262   c->bs2        = a->bs2;
1263   c->mbs        = a->mbs;
1264   c->nbs        = a->nbs;
1265 
1266   c->imax       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax);
1267   c->ilen       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen);
1268   for ( i=0; i<mbs; i++ ) {
1269     c->imax[i] = a->imax[i];
1270     c->ilen[i] = a->ilen[i];
1271   }
1272 
1273   /* allocate the matrix space */
1274   c->singlemalloc = 1;
1275   len   = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(Scalar) + sizeof(int));
1276   c->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(c->a);
1277   c->j  = (int *) (c->a + nz*bs2);
1278   c->i  = c->j + nz;
1279   PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));
1280   if (mbs > 0) {
1281     PetscMemcpy(c->j,a->j,nz*sizeof(int));
1282     if (cpvalues == COPY_VALUES) {
1283       PetscMemcpy(c->a,a->a,bs2*nz*sizeof(Scalar));
1284     }
1285   }
1286 
1287   PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ));
1288   c->sorted      = a->sorted;
1289   c->roworiented = a->roworiented;
1290   c->nonew       = a->nonew;
1291 
1292   if (a->diag) {
1293     c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag);
1294     PLogObjectMemory(C,(mbs+1)*sizeof(int));
1295     for ( i=0; i<mbs; i++ ) {
1296       c->diag[i] = a->diag[i];
1297     }
1298   }
1299   else c->diag          = 0;
1300   c->nz                 = a->nz;
1301   c->maxnz              = a->maxnz;
1302   c->solve_work         = 0;
1303   c->spptr              = 0;      /* Dangerous -I'm throwing away a->spptr */
1304   c->mult_work          = 0;
1305   *B = C;
1306   return 0;
1307 }
1308 
1309 int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A)
1310 {
1311   Mat_SeqBAIJ  *a;
1312   Mat          B;
1313   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg;
1314   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
1315   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
1316   int          *masked, nmask,tmp,bs2,ishift;
1317   Scalar       *aa;
1318   MPI_Comm     comm = ((PetscObject) viewer)->comm;
1319 
1320   ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,&flg);CHKERRQ(ierr);
1321   bs2  = bs*bs;
1322 
1323   MPI_Comm_size(comm,&size);
1324   if (size > 1) SETERRQ(1,"MatLoad_SeqBAIJ:view must have one processor");
1325   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
1326   ierr = PetscBinaryRead(fd,header,4,BINARY_INT); CHKERRQ(ierr);
1327   if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_SeqBAIJ:not Mat object");
1328   M = header[1]; N = header[2]; nz = header[3];
1329 
1330   if (M != N) SETERRQ(1,"MatLoad_SeqBAIJ:Can only do square matrices");
1331 
1332   /*
1333      This code adds extra rows to make sure the number of rows is
1334     divisible by the blocksize
1335   */
1336   mbs        = M/bs;
1337   extra_rows = bs - M + bs*(mbs);
1338   if (extra_rows == bs) extra_rows = 0;
1339   else                  mbs++;
1340   if (extra_rows) {
1341     PLogInfo(0,"MatLoad_SeqBAIJ:Padding loading matrix to match blocksize");
1342   }
1343 
1344   /* read in row lengths */
1345   rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths);
1346   ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
1347   for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
1348 
1349   /* read in column indices */
1350   jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) ); CHKPTRQ(jj);
1351   ierr = PetscBinaryRead(fd,jj,nz,BINARY_INT); CHKERRQ(ierr);
1352   for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i;
1353 
1354   /* loop over row lengths determining block row lengths */
1355   browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths);
1356   PetscMemzero(browlengths,mbs*sizeof(int));
1357   mask   = (int *) PetscMalloc( 2*mbs*sizeof(int) ); CHKPTRQ(mask);
1358   PetscMemzero(mask,mbs*sizeof(int));
1359   masked = mask + mbs;
1360   rowcount = 0; nzcount = 0;
1361   for ( i=0; i<mbs; i++ ) {
1362     nmask = 0;
1363     for ( j=0; j<bs; j++ ) {
1364       kmax = rowlengths[rowcount];
1365       for ( k=0; k<kmax; k++ ) {
1366         tmp = jj[nzcount++]/bs;
1367         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
1368       }
1369       rowcount++;
1370     }
1371     browlengths[i] += nmask;
1372     /* zero out the mask elements we set */
1373     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
1374   }
1375 
1376   /* create our matrix */
1377   ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);
1378          CHKERRQ(ierr);
1379   B = *A;
1380   a = (Mat_SeqBAIJ *) B->data;
1381 
1382   /* set matrix "i" values */
1383   a->i[0] = 0;
1384   for ( i=1; i<= mbs; i++ ) {
1385     a->i[i]      = a->i[i-1] + browlengths[i-1];
1386     a->ilen[i-1] = browlengths[i-1];
1387   }
1388   a->nz         = 0;
1389   for ( i=0; i<mbs; i++ ) a->nz += browlengths[i];
1390 
1391   /* read in nonzero values */
1392   aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa);
1393   ierr = PetscBinaryRead(fd,aa,nz,BINARY_SCALAR); CHKERRQ(ierr);
1394   for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0;
1395 
1396   /* set "a" and "j" values into matrix */
1397   nzcount = 0; jcount = 0;
1398   for ( i=0; i<mbs; i++ ) {
1399     nzcountb = nzcount;
1400     nmask    = 0;
1401     for ( j=0; j<bs; j++ ) {
1402       kmax = rowlengths[i*bs+j];
1403       for ( k=0; k<kmax; k++ ) {
1404         tmp = jj[nzcount++]/bs;
1405 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
1406       }
1407       rowcount++;
1408     }
1409     /* sort the masked values */
1410     PetscSortInt(nmask,masked);
1411 
1412     /* set "j" values into matrix */
1413     maskcount = 1;
1414     for ( j=0; j<nmask; j++ ) {
1415       a->j[jcount++]  = masked[j];
1416       mask[masked[j]] = maskcount++;
1417     }
1418     /* set "a" values into matrix */
1419     ishift = bs2*a->i[i];
1420     for ( j=0; j<bs; j++ ) {
1421       kmax = rowlengths[i*bs+j];
1422       for ( k=0; k<kmax; k++ ) {
1423         tmp    = jj[nzcountb]/bs ;
1424         block  = mask[tmp] - 1;
1425         point  = jj[nzcountb] - bs*tmp;
1426         idx    = ishift + bs2*block + j + bs*point;
1427         a->a[idx] = aa[nzcountb++];
1428       }
1429     }
1430     /* zero out the mask elements we set */
1431     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
1432   }
1433   if (jcount != a->nz) SETERRQ(1,"MatLoad_SeqBAIJ:Error bad binary matrix");
1434 
1435   PetscFree(rowlengths);
1436   PetscFree(browlengths);
1437   PetscFree(aa);
1438   PetscFree(jj);
1439   PetscFree(mask);
1440 
1441   B->assembled = PETSC_TRUE;
1442 
1443   ierr = OptionsHasName(PETSC_NULL,"-mat_view_info",&flg); CHKERRQ(ierr);
1444   if (flg) {
1445     Viewer tviewer;
1446     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
1447     ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_INFO,0);CHKERRQ(ierr);
1448     ierr = MatView(B,tviewer); CHKERRQ(ierr);
1449     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
1450   }
1451   ierr = OptionsHasName(PETSC_NULL,"-mat_view_info_detailed",&flg);CHKERRQ(ierr);
1452   if (flg) {
1453     Viewer tviewer;
1454     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
1455     ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_INFO_DETAILED,0);CHKERRQ(ierr);
1456     ierr = MatView(B,tviewer); CHKERRQ(ierr);
1457     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
1458   }
1459   ierr = OptionsHasName(PETSC_NULL,"-mat_view",&flg); CHKERRQ(ierr);
1460   if (flg) {
1461     Viewer tviewer;
1462     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
1463     ierr = MatView(B,tviewer); CHKERRQ(ierr);
1464     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
1465   }
1466   ierr = OptionsHasName(PETSC_NULL,"-mat_view_matlab",&flg); CHKERRQ(ierr);
1467   if (flg) {
1468     Viewer tviewer;
1469     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
1470     ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_MATLAB,"M");CHKERRQ(ierr);
1471     ierr = MatView(B,tviewer); CHKERRQ(ierr);
1472     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
1473   }
1474   ierr = OptionsHasName(PETSC_NULL,"-mat_view_draw",&flg); CHKERRQ(ierr);
1475   if (flg) {
1476     Viewer tviewer;
1477     ierr = ViewerDrawOpenX(B->comm,0,0,0,0,300,300,&tviewer); CHKERRQ(ierr);
1478     ierr = MatView(B,(Viewer)tviewer); CHKERRQ(ierr);
1479     ierr = ViewerFlush(tviewer); CHKERRQ(ierr);
1480     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
1481   }
1482   return 0;
1483 }
1484 
1485 
1486 
1487