xref: /petsc/src/mat/impls/sbaij/seq/sbaij.c (revision 6849ba73f22fecb8f92ef896a42e4e8bd4cd6965)
1 
2 /*
3     Defines the basic matrix operations for the SBAIJ (compressed row)
4   matrix storage format.
5 */
6 #include "src/mat/impls/baij/seq/baij.h"         /*I "petscmat.h" I*/
7 #include "src/inline/spops.h"
8 #include "src/mat/impls/sbaij/seq/sbaij.h"
9 
10 #define CHUNKSIZE  10
11 
12 /*
13      Checks for missing diagonals
14 */
15 #undef __FUNCT__
16 #define __FUNCT__ "MatMissingDiagonal_SeqSBAIJ"
17 PetscErrorCode MatMissingDiagonal_SeqSBAIJ(Mat A)
18 {
19   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
20   PetscErrorCode ierr;
21   int         *diag,*jj = a->j,i;
22 
23   PetscFunctionBegin;
24   ierr = MatMarkDiagonal_SeqSBAIJ(A);CHKERRQ(ierr);
25   diag = a->diag;
26   for (i=0; i<a->mbs; i++) {
27     if (jj[diag[i]] != i) SETERRQ1(1,"Matrix is missing diagonal number %d",i);
28   }
29   PetscFunctionReturn(0);
30 }
31 
32 #undef __FUNCT__
33 #define __FUNCT__ "MatMarkDiagonal_SeqSBAIJ"
34 PetscErrorCode MatMarkDiagonal_SeqSBAIJ(Mat A)
35 {
36   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
37   PetscErrorCode ierr;
38   int          i,mbs = a->mbs;
39 
40   PetscFunctionBegin;
41   if (a->diag) PetscFunctionReturn(0);
42 
43   ierr = PetscMalloc((mbs+1)*sizeof(int),&a->diag);CHKERRQ(ierr);
44   PetscLogObjectMemory(A,(mbs+1)*sizeof(int));
45   for (i=0; i<mbs; i++) a->diag[i] = a->i[i];
46   PetscFunctionReturn(0);
47 }
48 
49 #undef __FUNCT__
50 #define __FUNCT__ "MatGetRowIJ_SeqSBAIJ"
51 static PetscErrorCode MatGetRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int *ia[],int *ja[],PetscTruth *done)
52 {
53   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
54   int         n = a->mbs,i;
55 
56   PetscFunctionBegin;
57   *nn = n;
58   if (!ia) PetscFunctionReturn(0);
59 
60   if (oshift == 1) {
61     /* temporarily add 1 to i and j indices */
62     int nz = a->i[n];
63     for (i=0; i<nz; i++) a->j[i]++;
64     for (i=0; i<n+1; i++) a->i[i]++;
65     *ia = a->i; *ja = a->j;
66   } else {
67     *ia = a->i; *ja = a->j;
68   }
69   PetscFunctionReturn(0);
70 }
71 
72 #undef __FUNCT__
73 #define __FUNCT__ "MatRestoreRowIJ_SeqSBAIJ"
74 static PetscErrorCode MatRestoreRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int *ia[],int *ja[],PetscTruth *done)
75 {
76   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
77   int         i,n = a->mbs;
78 
79   PetscFunctionBegin;
80   if (!ia) PetscFunctionReturn(0);
81 
82   if (oshift == 1) {
83     int nz = a->i[n]-1;
84     for (i=0; i<nz; i++) a->j[i]--;
85     for (i=0; i<n+1; i++) a->i[i]--;
86   }
87   PetscFunctionReturn(0);
88 }
89 
90 #undef __FUNCT__
91 #define __FUNCT__ "MatGetBlockSize_SeqSBAIJ"
92 PetscErrorCode MatGetBlockSize_SeqSBAIJ(Mat mat,int *bs)
93 {
94   Mat_SeqSBAIJ *sbaij = (Mat_SeqSBAIJ*)mat->data;
95 
96   PetscFunctionBegin;
97   *bs = sbaij->bs;
98   PetscFunctionReturn(0);
99 }
100 
101 #undef __FUNCT__
102 #define __FUNCT__ "MatDestroy_SeqSBAIJ"
103 PetscErrorCode MatDestroy_SeqSBAIJ(Mat A)
104 {
105   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
106   PetscErrorCode ierr;
107 
108   PetscFunctionBegin;
109 #if defined(PETSC_USE_LOG)
110   PetscLogObjectState((PetscObject)A,"Rows=%d, NZ=%d",A->m,a->nz);
111 #endif
112   ierr = PetscFree(a->a);CHKERRQ(ierr);
113   if (!a->singlemalloc) {
114     ierr = PetscFree(a->i);CHKERRQ(ierr);
115     ierr = PetscFree(a->j);CHKERRQ(ierr);
116   }
117   if (a->row) {
118     ierr = ISDestroy(a->row);CHKERRQ(ierr);
119   }
120   if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
121   if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);}
122   if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);}
123   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
124   if (a->solve_work)  {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);}
125   if (a->solves_work) {ierr = PetscFree(a->solves_work);CHKERRQ(ierr);}
126   if (a->mult_work)   {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);}
127   if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);}
128   if (a->xtoy) {ierr = PetscFree(a->xtoy);CHKERRQ(ierr);}
129 
130   if (a->inew){
131     ierr = PetscFree(a->inew);CHKERRQ(ierr);
132     a->inew = 0;
133   }
134   ierr = PetscFree(a);CHKERRQ(ierr);
135   PetscFunctionReturn(0);
136 }
137 
138 #undef __FUNCT__
139 #define __FUNCT__ "MatSetOption_SeqSBAIJ"
140 PetscErrorCode MatSetOption_SeqSBAIJ(Mat A,MatOption op)
141 {
142   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
143 
144   PetscFunctionBegin;
145   switch (op) {
146   case MAT_ROW_ORIENTED:
147     a->roworiented    = PETSC_TRUE;
148     break;
149   case MAT_COLUMN_ORIENTED:
150     a->roworiented    = PETSC_FALSE;
151     break;
152   case MAT_COLUMNS_SORTED:
153     a->sorted         = PETSC_TRUE;
154     break;
155   case MAT_COLUMNS_UNSORTED:
156     a->sorted         = PETSC_FALSE;
157     break;
158   case MAT_KEEP_ZEROED_ROWS:
159     a->keepzeroedrows = PETSC_TRUE;
160     break;
161   case MAT_NO_NEW_NONZERO_LOCATIONS:
162     a->nonew          = 1;
163     break;
164   case MAT_NEW_NONZERO_LOCATION_ERR:
165     a->nonew          = -1;
166     break;
167   case MAT_NEW_NONZERO_ALLOCATION_ERR:
168     a->nonew          = -2;
169     break;
170   case MAT_YES_NEW_NONZERO_LOCATIONS:
171     a->nonew          = 0;
172     break;
173   case MAT_ROWS_SORTED:
174   case MAT_ROWS_UNSORTED:
175   case MAT_YES_NEW_DIAGONALS:
176   case MAT_IGNORE_OFF_PROC_ENTRIES:
177   case MAT_USE_HASH_TABLE:
178     PetscLogInfo(A,"MatSetOption_SeqSBAIJ:Option ignored\n");
179     break;
180   case MAT_NO_NEW_DIAGONALS:
181     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
182   case MAT_NOT_SYMMETRIC:
183   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
184   case MAT_HERMITIAN:
185     SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric");
186   case MAT_SYMMETRIC:
187   case MAT_STRUCTURALLY_SYMMETRIC:
188   case MAT_NOT_HERMITIAN:
189   case MAT_SYMMETRY_ETERNAL:
190   case MAT_NOT_SYMMETRY_ETERNAL:
191     break;
192   default:
193     SETERRQ(PETSC_ERR_SUP,"unknown option");
194   }
195   PetscFunctionReturn(0);
196 }
197 
198 #undef __FUNCT__
199 #define __FUNCT__ "MatGetRow_SeqSBAIJ"
200 PetscErrorCode MatGetRow_SeqSBAIJ(Mat A,int row,int *ncols,int **cols,PetscScalar **v)
201 {
202   Mat_SeqSBAIJ  *a = (Mat_SeqSBAIJ*)A->data;
203   PetscErrorCode ierr;
204   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*cols_i,bs2;
205   MatScalar    *aa,*aa_i;
206   PetscScalar  *v_i;
207 
208   PetscFunctionBegin;
209   bs  = a->bs;
210   ai  = a->i;
211   aj  = a->j;
212   aa  = a->a;
213   bs2 = a->bs2;
214 
215   if (row < 0 || row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE, "Row %d out of range", row);
216 
217   bn  = row/bs;   /* Block number */
218   bp  = row % bs; /* Block position */
219   M   = ai[bn+1] - ai[bn];
220   *ncols = bs*M;
221 
222   if (v) {
223     *v = 0;
224     if (*ncols) {
225       ierr = PetscMalloc((*ncols+row)*sizeof(PetscScalar),v);CHKERRQ(ierr);
226       for (i=0; i<M; i++) { /* for each block in the block row */
227         v_i  = *v + i*bs;
228         aa_i = aa + bs2*(ai[bn] + i);
229         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
230       }
231     }
232   }
233 
234   if (cols) {
235     *cols = 0;
236     if (*ncols) {
237       ierr = PetscMalloc((*ncols+row)*sizeof(int),cols);CHKERRQ(ierr);
238       for (i=0; i<M; i++) { /* for each block in the block row */
239         cols_i = *cols + i*bs;
240         itmp  = bs*aj[ai[bn] + i];
241         for (j=0; j<bs; j++) {cols_i[j] = itmp++;}
242       }
243     }
244   }
245 
246   /*search column A(0:row-1,row) (=A(row,0:row-1)). Could be expensive! */
247   /* this segment is currently removed, so only entries in the upper triangle are obtained */
248 #ifdef column_search
249   v_i    = *v    + M*bs;
250   cols_i = *cols + M*bs;
251   for (i=0; i<bn; i++){ /* for each block row */
252     M = ai[i+1] - ai[i];
253     for (j=0; j<M; j++){
254       itmp = aj[ai[i] + j];    /* block column value */
255       if (itmp == bn){
256         aa_i   = aa    + bs2*(ai[i] + j) + bs*bp;
257         for (k=0; k<bs; k++) {
258           *cols_i++ = i*bs+k;
259           *v_i++    = aa_i[k];
260         }
261         *ncols += bs;
262         break;
263       }
264     }
265   }
266 #endif
267 
268   PetscFunctionReturn(0);
269 }
270 
271 #undef __FUNCT__
272 #define __FUNCT__ "MatRestoreRow_SeqSBAIJ"
273 PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v)
274 {
275   PetscErrorCode ierr;
276 
277   PetscFunctionBegin;
278   if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}}
279   if (v)   {if (*v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}}
280   PetscFunctionReturn(0);
281 }
282 
283 #undef __FUNCT__
284 #define __FUNCT__ "MatTranspose_SeqSBAIJ"
285 PetscErrorCode MatTranspose_SeqSBAIJ(Mat A,Mat *B)
286 {
287   PetscErrorCode ierr;
288   PetscFunctionBegin;
289   ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
290   PetscFunctionReturn(0);
291 }
292 
293 #undef __FUNCT__
294 #define __FUNCT__ "MatView_SeqSBAIJ_ASCII"
295 static PetscErrorCode MatView_SeqSBAIJ_ASCII(Mat A,PetscViewer viewer)
296 {
297   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
298   PetscErrorCode ierr;
299   int i,j,bs = a->bs,k,l,bs2=a->bs2;
300   char              *name;
301   PetscViewerFormat format;
302 
303   PetscFunctionBegin;
304   ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr);
305   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
306   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
307     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
308   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
309     SETERRQ(PETSC_ERR_SUP,"Matlab format not supported");
310   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
311     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
312     for (i=0; i<a->mbs; i++) {
313       for (j=0; j<bs; j++) {
314         ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
315         for (k=a->i[i]; k<a->i[i+1]; k++) {
316           for (l=0; l<bs; l++) {
317 #if defined(PETSC_USE_COMPLEX)
318             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
319               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l,
320                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
321             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
322               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l,
323                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
324             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
325               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
326             }
327 #else
328             if (a->a[bs2*k + l*bs + j] != 0.0) {
329               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
330             }
331 #endif
332           }
333         }
334         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
335       }
336     }
337     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
338   } else {
339     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
340     for (i=0; i<a->mbs; i++) {
341       for (j=0; j<bs; j++) {
342         ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
343         for (k=a->i[i]; k<a->i[i+1]; k++) {
344           for (l=0; l<bs; l++) {
345 #if defined(PETSC_USE_COMPLEX)
346             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
347               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l,
348                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
349             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
350               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l,
351                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
352             } else {
353               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
354             }
355 #else
356             ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
357 #endif
358           }
359         }
360         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
361       }
362     }
363     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
364   }
365   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
366   PetscFunctionReturn(0);
367 }
368 
369 #undef __FUNCT__
370 #define __FUNCT__ "MatView_SeqSBAIJ_Draw_Zoom"
371 static PetscErrorCode MatView_SeqSBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
372 {
373   Mat          A = (Mat) Aa;
374   Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data;
375   PetscErrorCode ierr;
376   int          row,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2,rank;
377   PetscReal    xl,yl,xr,yr,x_l,x_r,y_l,y_r;
378   MatScalar    *aa;
379   MPI_Comm     comm;
380   PetscViewer  viewer;
381 
382   PetscFunctionBegin;
383   /*
384       This is nasty. If this is called from an originally parallel matrix
385    then all processes call this,but only the first has the matrix so the
386    rest should return immediately.
387   */
388   ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr);
389   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
390   if (rank) PetscFunctionReturn(0);
391 
392   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
393 
394   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
395   PetscDrawString(draw, .3*(xl+xr), .3*(yl+yr), PETSC_DRAW_BLACK, "symmetric");
396 
397   /* loop over matrix elements drawing boxes */
398   color = PETSC_DRAW_BLUE;
399   for (i=0,row=0; i<mbs; i++,row+=bs) {
400     for (j=a->i[i]; j<a->i[i+1]; j++) {
401       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
402       x_l = a->j[j]*bs; x_r = x_l + 1.0;
403       aa = a->a + j*bs2;
404       for (k=0; k<bs; k++) {
405         for (l=0; l<bs; l++) {
406           if (PetscRealPart(*aa++) >=  0.) continue;
407           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
408         }
409       }
410     }
411   }
412   color = PETSC_DRAW_CYAN;
413   for (i=0,row=0; i<mbs; i++,row+=bs) {
414     for (j=a->i[i]; j<a->i[i+1]; j++) {
415       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
416       x_l = a->j[j]*bs; x_r = x_l + 1.0;
417       aa = a->a + j*bs2;
418       for (k=0; k<bs; k++) {
419         for (l=0; l<bs; l++) {
420           if (PetscRealPart(*aa++) != 0.) continue;
421           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
422         }
423       }
424     }
425   }
426 
427   color = PETSC_DRAW_RED;
428   for (i=0,row=0; i<mbs; i++,row+=bs) {
429     for (j=a->i[i]; j<a->i[i+1]; j++) {
430       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
431       x_l = a->j[j]*bs; x_r = x_l + 1.0;
432       aa = a->a + j*bs2;
433       for (k=0; k<bs; k++) {
434         for (l=0; l<bs; l++) {
435           if (PetscRealPart(*aa++) <= 0.) continue;
436           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
437         }
438       }
439     }
440   }
441   PetscFunctionReturn(0);
442 }
443 
444 #undef __FUNCT__
445 #define __FUNCT__ "MatView_SeqSBAIJ_Draw"
446 static PetscErrorCode MatView_SeqSBAIJ_Draw(Mat A,PetscViewer viewer)
447 {
448   PetscErrorCode ierr;
449   PetscReal    xl,yl,xr,yr,w,h;
450   PetscDraw    draw;
451   PetscTruth   isnull;
452 
453   PetscFunctionBegin;
454 
455   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
456   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
457 
458   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
459   xr  = A->m; yr = A->m; h = yr/10.0; w = xr/10.0;
460   xr += w;    yr += h;  xl = -w;     yl = -h;
461   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
462   ierr = PetscDrawZoom(draw,MatView_SeqSBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
463   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
464   PetscFunctionReturn(0);
465 }
466 
467 #undef __FUNCT__
468 #define __FUNCT__ "MatView_SeqSBAIJ"
469 PetscErrorCode MatView_SeqSBAIJ(Mat A,PetscViewer viewer)
470 {
471   PetscErrorCode ierr;
472   PetscTruth iascii,isdraw;
473 
474   PetscFunctionBegin;
475   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
476   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
477   if (iascii){
478     ierr = MatView_SeqSBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
479   } else if (isdraw) {
480     ierr = MatView_SeqSBAIJ_Draw(A,viewer);CHKERRQ(ierr);
481   } else {
482     Mat B;
483     ierr = MatConvert(A,MATSEQAIJ,&B);CHKERRQ(ierr);
484     ierr = MatView(B,viewer);CHKERRQ(ierr);
485     ierr = MatDestroy(B);CHKERRQ(ierr);
486   }
487   PetscFunctionReturn(0);
488 }
489 
490 
491 #undef __FUNCT__
492 #define __FUNCT__ "MatGetValues_SeqSBAIJ"
493 PetscErrorCode MatGetValues_SeqSBAIJ(Mat A,int m,const int im[],int n,const int in[],PetscScalar v[])
494 {
495   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
496   int        *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
497   int        *ai = a->i,*ailen = a->ilen;
498   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
499   MatScalar  *ap,*aa = a->a,zero = 0.0;
500 
501   PetscFunctionBegin;
502   for (k=0; k<m; k++) { /* loop over rows */
503     row  = im[k]; brow = row/bs;
504     if (row < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",row);
505     if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m-1);
506     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
507     nrow = ailen[brow];
508     for (l=0; l<n; l++) { /* loop over columns */
509       if (in[l] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %d",in[l]);
510       if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->n-1);
511       col  = in[l] ;
512       bcol = col/bs;
513       cidx = col%bs;
514       ridx = row%bs;
515       high = nrow;
516       low  = 0; /* assume unsorted */
517       while (high-low > 5) {
518         t = (low+high)/2;
519         if (rp[t] > bcol) high = t;
520         else             low  = t;
521       }
522       for (i=low; i<high; i++) {
523         if (rp[i] > bcol) break;
524         if (rp[i] == bcol) {
525           *v++ = ap[bs2*i+bs*cidx+ridx];
526           goto finished;
527         }
528       }
529       *v++ = zero;
530       finished:;
531     }
532   }
533   PetscFunctionReturn(0);
534 }
535 
536 
537 #undef __FUNCT__
538 #define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ"
539 PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat A,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode is)
540 {
541   Mat_SeqSBAIJ    *a = (Mat_SeqSBAIJ*)A->data;
542   PetscErrorCode ierr;
543   int             *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
544   int             *imax=a->imax,*ai=a->i,*ailen=a->ilen;
545   int             *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval;
546   PetscTruth      roworiented=a->roworiented;
547   const MatScalar *value = v;
548   MatScalar       *ap,*aa = a->a,*bap;
549 
550   PetscFunctionBegin;
551   if (roworiented) {
552     stepval = (n-1)*bs;
553   } else {
554     stepval = (m-1)*bs;
555   }
556   for (k=0; k<m; k++) { /* loop over added rows */
557     row  = im[k];
558     if (row < 0) continue;
559 #if defined(PETSC_USE_BOPT_g)
560     if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,a->mbs-1);
561 #endif
562     rp   = aj + ai[row];
563     ap   = aa + bs2*ai[row];
564     rmax = imax[row];
565     nrow = ailen[row];
566     low  = 0;
567     for (l=0; l<n; l++) { /* loop over added columns */
568       if (in[l] < 0) continue;
569       col = in[l];
570 #if defined(PETSC_USE_BOPT_g)
571       if (col >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",col,a->nbs-1);
572 #endif
573       if (col < row) continue; /* ignore lower triangular block */
574       if (roworiented) {
575         value = v + k*(stepval+bs)*bs + l*bs;
576       } else {
577         value = v + l*(stepval+bs)*bs + k*bs;
578       }
579       if (!sorted) low = 0; high = nrow;
580       while (high-low > 7) {
581         t = (low+high)/2;
582         if (rp[t] > col) high = t;
583         else             low  = t;
584       }
585       for (i=low; i<high; i++) {
586         if (rp[i] > col) break;
587         if (rp[i] == col) {
588           bap  = ap +  bs2*i;
589           if (roworiented) {
590             if (is == ADD_VALUES) {
591               for (ii=0; ii<bs; ii++,value+=stepval) {
592                 for (jj=ii; jj<bs2; jj+=bs) {
593                   bap[jj] += *value++;
594                 }
595               }
596             } else {
597               for (ii=0; ii<bs; ii++,value+=stepval) {
598                 for (jj=ii; jj<bs2; jj+=bs) {
599                   bap[jj] = *value++;
600                 }
601               }
602             }
603           } else {
604             if (is == ADD_VALUES) {
605               for (ii=0; ii<bs; ii++,value+=stepval) {
606                 for (jj=0; jj<bs; jj++) {
607                   *bap++ += *value++;
608                 }
609               }
610             } else {
611               for (ii=0; ii<bs; ii++,value+=stepval) {
612                 for (jj=0; jj<bs; jj++) {
613                   *bap++  = *value++;
614                 }
615               }
616             }
617           }
618           goto noinsert2;
619         }
620       }
621       if (nonew == 1) goto noinsert2;
622       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col);
623       if (nrow >= rmax) {
624         /* there is no extra room in row, therefore enlarge */
625         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
626         MatScalar *new_a;
627 
628         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col);
629 
630         /* malloc new storage space */
631         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
632 	ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr);
633         new_j   = (int*)(new_a + bs2*new_nz);
634         new_i   = new_j + new_nz;
635 
636         /* copy over old data into new slots */
637         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];}
638         for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
639         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr);
640         len  = (new_nz - CHUNKSIZE - ai[row] - nrow);
641         ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr);
642         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
643         ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
644         ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
645         /* free up old matrix storage */
646         ierr = PetscFree(a->a);CHKERRQ(ierr);
647         if (!a->singlemalloc) {
648           ierr = PetscFree(a->i);CHKERRQ(ierr);
649           ierr = PetscFree(a->j);CHKERRQ(ierr);
650         }
651         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
652         a->singlemalloc = PETSC_TRUE;
653 
654         rp   = aj + ai[row]; ap = aa + bs2*ai[row];
655         rmax = imax[row] = imax[row] + CHUNKSIZE;
656         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
657         a->maxnz += bs2*CHUNKSIZE;
658         a->reallocs++;
659         a->nz++;
660       }
661       N = nrow++ - 1;
662       /* shift up all the later entries in this row */
663       for (ii=N; ii>=i; ii--) {
664         rp[ii+1] = rp[ii];
665         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
666       }
667       if (N >= i) {
668         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
669       }
670       rp[i] = col;
671       bap   = ap +  bs2*i;
672       if (roworiented) {
673         for (ii=0; ii<bs; ii++,value+=stepval) {
674           for (jj=ii; jj<bs2; jj+=bs) {
675             bap[jj] = *value++;
676           }
677         }
678       } else {
679         for (ii=0; ii<bs; ii++,value+=stepval) {
680           for (jj=0; jj<bs; jj++) {
681             *bap++  = *value++;
682           }
683         }
684       }
685       noinsert2:;
686       low = i;
687     }
688     ailen[row] = nrow;
689   }
690   PetscFunctionReturn(0);
691 }
692 
693 #undef __FUNCT__
694 #define __FUNCT__ "MatAssemblyEnd_SeqSBAIJ"
695 PetscErrorCode MatAssemblyEnd_SeqSBAIJ(Mat A,MatAssemblyType mode)
696 {
697   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
698   PetscErrorCode ierr;
699   int        fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
700   int        m = A->m,*ip,N,*ailen = a->ilen;
701   int        mbs = a->mbs,bs2 = a->bs2,rmax = 0;
702   MatScalar  *aa = a->a,*ap;
703 
704   PetscFunctionBegin;
705   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
706 
707   if (m) rmax = ailen[0];
708   for (i=1; i<mbs; i++) {
709     /* move each row back by the amount of empty slots (fshift) before it*/
710     fshift += imax[i-1] - ailen[i-1];
711     rmax   = PetscMax(rmax,ailen[i]);
712     if (fshift) {
713       ip = aj + ai[i]; ap = aa + bs2*ai[i];
714       N = ailen[i];
715       for (j=0; j<N; j++) {
716         ip[j-fshift] = ip[j];
717         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
718       }
719     }
720     ai[i] = ai[i-1] + ailen[i-1];
721   }
722   if (mbs) {
723     fshift += imax[mbs-1] - ailen[mbs-1];
724     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
725   }
726   /* reset ilen and imax for each row */
727   for (i=0; i<mbs; i++) {
728     ailen[i] = imax[i] = ai[i+1] - ai[i];
729   }
730   a->nz = ai[mbs];
731 
732   /* diagonals may have moved, reset it */
733   if (a->diag) {
734     ierr = PetscMemcpy(a->diag,ai,(mbs+1)*sizeof(int));CHKERRQ(ierr);
735   }
736   PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",
737            m,A->m,a->bs,fshift*bs2,a->nz*bs2);
738   PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Number of mallocs during MatSetValues is %d\n",
739            a->reallocs);
740   PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Most nonzeros blocks in any row is %d\n",rmax);
741   a->reallocs          = 0;
742   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
743 
744   PetscFunctionReturn(0);
745 }
746 
747 /*
748    This function returns an array of flags which indicate the locations of contiguous
749    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
750    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
751    Assume: sizes should be long enough to hold all the values.
752 */
753 #undef __FUNCT__
754 #define __FUNCT__ "MatZeroRows_SeqSBAIJ_Check_Blocks"
755 PetscErrorCode MatZeroRows_SeqSBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max)
756 {
757   int        i,j,k,row;
758   PetscTruth flg;
759 
760   PetscFunctionBegin;
761   for (i=0,j=0; i<n; j++) {
762     row = idx[i];
763     if (row%bs!=0) { /* Not the begining of a block */
764       sizes[j] = 1;
765       i++;
766     } else if (i+bs > n) { /* Beginning of a block, but complete block doesn't exist (at idx end) */
767       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
768       i++;
769     } else { /* Begining of the block, so check if the complete block exists */
770       flg = PETSC_TRUE;
771       for (k=1; k<bs; k++) {
772         if (row+k != idx[i+k]) { /* break in the block */
773           flg = PETSC_FALSE;
774           break;
775         }
776       }
777       if (flg == PETSC_TRUE) { /* No break in the bs */
778         sizes[j] = bs;
779         i+= bs;
780       } else {
781         sizes[j] = 1;
782         i++;
783       }
784     }
785   }
786   *bs_max = j;
787   PetscFunctionReturn(0);
788 }
789 
790 #undef __FUNCT__
791 #define __FUNCT__ "MatZeroRows_SeqSBAIJ"
792 PetscErrorCode MatZeroRows_SeqSBAIJ(Mat A,IS is,const PetscScalar *diag)
793 {
794   PetscFunctionBegin;
795   SETERRQ(PETSC_ERR_SUP,"No support for this function yet");
796 }
797 
798 /* Only add/insert a(i,j) with i<=j (blocks).
799    Any a(i,j) with i>j input by user is ingored.
800 */
801 
802 #undef __FUNCT__
803 #define __FUNCT__ "MatSetValues_SeqSBAIJ"
804 PetscErrorCode MatSetValues_SeqSBAIJ(Mat A,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode is)
805 {
806   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
807   PetscErrorCode ierr;
808   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
809   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented;
810   int         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
811   int         ridx,cidx,bs2=a->bs2;
812   MatScalar   *ap,value,*aa=a->a,*bap;
813 
814   PetscFunctionBegin;
815 
816   for (k=0; k<m; k++) { /* loop over added rows */
817     row  = im[k];       /* row number */
818     brow = row/bs;      /* block row number */
819     if (row < 0) continue;
820 #if defined(PETSC_USE_BOPT_g)
821     if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m-1);
822 #endif
823     rp   = aj + ai[brow]; /*ptr to beginning of column value of the row block*/
824     ap   = aa + bs2*ai[brow]; /*ptr to beginning of element value of the row block*/
825     rmax = imax[brow];  /* maximum space allocated for this row */
826     nrow = ailen[brow]; /* actual length of this row */
827     low  = 0;
828 
829     for (l=0; l<n; l++) { /* loop over added columns */
830       if (in[l] < 0) continue;
831 #if defined(PETSC_USE_BOPT_g)
832       if (in[l] >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->m-1);
833 #endif
834       col = in[l];
835       bcol = col/bs;              /* block col number */
836 
837       if (brow <= bcol){
838         ridx = row % bs; cidx = col % bs; /*row and col index inside the block */
839         if ((brow==bcol && ridx<=cidx) || (brow<bcol)){
840           /* element value a(k,l) */
841           if (roworiented) {
842             value = v[l + k*n];
843           } else {
844             value = v[k + l*m];
845           }
846 
847           /* move pointer bap to a(k,l) quickly and add/insert value */
848           if (!sorted) low = 0; high = nrow;
849           while (high-low > 7) {
850             t = (low+high)/2;
851             if (rp[t] > bcol) high = t;
852             else              low  = t;
853           }
854           for (i=low; i<high; i++) {
855             /* printf("The loop of i=low.., rp[%d]=%d\n",i,rp[i]); */
856             if (rp[i] > bcol) break;
857             if (rp[i] == bcol) {
858               bap  = ap +  bs2*i + bs*cidx + ridx;
859               if (is == ADD_VALUES) *bap += value;
860               else                  *bap  = value;
861               /* for diag block, add/insert its symmetric element a(cidx,ridx) */
862               if (brow == bcol && ridx < cidx){
863                 bap  = ap +  bs2*i + bs*ridx + cidx;
864                 if (is == ADD_VALUES) *bap += value;
865                 else                  *bap  = value;
866               }
867               goto noinsert1;
868             }
869           }
870 
871       if (nonew == 1) goto noinsert1;
872       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col);
873       if (nrow >= rmax) {
874         /* there is no extra room in row, therefore enlarge */
875         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
876         MatScalar *new_a;
877 
878         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col);
879 
880         /* Malloc new storage space */
881         len   = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
882         ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr);
883         new_j = (int*)(new_a + bs2*new_nz);
884         new_i = new_j + new_nz;
885 
886         /* copy over old data into new slots */
887         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];}
888         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
889         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr);
890         len  = (new_nz - CHUNKSIZE - ai[brow] - nrow);
891         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr);
892         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
893         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
894         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
895         /* free up old matrix storage */
896         ierr = PetscFree(a->a);CHKERRQ(ierr);
897         if (!a->singlemalloc) {
898           ierr = PetscFree(a->i);CHKERRQ(ierr);
899           ierr = PetscFree(a->j);CHKERRQ(ierr);
900         }
901         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
902         a->singlemalloc = PETSC_TRUE;
903 
904         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
905         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
906         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
907         a->maxnz += bs2*CHUNKSIZE;
908         a->reallocs++;
909         a->nz++;
910       }
911 
912       N = nrow++ - 1;
913       /* shift up all the later entries in this row */
914       for (ii=N; ii>=i; ii--) {
915         rp[ii+1] = rp[ii];
916         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
917       }
918       if (N>=i) {
919         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
920       }
921       rp[i]                      = bcol;
922       ap[bs2*i + bs*cidx + ridx] = value;
923       noinsert1:;
924       low = i;
925       /* } */
926         }
927       } /* end of if .. if..  */
928     }                     /* end of loop over added columns */
929     ailen[brow] = nrow;
930   }                       /* end of loop over added rows */
931 
932   PetscFunctionReturn(0);
933 }
934 
935 EXTERN PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ(Mat,IS,MatFactorInfo*,Mat*);
936 EXTERN PetscErrorCode MatCholeskyFactor_SeqSBAIJ(Mat,IS,MatFactorInfo*);
937 EXTERN PetscErrorCode MatIncreaseOverlap_SeqSBAIJ(Mat,int,IS[],int);
938 EXTERN PetscErrorCode MatGetSubMatrix_SeqSBAIJ(Mat,IS,IS,int,MatReuse,Mat*);
939 EXTERN PetscErrorCode MatGetSubMatrices_SeqSBAIJ(Mat,int,const IS[],const IS[],MatReuse,Mat*[]);
940 EXTERN PetscErrorCode MatMultTranspose_SeqSBAIJ(Mat,Vec,Vec);
941 EXTERN PetscErrorCode MatMultTransposeAdd_SeqSBAIJ(Mat,Vec,Vec,Vec);
942 EXTERN PetscErrorCode MatScale_SeqSBAIJ(const PetscScalar*,Mat);
943 EXTERN PetscErrorCode MatNorm_SeqSBAIJ(Mat,NormType,PetscReal *);
944 EXTERN PetscErrorCode MatEqual_SeqSBAIJ(Mat,Mat,PetscTruth*);
945 EXTERN PetscErrorCode MatGetDiagonal_SeqSBAIJ(Mat,Vec);
946 EXTERN PetscErrorCode MatDiagonalScale_SeqSBAIJ(Mat,Vec,Vec);
947 EXTERN PetscErrorCode MatGetInfo_SeqSBAIJ(Mat,MatInfoType,MatInfo *);
948 EXTERN PetscErrorCode MatZeroEntries_SeqSBAIJ(Mat);
949 EXTERN PetscErrorCode MatGetRowMax_SeqSBAIJ(Mat,Vec);
950 
951 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_N(Mat,Vec,Vec);
952 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_1(Mat,Vec,Vec);
953 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_2(Mat,Vec,Vec);
954 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_3(Mat,Vec,Vec);
955 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_4(Mat,Vec,Vec);
956 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_5(Mat,Vec,Vec);
957 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_6(Mat,Vec,Vec);
958 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_7(Mat,Vec,Vec);
959 EXTERN PetscErrorCode MatSolveTranspose_SeqSBAIJ_7(Mat,Vec,Vec);
960 EXTERN PetscErrorCode MatSolveTranspose_SeqSBAIJ_6(Mat,Vec,Vec);
961 EXTERN PetscErrorCode MatSolveTranspose_SeqSBAIJ_5(Mat,Vec,Vec);
962 EXTERN PetscErrorCode MatSolveTranspose_SeqSBAIJ_4(Mat,Vec,Vec);
963 EXTERN PetscErrorCode MatSolveTranspose_SeqSBAIJ_3(Mat,Vec,Vec);
964 EXTERN PetscErrorCode MatSolveTranspose_SeqSBAIJ_2(Mat,Vec,Vec);
965 EXTERN PetscErrorCode MatSolveTranspose_SeqSBAIJ_1(Mat,Vec,Vec);
966 
967 EXTERN PetscErrorCode MatSolves_SeqSBAIJ_1(Mat,Vecs,Vecs);
968 
969 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_1_NaturalOrdering(Mat,Vec,Vec);
970 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_2_NaturalOrdering(Mat,Vec,Vec);
971 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_3_NaturalOrdering(Mat,Vec,Vec);
972 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_4_NaturalOrdering(Mat,Vec,Vec);
973 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_5_NaturalOrdering(Mat,Vec,Vec);
974 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_6_NaturalOrdering(Mat,Vec,Vec);
975 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_7_NaturalOrdering(Mat,Vec,Vec);
976 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_N_NaturalOrdering(Mat,Vec,Vec);
977 
978 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat,Mat*);
979 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1(Mat,Mat*);
980 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat,Mat*);
981 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_3(Mat,Mat*);
982 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_4(Mat,Mat*);
983 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_5(Mat,Mat*);
984 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_6(Mat,Mat*);
985 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_7(Mat,Mat*);
986 EXTERN PetscErrorCode MatGetInertia_SeqSBAIJ(Mat,int*,int*,int*);
987 
988 EXTERN PetscErrorCode MatMult_SeqSBAIJ_1(Mat,Vec,Vec);
989 EXTERN PetscErrorCode MatMult_SeqSBAIJ_2(Mat,Vec,Vec);
990 EXTERN PetscErrorCode MatMult_SeqSBAIJ_3(Mat,Vec,Vec);
991 EXTERN PetscErrorCode MatMult_SeqSBAIJ_4(Mat,Vec,Vec);
992 EXTERN PetscErrorCode MatMult_SeqSBAIJ_5(Mat,Vec,Vec);
993 EXTERN PetscErrorCode MatMult_SeqSBAIJ_6(Mat,Vec,Vec);
994 EXTERN PetscErrorCode MatMult_SeqSBAIJ_7(Mat,Vec,Vec);
995 EXTERN PetscErrorCode MatMult_SeqSBAIJ_N(Mat,Vec,Vec);
996 
997 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_1(Mat,Vec,Vec,Vec);
998 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_2(Mat,Vec,Vec,Vec);
999 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_3(Mat,Vec,Vec,Vec);
1000 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_4(Mat,Vec,Vec,Vec);
1001 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_5(Mat,Vec,Vec,Vec);
1002 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_6(Mat,Vec,Vec,Vec);
1003 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_7(Mat,Vec,Vec,Vec);
1004 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_N(Mat,Vec,Vec,Vec);
1005 
1006 #ifdef HAVE_MatICCFactor
1007 /* modified from MatILUFactor_SeqSBAIJ, needs further work!  */
1008 #undef __FUNCT__
1009 #define __FUNCT__ "MatICCFactor_SeqSBAIJ"
1010 PetscErrorCode MatICCFactor_SeqSBAIJ(Mat inA,IS row,MatFactorInfo *info)
1011 {
1012   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data;
1013   Mat         outA;
1014   PetscErrorCode ierr;
1015   PetscTruth  row_identity,col_identity;
1016 
1017   PetscFunctionBegin;
1018   /*
1019   if (level != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
1020   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
1021   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
1022   if (!row_identity || !col_identity) {
1023     SETERRQ(1,"Row and column permutations must be identity for in-place ICC");
1024   }
1025   */
1026 
1027   outA          = inA;
1028   inA->factor   = FACTOR_CHOLESKY;
1029 
1030   if (!a->diag) {
1031     ierr = MatMarkDiagonal_SeqSBAIJ(inA);CHKERRQ(ierr);
1032   }
1033   /*
1034       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
1035       for ILU(0) factorization with natural ordering
1036   */
1037   switch (a->bs) {
1038   case 1:
1039     inA->ops->solve            = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1040     inA->ops->solvetranspose   = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1041     inA->ops->solves           = MatSolves_SeqSBAIJ_1;
1042     PetscLoginfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering solvetrans BS=1\n");
1043   case 2:
1044     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering;
1045     inA->ops->solve           = MatSolve_SeqSBAIJ_2_NaturalOrdering;
1046     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_2_NaturalOrdering;
1047     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n");
1048     break;
1049   case 3:
1050     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering;
1051     inA->ops->solve           = MatSolve_SeqSBAIJ_3_NaturalOrdering;
1052     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_3_NaturalOrdering;
1053     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=3\n");
1054     break;
1055   case 4:
1056     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering;
1057     inA->ops->solve           = MatSolve_SeqSBAIJ_4_NaturalOrdering;
1058     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_4_NaturalOrdering;
1059     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n");
1060     break;
1061   case 5:
1062     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering;
1063     inA->ops->solve           = MatSolve_SeqSBAIJ_5_NaturalOrdering;
1064     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_5_NaturalOrdering;
1065     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n");
1066     break;
1067   case 6:
1068     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering;
1069     inA->ops->solve           = MatSolve_SeqSBAIJ_6_NaturalOrdering;
1070     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_6_NaturalOrdering;
1071     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n");
1072     break;
1073   case 7:
1074     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering;
1075     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_7_NaturalOrdering;
1076     inA->ops->solve           = MatSolve_SeqSBAIJ_7_NaturalOrdering;
1077     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n");
1078     break;
1079   default:
1080     a->row        = row;
1081     a->icol       = col;
1082     ierr          = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
1083     ierr          = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
1084 
1085     /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1086     ierr = ISInvertPermutation(col,PETSC_DECIDE, &(a->icol));CHKERRQ(ierr);
1087     PetscLogObjectParent(inA,a->icol);
1088 
1089     if (!a->solve_work) {
1090       ierr = PetscMalloc((A->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
1091       PetscLogObjectMemory(inA,(A->m+a->bs)*sizeof(PetscScalar));
1092     }
1093   }
1094 
1095   ierr = MatCholeskyFactorNumeric(inA,&outA);CHKERRQ(ierr);
1096 
1097   PetscFunctionReturn(0);
1098 }
1099 #endif
1100 
1101 #undef __FUNCT__
1102 #define __FUNCT__ "MatPrintHelp_SeqSBAIJ"
1103 PetscErrorCode MatPrintHelp_SeqSBAIJ(Mat A)
1104 {
1105   static PetscTruth called = PETSC_FALSE;
1106   MPI_Comm          comm = A->comm;
1107   PetscErrorCode ierr;
1108 
1109   PetscFunctionBegin;
1110   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1111   ierr = (*PetscHelpPrintf)(comm," Options for MATSEQSBAIJ and MATMPISBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr);
1112   ierr = (*PetscHelpPrintf)(comm,"  -mat_block_size <block_size>\n");CHKERRQ(ierr);
1113   PetscFunctionReturn(0);
1114 }
1115 
1116 EXTERN_C_BEGIN
1117 #undef __FUNCT__
1118 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices_SeqSBAIJ"
1119 PetscErrorCode MatSeqSBAIJSetColumnIndices_SeqSBAIJ(Mat mat,int *indices)
1120 {
1121   Mat_SeqSBAIJ *baij = (Mat_SeqSBAIJ *)mat->data;
1122   int         i,nz,n;
1123 
1124   PetscFunctionBegin;
1125   nz = baij->maxnz;
1126   n  = mat->n;
1127   for (i=0; i<nz; i++) {
1128     baij->j[i] = indices[i];
1129   }
1130   baij->nz = nz;
1131   for (i=0; i<n; i++) {
1132     baij->ilen[i] = baij->imax[i];
1133   }
1134 
1135   PetscFunctionReturn(0);
1136 }
1137 EXTERN_C_END
1138 
1139 #undef __FUNCT__
1140 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices"
1141 /*@
1142     MatSeqSBAIJSetColumnIndices - Set the column indices for all the rows
1143        in the matrix.
1144 
1145   Input Parameters:
1146 +  mat     - the SeqSBAIJ matrix
1147 -  indices - the column indices
1148 
1149   Level: advanced
1150 
1151   Notes:
1152     This can be called if you have precomputed the nonzero structure of the
1153   matrix and want to provide it to the matrix object to improve the performance
1154   of the MatSetValues() operation.
1155 
1156     You MUST have set the correct numbers of nonzeros per row in the call to
1157   MatCreateSeqSBAIJ().
1158 
1159     MUST be called before any calls to MatSetValues()
1160 
1161 .seealso: MatCreateSeqSBAIJ
1162 @*/
1163 PetscErrorCode MatSeqSBAIJSetColumnIndices(Mat mat,int *indices)
1164 {
1165   PetscErrorCode ierr,(*f)(Mat,int *);
1166 
1167   PetscFunctionBegin;
1168   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
1169   PetscValidPointer(indices,2);
1170   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqSBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr);
1171   if (f) {
1172     ierr = (*f)(mat,indices);CHKERRQ(ierr);
1173   } else {
1174     SETERRQ(1,"Wrong type of matrix to set column indices");
1175   }
1176   PetscFunctionReturn(0);
1177 }
1178 
1179 #undef __FUNCT__
1180 #define __FUNCT__ "MatSetUpPreallocation_SeqSBAIJ"
1181 PetscErrorCode MatSetUpPreallocation_SeqSBAIJ(Mat A)
1182 {
1183   PetscErrorCode ierr;
1184 
1185   PetscFunctionBegin;
1186   ierr =  MatSeqSBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr);
1187   PetscFunctionReturn(0);
1188 }
1189 
1190 #undef __FUNCT__
1191 #define __FUNCT__ "MatGetArray_SeqSBAIJ"
1192 PetscErrorCode MatGetArray_SeqSBAIJ(Mat A,PetscScalar *array[])
1193 {
1194   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1195   PetscFunctionBegin;
1196   *array = a->a;
1197   PetscFunctionReturn(0);
1198 }
1199 
1200 #undef __FUNCT__
1201 #define __FUNCT__ "MatRestoreArray_SeqSBAIJ"
1202 PetscErrorCode MatRestoreArray_SeqSBAIJ(Mat A,PetscScalar *array[])
1203 {
1204   PetscFunctionBegin;
1205   PetscFunctionReturn(0);
1206 }
1207 
1208 #include "petscblaslapack.h"
1209 #undef __FUNCT__
1210 #define __FUNCT__ "MatAXPY_SeqSBAIJ"
1211 PetscErrorCode MatAXPY_SeqSBAIJ(const PetscScalar *a,Mat X,Mat Y,MatStructure str)
1212 {
1213   Mat_SeqSBAIJ *x=(Mat_SeqSBAIJ *)X->data, *y=(Mat_SeqSBAIJ *)Y->data;
1214   PetscErrorCode ierr;
1215   int one=1,i,bs=y->bs,bs2,j;
1216 
1217   PetscFunctionBegin;
1218   if (str == SAME_NONZERO_PATTERN) {
1219     BLaxpy_(&x->nz,(PetscScalar*)a,x->a,&one,y->a,&one);
1220   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1221     if (y->xtoy && y->XtoY != X) {
1222       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1223       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1224     }
1225     if (!y->xtoy) { /* get xtoy */
1226       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
1227       y->XtoY = X;
1228     }
1229     bs2 = bs*bs;
1230     for (i=0; i<x->nz; i++) {
1231       j = 0;
1232       while (j < bs2){
1233         y->a[bs2*y->xtoy[i]+j] += (*a)*(x->a[bs2*i+j]);
1234         j++;
1235       }
1236     }
1237     PetscLogInfo(0,"MatAXPY_SeqSBAIJ: ratio of nnz_s(X)/nnz_s(Y): %d/%d = %g\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));
1238   } else {
1239     ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
1240   }
1241   PetscFunctionReturn(0);
1242 }
1243 
1244 #undef __FUNCT__
1245 #define __FUNCT__ "MatIsSymmetric_SeqSBAIJ"
1246 PetscErrorCode MatIsSymmetric_SeqSBAIJ(Mat A,PetscReal tol,PetscTruth *flg)
1247 {
1248   PetscFunctionBegin;
1249   *flg = PETSC_TRUE;
1250   PetscFunctionReturn(0);
1251 }
1252 
1253 #undef __FUNCT__
1254 #define __FUNCT__ "MatIsStructurallySymmetric_SeqSBAIJ"
1255 PetscErrorCode MatIsStructurallySymmetric_SeqSBAIJ(Mat A,PetscTruth *flg)
1256 {
1257   PetscFunctionBegin;
1258   *flg = PETSC_TRUE;
1259   PetscFunctionReturn(0);
1260 }
1261 
1262 #undef __FUNCT__
1263 #define __FUNCT__ "MatIsHermitian_SeqSBAIJ"
1264 PetscErrorCode MatIsHermitian_SeqSBAIJ(Mat A,PetscTruth *flg)
1265 {
1266   PetscFunctionBegin;
1267   *flg = PETSC_FALSE;
1268   PetscFunctionReturn(0);
1269 }
1270 
1271 /* -------------------------------------------------------------------*/
1272 static struct _MatOps MatOps_Values = {MatSetValues_SeqSBAIJ,
1273        MatGetRow_SeqSBAIJ,
1274        MatRestoreRow_SeqSBAIJ,
1275        MatMult_SeqSBAIJ_N,
1276 /* 4*/ MatMultAdd_SeqSBAIJ_N,
1277        MatMultTranspose_SeqSBAIJ,
1278        MatMultTransposeAdd_SeqSBAIJ,
1279        MatSolve_SeqSBAIJ_N,
1280        0,
1281        0,
1282 /*10*/ 0,
1283        0,
1284        MatCholeskyFactor_SeqSBAIJ,
1285        MatRelax_SeqSBAIJ,
1286        MatTranspose_SeqSBAIJ,
1287 /*15*/ MatGetInfo_SeqSBAIJ,
1288        MatEqual_SeqSBAIJ,
1289        MatGetDiagonal_SeqSBAIJ,
1290        MatDiagonalScale_SeqSBAIJ,
1291        MatNorm_SeqSBAIJ,
1292 /*20*/ 0,
1293        MatAssemblyEnd_SeqSBAIJ,
1294        0,
1295        MatSetOption_SeqSBAIJ,
1296        MatZeroEntries_SeqSBAIJ,
1297 /*25*/ MatZeroRows_SeqSBAIJ,
1298        0,
1299        0,
1300        MatCholeskyFactorSymbolic_SeqSBAIJ,
1301        MatCholeskyFactorNumeric_SeqSBAIJ_N,
1302 /*30*/ MatSetUpPreallocation_SeqSBAIJ,
1303        0,
1304        MatICCFactorSymbolic_SeqSBAIJ,
1305        MatGetArray_SeqSBAIJ,
1306        MatRestoreArray_SeqSBAIJ,
1307 /*35*/ MatDuplicate_SeqSBAIJ,
1308        0,
1309        0,
1310        0,
1311        0,
1312 /*40*/ MatAXPY_SeqSBAIJ,
1313        MatGetSubMatrices_SeqSBAIJ,
1314        MatIncreaseOverlap_SeqSBAIJ,
1315        MatGetValues_SeqSBAIJ,
1316        0,
1317 /*45*/ MatPrintHelp_SeqSBAIJ,
1318        MatScale_SeqSBAIJ,
1319        0,
1320        0,
1321        0,
1322 /*50*/ MatGetBlockSize_SeqSBAIJ,
1323        MatGetRowIJ_SeqSBAIJ,
1324        MatRestoreRowIJ_SeqSBAIJ,
1325        0,
1326        0,
1327 /*55*/ 0,
1328        0,
1329        0,
1330        0,
1331        MatSetValuesBlocked_SeqSBAIJ,
1332 /*60*/ MatGetSubMatrix_SeqSBAIJ,
1333        0,
1334        0,
1335        MatGetPetscMaps_Petsc,
1336        0,
1337 /*65*/ 0,
1338        0,
1339        0,
1340        0,
1341        0,
1342 /*70*/ MatGetRowMax_SeqSBAIJ,
1343        0,
1344        0,
1345        0,
1346        0,
1347 /*75*/ 0,
1348        0,
1349        0,
1350        0,
1351        0,
1352 /*80*/ 0,
1353        0,
1354        0,
1355 #if !defined(PETSC_USE_COMPLEX)
1356        MatGetInertia_SeqSBAIJ,
1357 #else
1358        0,
1359 #endif
1360 /*85*/ MatLoad_SeqSBAIJ,
1361        MatIsSymmetric_SeqSBAIJ,
1362        MatIsStructurallySymmetric_SeqSBAIJ,
1363        MatIsHermitian_SeqSBAIJ
1364 };
1365 
1366 EXTERN_C_BEGIN
1367 #undef __FUNCT__
1368 #define __FUNCT__ "MatStoreValues_SeqSBAIJ"
1369 PetscErrorCode MatStoreValues_SeqSBAIJ(Mat mat)
1370 {
1371   Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data;
1372   int         nz = aij->i[mat->m]*aij->bs*aij->bs2;
1373   PetscErrorCode ierr;
1374 
1375   PetscFunctionBegin;
1376   if (aij->nonew != 1) {
1377     SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1378   }
1379 
1380   /* allocate space for values if not already there */
1381   if (!aij->saved_values) {
1382     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
1383   }
1384 
1385   /* copy values over */
1386   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
1387   PetscFunctionReturn(0);
1388 }
1389 EXTERN_C_END
1390 
1391 EXTERN_C_BEGIN
1392 #undef __FUNCT__
1393 #define __FUNCT__ "MatRetrieveValues_SeqSBAIJ"
1394 PetscErrorCode MatRetrieveValues_SeqSBAIJ(Mat mat)
1395 {
1396   Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data;
1397   PetscErrorCode ierr;
1398   int         nz = aij->i[mat->m]*aij->bs*aij->bs2;
1399 
1400   PetscFunctionBegin;
1401   if (aij->nonew != 1) {
1402     SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1403   }
1404   if (!aij->saved_values) {
1405     SETERRQ(1,"Must call MatStoreValues(A);first");
1406   }
1407 
1408   /* copy values over */
1409   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
1410   PetscFunctionReturn(0);
1411 }
1412 EXTERN_C_END
1413 
1414 EXTERN_C_BEGIN
1415 #undef __FUNCT__
1416 #define __FUNCT__ "MatSeqSBAIJSetPreallocation_SeqSBAIJ"
1417 PetscErrorCode MatSeqSBAIJSetPreallocation_SeqSBAIJ(Mat B,int bs,int nz,int *nnz)
1418 {
1419   Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data;
1420   PetscErrorCode ierr;
1421   int          i,len,mbs,bs2;
1422   PetscTruth   flg;
1423 
1424   PetscFunctionBegin;
1425   B->preallocated = PETSC_TRUE;
1426   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1427   mbs  = B->m/bs;
1428   bs2  = bs*bs;
1429 
1430   if (mbs*bs != B->m) {
1431     SETERRQ(PETSC_ERR_ARG_SIZ,"Number rows, cols must be divisible by blocksize");
1432   }
1433 
1434   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 3;
1435   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz);
1436   if (nnz) {
1437     for (i=0; i<mbs; i++) {
1438       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]);
1439       if (nnz[i] > mbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],mbs);
1440     }
1441   }
1442 
1443   ierr    = PetscOptionsHasName(B->prefix,"-mat_no_unroll",&flg);CHKERRQ(ierr);
1444   if (!flg) {
1445     switch (bs) {
1446     case 1:
1447       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1;
1448       B->ops->solve           = MatSolve_SeqSBAIJ_1;
1449       B->ops->solves          = MatSolves_SeqSBAIJ_1;
1450       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_1;
1451       B->ops->mult            = MatMult_SeqSBAIJ_1;
1452       B->ops->multadd         = MatMultAdd_SeqSBAIJ_1;
1453       break;
1454     case 2:
1455       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2;
1456       B->ops->solve           = MatSolve_SeqSBAIJ_2;
1457       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_2;
1458       B->ops->mult            = MatMult_SeqSBAIJ_2;
1459       B->ops->multadd         = MatMultAdd_SeqSBAIJ_2;
1460       break;
1461     case 3:
1462       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3;
1463       B->ops->solve           = MatSolve_SeqSBAIJ_3;
1464       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_3;
1465       B->ops->mult            = MatMult_SeqSBAIJ_3;
1466       B->ops->multadd         = MatMultAdd_SeqSBAIJ_3;
1467       break;
1468     case 4:
1469       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4;
1470       B->ops->solve           = MatSolve_SeqSBAIJ_4;
1471       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_4;
1472       B->ops->mult            = MatMult_SeqSBAIJ_4;
1473       B->ops->multadd         = MatMultAdd_SeqSBAIJ_4;
1474       break;
1475     case 5:
1476       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5;
1477       B->ops->solve           = MatSolve_SeqSBAIJ_5;
1478       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_5;
1479       B->ops->mult            = MatMult_SeqSBAIJ_5;
1480       B->ops->multadd         = MatMultAdd_SeqSBAIJ_5;
1481       break;
1482     case 6:
1483       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6;
1484       B->ops->solve           = MatSolve_SeqSBAIJ_6;
1485       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_6;
1486       B->ops->mult            = MatMult_SeqSBAIJ_6;
1487       B->ops->multadd         = MatMultAdd_SeqSBAIJ_6;
1488       break;
1489     case 7:
1490       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7;
1491       B->ops->solve           = MatSolve_SeqSBAIJ_7;
1492       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_7;
1493       B->ops->mult            = MatMult_SeqSBAIJ_7;
1494       B->ops->multadd         = MatMultAdd_SeqSBAIJ_7;
1495       break;
1496     }
1497   }
1498 
1499   b->mbs = mbs;
1500   b->nbs = mbs;
1501   ierr   = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr);
1502   if (!nnz) {
1503     if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
1504     else if (nz <= 0)        nz = 1;
1505     for (i=0; i<mbs; i++) {
1506       b->imax[i] = nz;
1507     }
1508     nz = nz*mbs; /* total nz */
1509   } else {
1510     nz = 0;
1511     for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
1512   }
1513   /* nz=(nz+mbs)/2; */ /* total diagonal and superdiagonal nonzero blocks */
1514 
1515   /* allocate the matrix space */
1516   len  = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int);
1517   ierr = PetscMalloc(len,&b->a);CHKERRQ(ierr);
1518   ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1519   b->j = (int*)(b->a + nz*bs2);
1520   ierr = PetscMemzero(b->j,nz*sizeof(int));CHKERRQ(ierr);
1521   b->i = b->j + nz;
1522   b->singlemalloc = PETSC_TRUE;
1523 
1524   /* pointer to beginning of each row */
1525   b->i[0] = 0;
1526   for (i=1; i<mbs+1; i++) {
1527     b->i[i] = b->i[i-1] + b->imax[i-1];
1528   }
1529 
1530   /* b->ilen will count nonzeros in each block row so far. */
1531   ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr);
1532   PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ));
1533   for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
1534 
1535   b->bs               = bs;
1536   b->bs2              = bs2;
1537   b->nz             = 0;
1538   b->maxnz          = nz*bs2;
1539 
1540   b->inew             = 0;
1541   b->jnew             = 0;
1542   b->anew             = 0;
1543   b->a2anew           = 0;
1544   b->permute          = PETSC_FALSE;
1545   PetscFunctionReturn(0);
1546 }
1547 EXTERN_C_END
1548 
1549 EXTERN_C_BEGIN
1550 EXTERN PetscErrorCode MatConvert_SeqSBAIJ_SeqAIJ(Mat,const MatType,Mat*);
1551 EXTERN PetscErrorCode MatConvert_SeqSBAIJ_SeqBAIJ(Mat,const MatType,Mat*);
1552 EXTERN_C_END
1553 
1554 /*MC
1555    MATSEQSBAIJ - MATSEQSBAIJ = "seqsbaij" - A matrix type to be used for sequential symmetric block sparse matrices,
1556    based on block compressed sparse row format.  Only the upper triangular portion of the matrix is stored.
1557 
1558    Options Database Keys:
1559 . -mat_type seqsbaij - sets the matrix type to "seqsbaij" during a call to MatSetFromOptions()
1560 
1561   Level: beginner
1562 
1563 .seealso: MatCreateSeqSBAIJ
1564 M*/
1565 
1566 EXTERN_C_BEGIN
1567 #undef __FUNCT__
1568 #define __FUNCT__ "MatCreate_SeqSBAIJ"
1569 PetscErrorCode MatCreate_SeqSBAIJ(Mat B)
1570 {
1571   Mat_SeqSBAIJ *b;
1572   PetscErrorCode ierr;
1573   int size;
1574 
1575   PetscFunctionBegin;
1576   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
1577   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
1578   B->m = B->M = PetscMax(B->m,B->M);
1579   B->n = B->N = PetscMax(B->n,B->N);
1580 
1581   ierr    = PetscNew(Mat_SeqSBAIJ,&b);CHKERRQ(ierr);
1582   B->data = (void*)b;
1583   ierr    = PetscMemzero(b,sizeof(Mat_SeqSBAIJ));CHKERRQ(ierr);
1584   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1585   B->ops->destroy     = MatDestroy_SeqSBAIJ;
1586   B->ops->view        = MatView_SeqSBAIJ;
1587   B->factor           = 0;
1588   B->lupivotthreshold = 1.0;
1589   B->mapping          = 0;
1590   b->row              = 0;
1591   b->icol             = 0;
1592   b->reallocs         = 0;
1593   b->saved_values     = 0;
1594 
1595   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1596   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
1597 
1598   b->sorted           = PETSC_FALSE;
1599   b->roworiented      = PETSC_TRUE;
1600   b->nonew            = 0;
1601   b->diag             = 0;
1602   b->solve_work       = 0;
1603   b->mult_work        = 0;
1604   B->spptr            = 0;
1605   b->keepzeroedrows   = PETSC_FALSE;
1606   b->xtoy             = 0;
1607   b->XtoY             = 0;
1608 
1609   b->inew             = 0;
1610   b->jnew             = 0;
1611   b->anew             = 0;
1612   b->a2anew           = 0;
1613   b->permute          = PETSC_FALSE;
1614 
1615   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1616                                      "MatStoreValues_SeqSBAIJ",
1617                                      MatStoreValues_SeqSBAIJ);CHKERRQ(ierr);
1618   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1619                                      "MatRetrieveValues_SeqSBAIJ",
1620                                      (void*)MatRetrieveValues_SeqSBAIJ);CHKERRQ(ierr);
1621   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetColumnIndices_C",
1622                                      "MatSeqSBAIJSetColumnIndices_SeqSBAIJ",
1623                                      MatSeqSBAIJSetColumnIndices_SeqSBAIJ);CHKERRQ(ierr);
1624   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqaij_C",
1625                                      "MatConvert_SeqSBAIJ_SeqAIJ",
1626                                       MatConvert_SeqSBAIJ_SeqAIJ);CHKERRQ(ierr);
1627   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqbaij_C",
1628                                      "MatConvert_SeqSBAIJ_SeqBAIJ",
1629                                       MatConvert_SeqSBAIJ_SeqBAIJ);CHKERRQ(ierr);
1630   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetPreallocation_C",
1631                                      "MatSeqSBAIJSetPreallocation_SeqSBAIJ",
1632                                      MatSeqSBAIJSetPreallocation_SeqSBAIJ);CHKERRQ(ierr);
1633 
1634   B->symmetric                  = PETSC_TRUE;
1635   B->structurally_symmetric     = PETSC_TRUE;
1636   B->symmetric_set              = PETSC_TRUE;
1637   B->structurally_symmetric_set = PETSC_TRUE;
1638   PetscFunctionReturn(0);
1639 }
1640 EXTERN_C_END
1641 
1642 #undef __FUNCT__
1643 #define __FUNCT__ "MatSeqSBAIJSetPreallocation"
1644 /*@C
1645    MatSeqSBAIJSetPreallocation - Creates a sparse symmetric matrix in block AIJ (block
1646    compressed row) format.  For good matrix assembly performance the
1647    user should preallocate the matrix storage by setting the parameter nz
1648    (or the array nnz).  By setting these parameters accurately, performance
1649    during matrix assembly can be increased by more than a factor of 50.
1650 
1651    Collective on Mat
1652 
1653    Input Parameters:
1654 +  A - the symmetric matrix
1655 .  bs - size of block
1656 .  nz - number of block nonzeros per block row (same for all rows)
1657 -  nnz - array containing the number of block nonzeros in the upper triangular plus
1658          diagonal portion of each block (possibly different for each block row) or PETSC_NULL
1659 
1660    Options Database Keys:
1661 .   -mat_no_unroll - uses code that does not unroll the loops in the
1662                      block calculations (much slower)
1663 .    -mat_block_size - size of the blocks to use
1664 
1665    Level: intermediate
1666 
1667    Notes:
1668    Specify the preallocated storage with either nz or nnz (not both).
1669    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1670    allocation.  For additional details, see the users manual chapter on
1671    matrices.
1672 
1673 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ()
1674 @*/
1675 PetscErrorCode MatSeqSBAIJSetPreallocation(Mat B,int bs,int nz,const int nnz[]) {
1676   PetscErrorCode ierr,(*f)(Mat,int,int,const int[]);
1677 
1678   PetscFunctionBegin;
1679   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqSBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1680   if (f) {
1681     ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr);
1682   }
1683   PetscFunctionReturn(0);
1684 }
1685 
1686 #undef __FUNCT__
1687 #define __FUNCT__ "MatCreateSeqSBAIJ"
1688 /*@C
1689    MatCreateSeqSBAIJ - Creates a sparse symmetric matrix in block AIJ (block
1690    compressed row) format.  For good matrix assembly performance the
1691    user should preallocate the matrix storage by setting the parameter nz
1692    (or the array nnz).  By setting these parameters accurately, performance
1693    during matrix assembly can be increased by more than a factor of 50.
1694 
1695    Collective on MPI_Comm
1696 
1697    Input Parameters:
1698 +  comm - MPI communicator, set to PETSC_COMM_SELF
1699 .  bs - size of block
1700 .  m - number of rows, or number of columns
1701 .  nz - number of block nonzeros per block row (same for all rows)
1702 -  nnz - array containing the number of block nonzeros in the upper triangular plus
1703          diagonal portion of each block (possibly different for each block row) or PETSC_NULL
1704 
1705    Output Parameter:
1706 .  A - the symmetric matrix
1707 
1708    Options Database Keys:
1709 .   -mat_no_unroll - uses code that does not unroll the loops in the
1710                      block calculations (much slower)
1711 .    -mat_block_size - size of the blocks to use
1712 
1713    Level: intermediate
1714 
1715    Notes:
1716 
1717    Specify the preallocated storage with either nz or nnz (not both).
1718    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1719    allocation.  For additional details, see the users manual chapter on
1720    matrices.
1721 
1722 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ()
1723 @*/
1724 PetscErrorCode MatCreateSeqSBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,const int nnz[],Mat *A)
1725 {
1726   PetscErrorCode ierr;
1727 
1728   PetscFunctionBegin;
1729   ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr);
1730   ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
1731   ierr = MatSeqSBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr);
1732   PetscFunctionReturn(0);
1733 }
1734 
1735 #undef __FUNCT__
1736 #define __FUNCT__ "MatDuplicate_SeqSBAIJ"
1737 PetscErrorCode MatDuplicate_SeqSBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
1738 {
1739   Mat          C;
1740   Mat_SeqSBAIJ *c,*a = (Mat_SeqSBAIJ*)A->data;
1741   PetscErrorCode ierr;
1742   int          i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2;
1743 
1744   PetscFunctionBegin;
1745   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
1746 
1747   *B = 0;
1748   ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr);
1749   ierr = MatSetType(C,A->type_name);CHKERRQ(ierr);
1750   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1751   c    = (Mat_SeqSBAIJ*)C->data;
1752 
1753   C->preallocated   = PETSC_TRUE;
1754   C->factor         = A->factor;
1755   c->row            = 0;
1756   c->icol           = 0;
1757   c->saved_values   = 0;
1758   c->keepzeroedrows = a->keepzeroedrows;
1759   C->assembled      = PETSC_TRUE;
1760 
1761   C->M    = A->M;
1762   C->N    = A->N;
1763   c->bs   = a->bs;
1764   c->bs2  = a->bs2;
1765   c->mbs  = a->mbs;
1766   c->nbs  = a->nbs;
1767 
1768   ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr);
1769   ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr);
1770   for (i=0; i<mbs; i++) {
1771     c->imax[i] = a->imax[i];
1772     c->ilen[i] = a->ilen[i];
1773   }
1774 
1775   /* allocate the matrix space */
1776   c->singlemalloc = PETSC_TRUE;
1777   len  = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int));
1778   ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr);
1779   c->j = (int*)(c->a + nz*bs2);
1780   c->i = c->j + nz;
1781   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr);
1782   if (mbs > 0) {
1783     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr);
1784     if (cpvalues == MAT_COPY_VALUES) {
1785       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1786     } else {
1787       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1788     }
1789   }
1790 
1791   PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ));
1792   c->sorted      = a->sorted;
1793   c->roworiented = a->roworiented;
1794   c->nonew       = a->nonew;
1795 
1796   if (a->diag) {
1797     ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr);
1798     PetscLogObjectMemory(C,(mbs+1)*sizeof(int));
1799     for (i=0; i<mbs; i++) {
1800       c->diag[i] = a->diag[i];
1801     }
1802   } else c->diag        = 0;
1803   c->nz               = a->nz;
1804   c->maxnz            = a->maxnz;
1805   c->solve_work         = 0;
1806   c->mult_work          = 0;
1807   *B = C;
1808   ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr);
1809   PetscFunctionReturn(0);
1810 }
1811 
1812 #undef __FUNCT__
1813 #define __FUNCT__ "MatLoad_SeqSBAIJ"
1814 PetscErrorCode MatLoad_SeqSBAIJ(PetscViewer viewer,const MatType type,Mat *A)
1815 {
1816   Mat_SeqSBAIJ *a;
1817   Mat          B;
1818   PetscErrorCode ierr;
1819   int          i,nz,fd,header[4],size,*rowlengths=0,M,N,bs=1;
1820   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*s_browlengths,maskcount;
1821   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
1822   int          *masked,nmask,tmp,bs2,ishift;
1823   PetscScalar  *aa;
1824   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1825 
1826   PetscFunctionBegin;
1827   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1828   bs2  = bs*bs;
1829 
1830   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1831   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
1832   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1833   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
1834   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
1835   M = header[1]; N = header[2]; nz = header[3];
1836 
1837   if (header[3] < 0) {
1838     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqSBAIJ");
1839   }
1840 
1841   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
1842 
1843   /*
1844      This code adds extra rows to make sure the number of rows is
1845     divisible by the blocksize
1846   */
1847   mbs        = M/bs;
1848   extra_rows = bs - M + bs*(mbs);
1849   if (extra_rows == bs) extra_rows = 0;
1850   else                  mbs++;
1851   if (extra_rows) {
1852     PetscLogInfo(0,"MatLoad_SeqSBAIJ:Padding loaded matrix to match blocksize\n");
1853   }
1854 
1855   /* read in row lengths */
1856   ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr);
1857   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1858   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
1859 
1860   /* read in column indices */
1861   ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr);
1862   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
1863   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
1864 
1865   /* loop over row lengths determining block row lengths */
1866   ierr     = PetscMalloc(mbs*sizeof(int),&s_browlengths);CHKERRQ(ierr);
1867   ierr     = PetscMemzero(s_browlengths,mbs*sizeof(int));CHKERRQ(ierr);
1868   ierr     = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr);
1869   ierr     = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr);
1870   masked   = mask + mbs;
1871   rowcount = 0; nzcount = 0;
1872   for (i=0; i<mbs; i++) {
1873     nmask = 0;
1874     for (j=0; j<bs; j++) {
1875       kmax = rowlengths[rowcount];
1876       for (k=0; k<kmax; k++) {
1877         tmp = jj[nzcount++]/bs;   /* block col. index */
1878         if (!mask[tmp] && tmp >= i) {masked[nmask++] = tmp; mask[tmp] = 1;}
1879       }
1880       rowcount++;
1881     }
1882     s_browlengths[i] += nmask;
1883 
1884     /* zero out the mask elements we set */
1885     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1886   }
1887 
1888   /* create our matrix */
1889   ierr = MatCreate(comm,M+extra_rows,N+extra_rows,M+extra_rows,N+extra_rows,&B);CHKERRQ(ierr);
1890   ierr = MatSetType(B,type);CHKERRQ(ierr);
1891   ierr = MatSeqSBAIJSetPreallocation(B,bs,0,s_browlengths);CHKERRQ(ierr);
1892   a = (Mat_SeqSBAIJ*)B->data;
1893 
1894   /* set matrix "i" values */
1895   a->i[0] = 0;
1896   for (i=1; i<= mbs; i++) {
1897     a->i[i]      = a->i[i-1] + s_browlengths[i-1];
1898     a->ilen[i-1] = s_browlengths[i-1];
1899   }
1900   a->nz = a->i[mbs];
1901 
1902   /* read in nonzero values */
1903   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1904   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
1905   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
1906 
1907   /* set "a" and "j" values into matrix */
1908   nzcount = 0; jcount = 0;
1909   for (i=0; i<mbs; i++) {
1910     nzcountb = nzcount;
1911     nmask    = 0;
1912     for (j=0; j<bs; j++) {
1913       kmax = rowlengths[i*bs+j];
1914       for (k=0; k<kmax; k++) {
1915         tmp = jj[nzcount++]/bs; /* block col. index */
1916         if (!mask[tmp] && tmp >= i) { masked[nmask++] = tmp; mask[tmp] = 1;}
1917       }
1918     }
1919     /* sort the masked values */
1920     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
1921 
1922     /* set "j" values into matrix */
1923     maskcount = 1;
1924     for (j=0; j<nmask; j++) {
1925       a->j[jcount++]  = masked[j];
1926       mask[masked[j]] = maskcount++;
1927     }
1928 
1929     /* set "a" values into matrix */
1930     ishift = bs2*a->i[i];
1931     for (j=0; j<bs; j++) {
1932       kmax = rowlengths[i*bs+j];
1933       for (k=0; k<kmax; k++) {
1934         tmp       = jj[nzcountb]/bs ; /* block col. index */
1935         if (tmp >= i){
1936           block     = mask[tmp] - 1;
1937           point     = jj[nzcountb] - bs*tmp;
1938           idx       = ishift + bs2*block + j + bs*point;
1939           a->a[idx] = aa[nzcountb];
1940         }
1941         nzcountb++;
1942       }
1943     }
1944     /* zero out the mask elements we set */
1945     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1946   }
1947   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
1948 
1949   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1950   ierr = PetscFree(s_browlengths);CHKERRQ(ierr);
1951   ierr = PetscFree(aa);CHKERRQ(ierr);
1952   ierr = PetscFree(jj);CHKERRQ(ierr);
1953   ierr = PetscFree(mask);CHKERRQ(ierr);
1954 
1955   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1956   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1957   ierr = MatView_Private(B);CHKERRQ(ierr);
1958   *A = B;
1959   PetscFunctionReturn(0);
1960 }
1961 
1962 #undef __FUNCT__
1963 #define __FUNCT__ "MatRelax_SeqSBAIJ"
1964 PetscErrorCode MatRelax_SeqSBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
1965 {
1966   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1967   MatScalar    *aa=a->a,*v,*v1;
1968   PetscScalar  *x,*b,*t,sum,d;
1969   PetscErrorCode ierr;
1970   int          m=a->mbs,bs=a->bs,*ai=a->i,*aj=a->j;
1971   int          nz,nz1,*vj,*vj1,i;
1972 
1973   PetscFunctionBegin;
1974   its = its*lits;
1975   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
1976 
1977   if (bs > 1)
1978     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
1979 
1980   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1981   if (xx != bb) {
1982     ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1983   } else {
1984     b = x;
1985   }
1986 
1987   ierr = PetscMalloc(m*sizeof(PetscScalar),&t);CHKERRQ(ierr);
1988 
1989   if (flag & SOR_ZERO_INITIAL_GUESS) {
1990     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
1991       for (i=0; i<m; i++)
1992         t[i] = b[i];
1993 
1994       for (i=0; i<m; i++){
1995         d  = *(aa + ai[i]);  /* diag[i] */
1996         v  = aa + ai[i] + 1;
1997         vj = aj + ai[i] + 1;
1998         nz = ai[i+1] - ai[i] - 1;
1999         x[i] = omega*t[i]/d;
2000         while (nz--) t[*vj++] -= x[i]*(*v++); /* update rhs */
2001         PetscLogFlops(2*nz-1);
2002       }
2003     }
2004 
2005     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
2006       for (i=0; i<m; i++)
2007         t[i] = b[i];
2008 
2009       for (i=0; i<m-1; i++){  /* update rhs */
2010         v  = aa + ai[i] + 1;
2011         vj = aj + ai[i] + 1;
2012         nz = ai[i+1] - ai[i] - 1;
2013         while (nz--) t[*vj++] -= x[i]*(*v++);
2014         PetscLogFlops(2*nz-1);
2015       }
2016       for (i=m-1; i>=0; i--){
2017         d  = *(aa + ai[i]);
2018         v  = aa + ai[i] + 1;
2019         vj = aj + ai[i] + 1;
2020         nz = ai[i+1] - ai[i] - 1;
2021         sum = t[i];
2022         while (nz--) sum -= x[*vj++]*(*v++);
2023         PetscLogFlops(2*nz-1);
2024         x[i] =   (1-omega)*x[i] + omega*sum/d;
2025       }
2026     }
2027     its--;
2028   }
2029 
2030   while (its--) {
2031     /*
2032        forward sweep:
2033        for i=0,...,m-1:
2034          sum[i] = (b[i] - U(i,:)x )/d[i];
2035          x[i]   = (1-omega)x[i] + omega*sum[i];
2036          b      = b - x[i]*U^T(i,:);
2037 
2038     */
2039     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
2040       for (i=0; i<m; i++)
2041         t[i] = b[i];
2042 
2043       for (i=0; i<m; i++){
2044         d  = *(aa + ai[i]);  /* diag[i] */
2045         v  = aa + ai[i] + 1; v1=v;
2046         vj = aj + ai[i] + 1; vj1=vj;
2047         nz = ai[i+1] - ai[i] - 1; nz1=nz;
2048         sum = t[i];
2049         while (nz1--) sum -= (*v1++)*x[*vj1++];
2050         x[i] = (1-omega)*x[i] + omega*sum/d;
2051         while (nz--) t[*vj++] -= x[i]*(*v++);
2052         PetscLogFlops(4*nz-2);
2053       }
2054     }
2055 
2056   if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
2057       /*
2058        backward sweep:
2059        b = b - x[i]*U^T(i,:), i=0,...,n-2
2060        for i=m-1,...,0:
2061          sum[i] = (b[i] - U(i,:)x )/d[i];
2062          x[i]   = (1-omega)x[i] + omega*sum[i];
2063       */
2064       for (i=0; i<m; i++)
2065         t[i] = b[i];
2066 
2067       for (i=0; i<m-1; i++){  /* update rhs */
2068         v  = aa + ai[i] + 1;
2069         vj = aj + ai[i] + 1;
2070         nz = ai[i+1] - ai[i] - 1;
2071         while (nz--) t[*vj++] -= x[i]*(*v++);
2072         PetscLogFlops(2*nz-1);
2073       }
2074       for (i=m-1; i>=0; i--){
2075         d  = *(aa + ai[i]);
2076         v  = aa + ai[i] + 1;
2077         vj = aj + ai[i] + 1;
2078         nz = ai[i+1] - ai[i] - 1;
2079         sum = t[i];
2080         while (nz--) sum -= x[*vj++]*(*v++);
2081         PetscLogFlops(2*nz-1);
2082         x[i] =   (1-omega)*x[i] + omega*sum/d;
2083       }
2084     }
2085   }
2086 
2087   ierr = PetscFree(t);CHKERRQ(ierr);
2088   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2089   if (bb != xx) {
2090     ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
2091   }
2092 
2093   PetscFunctionReturn(0);
2094 }
2095 
2096 
2097 
2098 
2099 
2100 
2101