xref: /petsc/src/mat/impls/sbaij/seq/sbaij.c (revision 22612f2f7cceb60caedd65384cdf99fc989f2aeb)
1 #define PETSCMAT_DLL
2 
3 /*
4     Defines the basic matrix operations for the SBAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "src/mat/impls/baij/seq/baij.h"         /*I "petscmat.h" I*/
8 #include "src/inline/spops.h"
9 #include "src/mat/impls/sbaij/seq/sbaij.h"
10 
11 #define CHUNKSIZE  10
12 
13 /*
14      Checks for missing diagonals
15 */
16 #undef __FUNCT__
17 #define __FUNCT__ "MatMissingDiagonal_SeqSBAIJ"
18 PetscErrorCode MatMissingDiagonal_SeqSBAIJ(Mat A)
19 {
20   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
21   PetscErrorCode ierr;
22   PetscInt       *diag,*jj = a->j,i;
23 
24   PetscFunctionBegin;
25   ierr = MatMarkDiagonal_SeqSBAIJ(A);CHKERRQ(ierr);
26   diag = a->diag;
27   for (i=0; i<a->mbs; i++) {
28     if (jj[diag[i]] != i) SETERRQ1(PETSC_ERR_ARG_CORRUPT,"Matrix is missing diagonal number %D",i);
29   }
30   PetscFunctionReturn(0);
31 }
32 
33 #undef __FUNCT__
34 #define __FUNCT__ "MatMarkDiagonal_SeqSBAIJ"
35 PetscErrorCode MatMarkDiagonal_SeqSBAIJ(Mat A)
36 {
37   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
38   PetscErrorCode ierr;
39   PetscInt       i;
40 
41   PetscFunctionBegin;
42   if (!a->diag) {
43     ierr = PetscMalloc(a->mbs*sizeof(PetscInt),&a->diag);CHKERRQ(ierr);
44   }
45   for (i=0; i<a->mbs; i++) a->diag[i] = a->i[i];
46   PetscFunctionReturn(0);
47 }
48 
49 #undef __FUNCT__
50 #define __FUNCT__ "MatGetRowIJ_SeqSBAIJ"
51 static PetscErrorCode MatGetRowIJ_SeqSBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
52 {
53   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
54   PetscInt     i,j,n = a->mbs,nz = a->i[n],bs = A->rmap.bs;
55   PetscErrorCode ierr;
56 
57   PetscFunctionBegin;
58   *nn = n;
59   if (!ia) PetscFunctionReturn(0);
60   if (!blockcompressed) {
61     /* malloc & create the natural set of indices */
62     ierr = PetscMalloc2((n+1)*bs,PetscInt,ia,nz*bs,PetscInt,ja);CHKERRQ(ierr);
63     for (i=0; i<n+1; i++) {
64       for (j=0; j<bs; j++) {
65         *ia[i*bs+j] = a->i[i]*bs+j+oshift;
66       }
67     }
68     for (i=0; i<nz; i++) {
69       for (j=0; j<bs; j++) {
70         *ja[i*bs+j] = a->j[i]*bs+j+oshift;
71       }
72     }
73   } else { /* blockcompressed */
74     if (oshift == 1) {
75       /* temporarily add 1 to i and j indices */
76       for (i=0; i<nz; i++) a->j[i]++;
77       for (i=0; i<n+1; i++) a->i[i]++;
78     }
79     *ia = a->i; *ja = a->j;
80   }
81 
82   PetscFunctionReturn(0);
83 }
84 
85 #undef __FUNCT__
86 #define __FUNCT__ "MatRestoreRowIJ_SeqSBAIJ"
87 static PetscErrorCode MatRestoreRowIJ_SeqSBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
88 {
89   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
90   PetscInt     i,n = a->mbs,nz = a->i[n];
91   PetscErrorCode ierr;
92 
93   PetscFunctionBegin;
94   if (!ia) PetscFunctionReturn(0);
95 
96   if (!blockcompressed) {
97     ierr = PetscFree2(*ia,*ja);CHKERRQ(ierr);
98   } else if (oshift == 1) { /* blockcompressed */
99     for (i=0; i<nz; i++) a->j[i]--;
100     for (i=0; i<n+1; i++) a->i[i]--;
101   }
102 
103   PetscFunctionReturn(0);
104 }
105 
106 #undef __FUNCT__
107 #define __FUNCT__ "MatDestroy_SeqSBAIJ"
108 PetscErrorCode MatDestroy_SeqSBAIJ(Mat A)
109 {
110   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
111   PetscErrorCode ierr;
112 
113   PetscFunctionBegin;
114 #if defined(PETSC_USE_LOG)
115   PetscLogObjectState((PetscObject)A,"Rows=%D, NZ=%D",A->rmap.N,a->nz);
116 #endif
117   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
118   if (a->row) {ierr = ISDestroy(a->row);CHKERRQ(ierr);}
119   if (a->col){ierr = ISDestroy(a->col);CHKERRQ(ierr);}
120   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
121   ierr = PetscFree(a->diag);CHKERRQ(ierr);
122   ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);
123   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
124   ierr = PetscFree(a->relax_work);CHKERRQ(ierr);
125   ierr = PetscFree(a->solves_work);CHKERRQ(ierr);
126   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
127   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
128   ierr = PetscFree(a->xtoy);CHKERRQ(ierr);
129 
130   ierr = PetscFree(a->inew);CHKERRQ(ierr);
131   ierr = PetscFree(a);CHKERRQ(ierr);
132 
133   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
134   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
135   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
136   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr);
137   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqsbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
138   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqsbaij_seqbaij_C","",PETSC_NULL);CHKERRQ(ierr);
139   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
140   PetscFunctionReturn(0);
141 }
142 
143 #undef __FUNCT__
144 #define __FUNCT__ "MatSetOption_SeqSBAIJ"
145 PetscErrorCode MatSetOption_SeqSBAIJ(Mat A,MatOption op,PetscTruth flg)
146 {
147   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
148   PetscErrorCode ierr;
149 
150   PetscFunctionBegin;
151   switch (op) {
152   case MAT_ROW_ORIENTED:
153     a->roworiented = flg;
154     break;
155   case MAT_KEEP_ZEROED_ROWS:
156     a->keepzeroedrows = flg;
157     break;
158   case MAT_NEW_NONZERO_LOCATIONS:
159     a->nonew = (flg ? 0 : 1);
160     break;
161   case MAT_NEW_NONZERO_LOCATION_ERR:
162     a->nonew = (flg ? -1 : 0);
163     break;
164   case MAT_NEW_NONZERO_ALLOCATION_ERR:
165     a->nonew = (flg ? -2 : 0);
166     break;
167   case MAT_NEW_DIAGONALS:
168   case MAT_IGNORE_OFF_PROC_ENTRIES:
169   case MAT_USE_HASH_TABLE:
170     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
171     break;
172   case MAT_HERMITIAN:
173     if (flg) SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric");
174   case MAT_SYMMETRIC:
175   case MAT_STRUCTURALLY_SYMMETRIC:
176   case MAT_SYMMETRY_ETERNAL:
177     if (!flg) SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric");
178     ierr = PetscInfo1(A,"Option %s not relevent\n",MatOptions[op]);CHKERRQ(ierr);
179     break;
180   case MAT_IGNORE_LOWER_TRIANGULAR:
181     a->ignore_ltriangular = flg;
182     break;
183   case MAT_ERROR_LOWER_TRIANGULAR:
184     a->ignore_ltriangular = flg;
185     break;
186   case MAT_GETROW_UPPERTRIANGULAR:
187     a->getrow_utriangular = flg;
188     break;
189   default:
190     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
191   }
192   PetscFunctionReturn(0);
193 }
194 
195 #undef __FUNCT__
196 #define __FUNCT__ "MatGetRow_SeqSBAIJ"
197 PetscErrorCode MatGetRow_SeqSBAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **v)
198 {
199   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
200   PetscErrorCode ierr;
201   PetscInt       itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*cols_i,bs2;
202   MatScalar      *aa,*aa_i;
203   PetscScalar    *v_i;
204 
205   PetscFunctionBegin;
206   if (A && !a->getrow_utriangular) SETERRQ(PETSC_ERR_SUP,"MatGetRow is not supported for SBAIJ matrix format. Getting the upper triangular part of row, run with -mat_getrow_uppertriangular, call MatSetOption(mat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE) or MatGetRowUpperTriangular()");
207   /* Get the upper triangular part of the row */
208   bs  = A->rmap.bs;
209   ai  = a->i;
210   aj  = a->j;
211   aa  = a->a;
212   bs2 = a->bs2;
213 
214   if (row < 0 || row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE, "Row %D out of range", row);
215 
216   bn  = row/bs;   /* Block number */
217   bp  = row % bs; /* Block position */
218   M   = ai[bn+1] - ai[bn];
219   *ncols = bs*M;
220 
221   if (v) {
222     *v = 0;
223     if (*ncols) {
224       ierr = PetscMalloc((*ncols+row)*sizeof(PetscScalar),v);CHKERRQ(ierr);
225       for (i=0; i<M; i++) { /* for each block in the block row */
226         v_i  = *v + i*bs;
227         aa_i = aa + bs2*(ai[bn] + i);
228         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
229       }
230     }
231   }
232 
233   if (cols) {
234     *cols = 0;
235     if (*ncols) {
236       ierr = PetscMalloc((*ncols+row)*sizeof(PetscInt),cols);CHKERRQ(ierr);
237       for (i=0; i<M; i++) { /* for each block in the block row */
238         cols_i = *cols + i*bs;
239         itmp  = bs*aj[ai[bn] + i];
240         for (j=0; j<bs; j++) {cols_i[j] = itmp++;}
241       }
242     }
243   }
244 
245   /*search column A(0:row-1,row) (=A(row,0:row-1)). Could be expensive! */
246   /* this segment is currently removed, so only entries in the upper triangle are obtained */
247 #ifdef column_search
248   v_i    = *v    + M*bs;
249   cols_i = *cols + M*bs;
250   for (i=0; i<bn; i++){ /* for each block row */
251     M = ai[i+1] - ai[i];
252     for (j=0; j<M; j++){
253       itmp = aj[ai[i] + j];    /* block column value */
254       if (itmp == bn){
255         aa_i   = aa    + bs2*(ai[i] + j) + bs*bp;
256         for (k=0; k<bs; k++) {
257           *cols_i++ = i*bs+k;
258           *v_i++    = aa_i[k];
259         }
260         *ncols += bs;
261         break;
262       }
263     }
264   }
265 #endif
266   PetscFunctionReturn(0);
267 }
268 
269 #undef __FUNCT__
270 #define __FUNCT__ "MatRestoreRow_SeqSBAIJ"
271 PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
272 {
273   PetscErrorCode ierr;
274 
275   PetscFunctionBegin;
276   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
277   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
278   PetscFunctionReturn(0);
279 }
280 
281 #undef __FUNCT__
282 #define __FUNCT__ "MatGetRowUpperTriangular_SeqSBAIJ"
283 PetscErrorCode MatGetRowUpperTriangular_SeqSBAIJ(Mat A)
284 {
285   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
286 
287   PetscFunctionBegin;
288   a->getrow_utriangular = PETSC_TRUE;
289   PetscFunctionReturn(0);
290 }
291 #undef __FUNCT__
292 #define __FUNCT__ "MatRestoreRowUpperTriangular_SeqSBAIJ"
293 PetscErrorCode MatRestoreRowUpperTriangular_SeqSBAIJ(Mat A)
294 {
295   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
296 
297   PetscFunctionBegin;
298   a->getrow_utriangular = PETSC_FALSE;
299   PetscFunctionReturn(0);
300 }
301 
302 #undef __FUNCT__
303 #define __FUNCT__ "MatTranspose_SeqSBAIJ"
304 PetscErrorCode MatTranspose_SeqSBAIJ(Mat A,Mat *B)
305 {
306   PetscErrorCode ierr;
307   PetscFunctionBegin;
308   ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
309   PetscFunctionReturn(0);
310 }
311 
312 #undef __FUNCT__
313 #define __FUNCT__ "MatView_SeqSBAIJ_ASCII"
314 static PetscErrorCode MatView_SeqSBAIJ_ASCII(Mat A,PetscViewer viewer)
315 {
316   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
317   PetscErrorCode    ierr;
318   PetscInt          i,j,bs = A->rmap.bs,k,l,bs2=a->bs2;
319   const char        *name;
320   PetscViewerFormat format;
321 
322   PetscFunctionBegin;
323   ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr);
324   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
325   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
326     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
327   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
328     if (A->factor && bs>1){
329       ierr = PetscPrintf(PETSC_COMM_SELF,"Warning: matrix is factored with bs>1. MatView() with PETSC_VIEWER_ASCII_MATLAB is not supported and ignored!\n");CHKERRQ(ierr);
330       PetscFunctionReturn(0);
331     }
332     Mat aij;
333     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
334     ierr = MatView(aij,viewer);CHKERRQ(ierr);
335     ierr = MatDestroy(aij);CHKERRQ(ierr);
336   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
337     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
338     for (i=0; i<a->mbs; i++) {
339       for (j=0; j<bs; j++) {
340         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
341         for (k=a->i[i]; k<a->i[i+1]; k++) {
342           for (l=0; l<bs; l++) {
343 #if defined(PETSC_USE_COMPLEX)
344             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
345               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l,
346                                             PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
347             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
348               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l,
349                                             PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
350             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
351               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
352             }
353 #else
354             if (a->a[bs2*k + l*bs + j] != 0.0) {
355               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
356             }
357 #endif
358           }
359         }
360         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
361       }
362     }
363     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
364   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
365      PetscFunctionReturn(0);
366   } else {
367     if (A->factor && bs>1){
368       ierr = PetscPrintf(PETSC_COMM_SELF,"Warning: matrix is factored. MatView_SeqSBAIJ_ASCII() may not display complete or logically correct entries!\n");CHKERRQ(ierr);
369     }
370     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
371     for (i=0; i<a->mbs; i++) {
372       for (j=0; j<bs; j++) {
373         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
374         for (k=a->i[i]; k<a->i[i+1]; k++) {
375           for (l=0; l<bs; l++) {
376 #if defined(PETSC_USE_COMPLEX)
377             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
378               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l,
379                                             PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
380             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
381               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l,
382                                             PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
383             } else {
384               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
385             }
386 #else
387             ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
388 #endif
389           }
390         }
391         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
392       }
393     }
394     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
395   }
396   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
397   PetscFunctionReturn(0);
398 }
399 
400 #undef __FUNCT__
401 #define __FUNCT__ "MatView_SeqSBAIJ_Draw_Zoom"
402 static PetscErrorCode MatView_SeqSBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
403 {
404   Mat            A = (Mat) Aa;
405   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
406   PetscErrorCode ierr;
407   PetscInt       row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap.bs,bs2=a->bs2;
408   PetscMPIInt    rank;
409   PetscReal      xl,yl,xr,yr,x_l,x_r,y_l,y_r;
410   MatScalar      *aa;
411   MPI_Comm       comm;
412   PetscViewer    viewer;
413 
414   PetscFunctionBegin;
415   /*
416     This is nasty. If this is called from an originally parallel matrix
417     then all processes call this,but only the first has the matrix so the
418     rest should return immediately.
419   */
420   ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr);
421   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
422   if (rank) PetscFunctionReturn(0);
423 
424   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
425 
426   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
427   PetscDrawString(draw, .3*(xl+xr), .3*(yl+yr), PETSC_DRAW_BLACK, "symmetric");
428 
429   /* loop over matrix elements drawing boxes */
430   color = PETSC_DRAW_BLUE;
431   for (i=0,row=0; i<mbs; i++,row+=bs) {
432     for (j=a->i[i]; j<a->i[i+1]; j++) {
433       y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0;
434       x_l = a->j[j]*bs; x_r = x_l + 1.0;
435       aa = a->a + j*bs2;
436       for (k=0; k<bs; k++) {
437         for (l=0; l<bs; l++) {
438           if (PetscRealPart(*aa++) >=  0.) continue;
439           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
440         }
441       }
442     }
443   }
444   color = PETSC_DRAW_CYAN;
445   for (i=0,row=0; i<mbs; i++,row+=bs) {
446     for (j=a->i[i]; j<a->i[i+1]; j++) {
447       y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0;
448       x_l = a->j[j]*bs; x_r = x_l + 1.0;
449       aa = a->a + j*bs2;
450       for (k=0; k<bs; k++) {
451         for (l=0; l<bs; l++) {
452           if (PetscRealPart(*aa++) != 0.) continue;
453           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
454         }
455       }
456     }
457   }
458 
459   color = PETSC_DRAW_RED;
460   for (i=0,row=0; i<mbs; i++,row+=bs) {
461     for (j=a->i[i]; j<a->i[i+1]; j++) {
462       y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0;
463       x_l = a->j[j]*bs; x_r = x_l + 1.0;
464       aa = a->a + j*bs2;
465       for (k=0; k<bs; k++) {
466         for (l=0; l<bs; l++) {
467           if (PetscRealPart(*aa++) <= 0.) continue;
468           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
469         }
470       }
471     }
472   }
473   PetscFunctionReturn(0);
474 }
475 
476 #undef __FUNCT__
477 #define __FUNCT__ "MatView_SeqSBAIJ_Draw"
478 static PetscErrorCode MatView_SeqSBAIJ_Draw(Mat A,PetscViewer viewer)
479 {
480   PetscErrorCode ierr;
481   PetscReal      xl,yl,xr,yr,w,h;
482   PetscDraw      draw;
483   PetscTruth     isnull;
484 
485   PetscFunctionBegin;
486   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
487   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
488 
489   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
490   xr  = A->rmap.N; yr = A->rmap.N; h = yr/10.0; w = xr/10.0;
491   xr += w;    yr += h;  xl = -w;     yl = -h;
492   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
493   ierr = PetscDrawZoom(draw,MatView_SeqSBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
494   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
495   PetscFunctionReturn(0);
496 }
497 
498 #undef __FUNCT__
499 #define __FUNCT__ "MatView_SeqSBAIJ"
500 PetscErrorCode MatView_SeqSBAIJ(Mat A,PetscViewer viewer)
501 {
502   PetscErrorCode ierr;
503   PetscTruth     iascii,isdraw;
504 
505   PetscFunctionBegin;
506   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
507   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
508   if (iascii){
509     ierr = MatView_SeqSBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
510   } else if (isdraw) {
511     ierr = MatView_SeqSBAIJ_Draw(A,viewer);CHKERRQ(ierr);
512   } else {
513     Mat B;
514     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
515     ierr = MatView(B,viewer);CHKERRQ(ierr);
516     ierr = MatDestroy(B);CHKERRQ(ierr);
517   }
518   PetscFunctionReturn(0);
519 }
520 
521 
522 #undef __FUNCT__
523 #define __FUNCT__ "MatGetValues_SeqSBAIJ"
524 PetscErrorCode MatGetValues_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
525 {
526   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
527   PetscInt     *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
528   PetscInt     *ai = a->i,*ailen = a->ilen;
529   PetscInt     brow,bcol,ridx,cidx,bs=A->rmap.bs,bs2=a->bs2;
530   MatScalar    *ap,*aa = a->a;
531 
532   PetscFunctionBegin;
533   for (k=0; k<m; k++) { /* loop over rows */
534     row  = im[k]; brow = row/bs;
535     if (row < 0) {v += n; continue;} /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",row); */
536     if (row >= A->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap.N-1);
537     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
538     nrow = ailen[brow];
539     for (l=0; l<n; l++) { /* loop over columns */
540       if (in[l] < 0) {v++; continue;} /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",in[l]); */
541       if (in[l] >= A->cmap.n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap.n-1);
542       col  = in[l] ;
543       bcol = col/bs;
544       cidx = col%bs;
545       ridx = row%bs;
546       high = nrow;
547       low  = 0; /* assume unsorted */
548       while (high-low > 5) {
549         t = (low+high)/2;
550         if (rp[t] > bcol) high = t;
551         else             low  = t;
552       }
553       for (i=low; i<high; i++) {
554         if (rp[i] > bcol) break;
555         if (rp[i] == bcol) {
556           *v++ = ap[bs2*i+bs*cidx+ridx];
557           goto finished;
558         }
559       }
560       *v++ = 0.0;
561       finished:;
562     }
563   }
564   PetscFunctionReturn(0);
565 }
566 
567 
568 #undef __FUNCT__
569 #define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ"
570 PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
571 {
572   Mat_SeqSBAIJ    *a = (Mat_SeqSBAIJ*)A->data;
573   PetscErrorCode  ierr;
574   PetscInt        *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
575   PetscInt        *imax=a->imax,*ai=a->i,*ailen=a->ilen;
576   PetscInt        *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap.bs,stepval;
577   PetscTruth      roworiented=a->roworiented;
578   const MatScalar *value = v;
579   MatScalar       *ap,*aa = a->a,*bap;
580 
581   PetscFunctionBegin;
582   if (roworiented) {
583     stepval = (n-1)*bs;
584   } else {
585     stepval = (m-1)*bs;
586   }
587   for (k=0; k<m; k++) { /* loop over added rows */
588     row  = im[k];
589     if (row < 0) continue;
590 #if defined(PETSC_USE_DEBUG)
591     if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
592 #endif
593     rp   = aj + ai[row];
594     ap   = aa + bs2*ai[row];
595     rmax = imax[row];
596     nrow = ailen[row];
597     low  = 0;
598     high = nrow;
599     for (l=0; l<n; l++) { /* loop over added columns */
600       if (in[l] < 0) continue;
601       col = in[l];
602 #if defined(PETSC_USE_DEBUG)
603       if (col >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",col,a->nbs-1);
604 #endif
605       if (col < row) continue; /* ignore lower triangular block */
606       if (roworiented) {
607         value = v + k*(stepval+bs)*bs + l*bs;
608       } else {
609         value = v + l*(stepval+bs)*bs + k*bs;
610       }
611       if (col <= lastcol) low = 0; else high = nrow;
612       lastcol = col;
613       while (high-low > 7) {
614         t = (low+high)/2;
615         if (rp[t] > col) high = t;
616         else             low  = t;
617       }
618       for (i=low; i<high; i++) {
619         if (rp[i] > col) break;
620         if (rp[i] == col) {
621           bap  = ap +  bs2*i;
622           if (roworiented) {
623             if (is == ADD_VALUES) {
624               for (ii=0; ii<bs; ii++,value+=stepval) {
625                 for (jj=ii; jj<bs2; jj+=bs) {
626                   bap[jj] += *value++;
627                 }
628               }
629             } else {
630               for (ii=0; ii<bs; ii++,value+=stepval) {
631                 for (jj=ii; jj<bs2; jj+=bs) {
632                   bap[jj] = *value++;
633                 }
634                }
635             }
636           } else {
637             if (is == ADD_VALUES) {
638               for (ii=0; ii<bs; ii++,value+=stepval) {
639                 for (jj=0; jj<bs; jj++) {
640                   *bap++ += *value++;
641                 }
642               }
643             } else {
644               for (ii=0; ii<bs; ii++,value+=stepval) {
645                 for (jj=0; jj<bs; jj++) {
646                   *bap++  = *value++;
647                 }
648               }
649             }
650           }
651           goto noinsert2;
652         }
653       }
654       if (nonew == 1) goto noinsert2;
655       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
656       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
657       N = nrow++ - 1; high++;
658       /* shift up all the later entries in this row */
659       for (ii=N; ii>=i; ii--) {
660         rp[ii+1] = rp[ii];
661         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
662       }
663       if (N >= i) {
664         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
665       }
666       rp[i] = col;
667       bap   = ap +  bs2*i;
668       if (roworiented) {
669         for (ii=0; ii<bs; ii++,value+=stepval) {
670           for (jj=ii; jj<bs2; jj+=bs) {
671             bap[jj] = *value++;
672           }
673         }
674       } else {
675         for (ii=0; ii<bs; ii++,value+=stepval) {
676           for (jj=0; jj<bs; jj++) {
677             *bap++  = *value++;
678           }
679         }
680        }
681     noinsert2:;
682       low = i;
683     }
684     ailen[row] = nrow;
685   }
686    PetscFunctionReturn(0);
687 }
688 
689 #undef __FUNCT__
690 #define __FUNCT__ "MatAssemblyEnd_SeqSBAIJ"
691 PetscErrorCode MatAssemblyEnd_SeqSBAIJ(Mat A,MatAssemblyType mode)
692 {
693   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
694   PetscErrorCode ierr;
695   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
696   PetscInt       m = A->rmap.N,*ip,N,*ailen = a->ilen;
697   PetscInt       mbs = a->mbs,bs2 = a->bs2,rmax = 0;
698   MatScalar      *aa = a->a,*ap;
699 
700   PetscFunctionBegin;
701   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
702 
703   if (m) rmax = ailen[0];
704   for (i=1; i<mbs; i++) {
705     /* move each row back by the amount of empty slots (fshift) before it*/
706     fshift += imax[i-1] - ailen[i-1];
707      rmax   = PetscMax(rmax,ailen[i]);
708      if (fshift) {
709        ip = aj + ai[i]; ap = aa + bs2*ai[i];
710        N = ailen[i];
711        for (j=0; j<N; j++) {
712          ip[j-fshift] = ip[j];
713          ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
714        }
715      }
716      ai[i] = ai[i-1] + ailen[i-1];
717   }
718   if (mbs) {
719     fshift += imax[mbs-1] - ailen[mbs-1];
720      ai[mbs] = ai[mbs-1] + ailen[mbs-1];
721   }
722   /* reset ilen and imax for each row */
723   for (i=0; i<mbs; i++) {
724     ailen[i] = imax[i] = ai[i+1] - ai[i];
725   }
726   a->nz = ai[mbs];
727 
728   /* diagonals may have moved, reset it */
729   if (a->diag) {
730     ierr = PetscMemcpy(a->diag,ai,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
731   }
732   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->rmap.N,A->rmap.bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
733   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
734   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
735   a->reallocs          = 0;
736   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
737   PetscFunctionReturn(0);
738 }
739 
740 /*
741    This function returns an array of flags which indicate the locations of contiguous
742    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
743    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
744    Assume: sizes should be long enough to hold all the values.
745 */
746 #undef __FUNCT__
747 #define __FUNCT__ "MatZeroRows_SeqSBAIJ_Check_Blocks"
748 PetscErrorCode MatZeroRows_SeqSBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
749 {
750   PetscInt   i,j,k,row;
751   PetscTruth flg;
752 
753   PetscFunctionBegin;
754    for (i=0,j=0; i<n; j++) {
755      row = idx[i];
756      if (row%bs!=0) { /* Not the begining of a block */
757        sizes[j] = 1;
758        i++;
759      } else if (i+bs > n) { /* Beginning of a block, but complete block doesn't exist (at idx end) */
760        sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
761        i++;
762      } else { /* Begining of the block, so check if the complete block exists */
763        flg = PETSC_TRUE;
764        for (k=1; k<bs; k++) {
765          if (row+k != idx[i+k]) { /* break in the block */
766            flg = PETSC_FALSE;
767            break;
768          }
769        }
770        if (flg) { /* No break in the bs */
771          sizes[j] = bs;
772          i+= bs;
773        } else {
774          sizes[j] = 1;
775          i++;
776        }
777      }
778    }
779    *bs_max = j;
780    PetscFunctionReturn(0);
781 }
782 
783 
784 /* Only add/insert a(i,j) with i<=j (blocks).
785    Any a(i,j) with i>j input by user is ingored.
786 */
787 
788 #undef __FUNCT__
789 #define __FUNCT__ "MatSetValues_SeqSBAIJ"
790 PetscErrorCode MatSetValues_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
791 {
792   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
793   PetscErrorCode ierr;
794   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
795   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented;
796   PetscInt       *aj=a->j,nonew=a->nonew,bs=A->rmap.bs,brow,bcol;
797   PetscInt       ridx,cidx,bs2=a->bs2;
798   MatScalar      *ap,value,*aa=a->a,*bap;
799 
800   PetscFunctionBegin;
801   for (k=0; k<m; k++) { /* loop over added rows */
802     row  = im[k];       /* row number */
803     brow = row/bs;      /* block row number */
804     if (row < 0) continue;
805 #if defined(PETSC_USE_DEBUG)
806     if (row >= A->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap.N-1);
807 #endif
808     rp   = aj + ai[brow]; /*ptr to beginning of column value of the row block*/
809     ap   = aa + bs2*ai[brow]; /*ptr to beginning of element value of the row block*/
810     rmax = imax[brow];  /* maximum space allocated for this row */
811     nrow = ailen[brow]; /* actual length of this row */
812     low  = 0;
813 
814     for (l=0; l<n; l++) { /* loop over added columns */
815       if (in[l] < 0) continue;
816 #if defined(PETSC_USE_DEBUG)
817       if (in[l] >= A->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->rmap.N-1);
818 #endif
819       col = in[l];
820       bcol = col/bs;              /* block col number */
821 
822       if (brow > bcol) {
823         if (a->ignore_ltriangular){
824           continue; /* ignore lower triangular values */
825         } else {
826           SETERRQ(PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
827         }
828       }
829 
830       ridx = row % bs; cidx = col % bs; /*row and col index inside the block */
831       if ((brow==bcol && ridx<=cidx) || (brow<bcol)){
832         /* element value a(k,l) */
833         if (roworiented) {
834           value = v[l + k*n];
835         } else {
836           value = v[k + l*m];
837         }
838 
839         /* move pointer bap to a(k,l) quickly and add/insert value */
840         if (col <= lastcol) low = 0; high = nrow;
841         lastcol = col;
842         while (high-low > 7) {
843           t = (low+high)/2;
844           if (rp[t] > bcol) high = t;
845           else              low  = t;
846         }
847         for (i=low; i<high; i++) {
848           if (rp[i] > bcol) break;
849           if (rp[i] == bcol) {
850             bap  = ap +  bs2*i + bs*cidx + ridx;
851             if (is == ADD_VALUES) *bap += value;
852             else                  *bap  = value;
853             /* for diag block, add/insert its symmetric element a(cidx,ridx) */
854             if (brow == bcol && ridx < cidx){
855               bap  = ap +  bs2*i + bs*ridx + cidx;
856               if (is == ADD_VALUES) *bap += value;
857               else                  *bap  = value;
858             }
859             goto noinsert1;
860           }
861         }
862 
863         if (nonew == 1) goto noinsert1;
864         if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
865         MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
866 
867         N = nrow++ - 1; high++;
868         /* shift up all the later entries in this row */
869         for (ii=N; ii>=i; ii--) {
870           rp[ii+1] = rp[ii];
871           ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
872         }
873         if (N>=i) {
874           ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
875         }
876         rp[i]                      = bcol;
877         ap[bs2*i + bs*cidx + ridx] = value;
878       noinsert1:;
879         low = i;
880       }
881     }   /* end of loop over added columns */
882     ailen[brow] = nrow;
883   }   /* end of loop over added rows */
884   PetscFunctionReturn(0);
885 }
886 
887 #undef __FUNCT__
888 #define __FUNCT__ "MatICCFactor_SeqSBAIJ"
889 PetscErrorCode MatICCFactor_SeqSBAIJ(Mat inA,IS row,MatFactorInfo *info)
890 {
891   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)inA->data;
892   Mat            outA;
893   PetscErrorCode ierr;
894   PetscTruth     row_identity;
895 
896   PetscFunctionBegin;
897   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels=0 is supported for in-place icc");
898   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
899   if (!row_identity) SETERRQ(PETSC_ERR_SUP,"Matrix reordering is not supported");
900   if (inA->rmap.bs != 1) SETERRQ1(PETSC_ERR_SUP,"Matrix block size %D is not supported",inA->rmap.bs); /* Need to replace MatCholeskyFactorSymbolic_SeqSBAIJ_MSR()! */
901 
902   outA        = inA;
903   inA->factor = FACTOR_CHOLESKY;
904 
905   ierr = MatMarkDiagonal_SeqSBAIJ(inA);CHKERRQ(ierr);
906   /*
907     Blocksize < 8 have a special faster factorization/solver
908     for ICC(0) factorization with natural ordering
909   */
910   switch (inA->rmap.bs){ /* Note: row_identity = PETSC_TRUE! */
911   case 1:
912     inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering;
913     inA->ops->solve            = MatSolve_SeqSBAIJ_1_NaturalOrdering;
914     inA->ops->solvetranspose   = MatSolve_SeqSBAIJ_1_NaturalOrdering;
915     inA->ops->solves           = MatSolves_SeqSBAIJ_1;
916     inA->ops->forwardsolve     = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering;
917     inA->ops->backwardsolve    = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering;
918     ierr = PetscInfo(inA,"Using special in-place natural ordering solvetrans BS=1\n");CHKERRQ(ierr);
919     break;
920   case 2:
921     inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering;
922     inA->ops->solve           = MatSolve_SeqSBAIJ_2_NaturalOrdering;
923     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_2_NaturalOrdering;
924     inA->ops->forwardsolve     = MatForwardSolve_SeqSBAIJ_2_NaturalOrdering;
925     inA->ops->backwardsolve    = MatBackwardSolve_SeqSBAIJ_2_NaturalOrdering;
926     ierr = PetscInfo(inA,"Using special in-place natural ordering factor and solve BS=2\n");CHKERRQ(ierr);
927     break;
928   case 3:
929      inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering;
930      inA->ops->solve           = MatSolve_SeqSBAIJ_3_NaturalOrdering;
931      inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_3_NaturalOrdering;
932      inA->ops->forwardsolve    = MatForwardSolve_SeqSBAIJ_3_NaturalOrdering;
933      inA->ops->backwardsolve   = MatBackwardSolve_SeqSBAIJ_3_NaturalOrdering;
934      ierr = PetscInfo(inA,"Using special in-place natural ordering factor and solve BS=3\n");CHKERRQ(ierr);
935      break;
936   case 4:
937     inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering;
938     inA->ops->solve           = MatSolve_SeqSBAIJ_4_NaturalOrdering;
939     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_4_NaturalOrdering;
940     inA->ops->forwardsolve    = MatForwardSolve_SeqSBAIJ_4_NaturalOrdering;
941     inA->ops->backwardsolve   = MatBackwardSolve_SeqSBAIJ_4_NaturalOrdering;
942     ierr = PetscInfo(inA,"Using special in-place natural ordering factor and solve BS=4\n");CHKERRQ(ierr);
943     break;
944   case 5:
945     inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering;
946     inA->ops->solve           = MatSolve_SeqSBAIJ_5_NaturalOrdering;
947     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_5_NaturalOrdering;
948     inA->ops->forwardsolve    = MatForwardSolve_SeqSBAIJ_5_NaturalOrdering;
949     inA->ops->backwardsolve   = MatBackwardSolve_SeqSBAIJ_5_NaturalOrdering;
950     ierr = PetscInfo(inA,"Using special in-place natural ordering factor and solve BS=5\n");CHKERRQ(ierr);
951     break;
952   case 6:
953     inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering;
954     inA->ops->solve           = MatSolve_SeqSBAIJ_6_NaturalOrdering;
955     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_6_NaturalOrdering;
956     inA->ops->forwardsolve    = MatForwardSolve_SeqSBAIJ_6_NaturalOrdering;
957     inA->ops->backwardsolve   = MatBackwardSolve_SeqSBAIJ_6_NaturalOrdering;
958     ierr = PetscInfo(inA,"Using special in-place natural ordering factor and solve BS=6\n");CHKERRQ(ierr);
959     break;
960   case 7:
961     inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering;
962     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_7_NaturalOrdering;
963     inA->ops->solve           = MatSolve_SeqSBAIJ_7_NaturalOrdering;
964     inA->ops->forwardsolve    = MatForwardSolve_SeqSBAIJ_7_NaturalOrdering;
965     inA->ops->backwardsolve   = MatBackwardSolve_SeqSBAIJ_7_NaturalOrdering;
966     ierr = PetscInfo(inA,"Using special in-place natural ordering factor and solve BS=7\n");CHKERRQ(ierr);
967     break;
968   default:
969     inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering;
970     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_N_NaturalOrdering;
971     inA->ops->solve           = MatSolve_SeqSBAIJ_N_NaturalOrdering;
972     inA->ops->forwardsolve    = MatForwardSolve_SeqSBAIJ_N_NaturalOrdering;
973     inA->ops->backwardsolve   = MatBackwardSolve_SeqSBAIJ_N_NaturalOrdering;
974     break;
975   }
976 
977   ierr   = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
978   if (a->row) { ierr = ISDestroy(a->row);CHKERRQ(ierr); }
979   a->row = row;
980   ierr   = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
981   if (a->col) { ierr = ISDestroy(a->col);CHKERRQ(ierr); }
982   a->col = row;
983 
984   /* Create the invert permutation so that it can be used in MatCholeskyFactorNumeric() */
985   if (a->icol) {ierr = ISInvertPermutation(row,PETSC_DECIDE, &a->icol);CHKERRQ(ierr);}
986   ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr);
987 
988   if (!a->solve_work) {
989     ierr = PetscMalloc((inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
990     ierr = PetscLogObjectMemory(inA,(inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar));CHKERRQ(ierr);
991   }
992 
993   ierr = MatCholeskyFactorNumeric(inA,info,&outA);CHKERRQ(ierr);
994   PetscFunctionReturn(0);
995 }
996 
997 EXTERN_C_BEGIN
998 #undef __FUNCT__
999 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices_SeqSBAIJ"
1000 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetColumnIndices_SeqSBAIJ(Mat mat,PetscInt *indices)
1001 {
1002   Mat_SeqSBAIJ *baij = (Mat_SeqSBAIJ *)mat->data;
1003   PetscInt     i,nz,n;
1004 
1005   PetscFunctionBegin;
1006   nz = baij->maxnz;
1007   n  = mat->cmap.n;
1008   for (i=0; i<nz; i++) {
1009     baij->j[i] = indices[i];
1010   }
1011    baij->nz = nz;
1012    for (i=0; i<n; i++) {
1013      baij->ilen[i] = baij->imax[i];
1014    }
1015    PetscFunctionReturn(0);
1016 }
1017 EXTERN_C_END
1018 
1019 #undef __FUNCT__
1020 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices"
1021 /*@
1022   MatSeqSBAIJSetColumnIndices - Set the column indices for all the rows
1023   in the matrix.
1024 
1025   Input Parameters:
1026   +  mat     - the SeqSBAIJ matrix
1027   -  indices - the column indices
1028 
1029   Level: advanced
1030 
1031   Notes:
1032   This can be called if you have precomputed the nonzero structure of the
1033   matrix and want to provide it to the matrix object to improve the performance
1034   of the MatSetValues() operation.
1035 
1036   You MUST have set the correct numbers of nonzeros per row in the call to
1037   MatCreateSeqSBAIJ(), and the columns indices MUST be sorted.
1038 
1039   MUST be called before any calls to MatSetValues()
1040 
1041   .seealso: MatCreateSeqSBAIJ
1042 @*/
1043 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetColumnIndices(Mat mat,PetscInt *indices)
1044 {
1045   PetscErrorCode ierr,(*f)(Mat,PetscInt *);
1046 
1047   PetscFunctionBegin;
1048   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
1049   PetscValidPointer(indices,2);
1050   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqSBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr);
1051   if (f) {
1052     ierr = (*f)(mat,indices);CHKERRQ(ierr);
1053   } else {
1054     SETERRQ(PETSC_ERR_SUP,"Wrong type of matrix to set column indices");
1055   }
1056   PetscFunctionReturn(0);
1057 }
1058 
1059 #undef __FUNCT__
1060 #define __FUNCT__ "MatCopy_SeqSBAIJ"
1061 PetscErrorCode MatCopy_SeqSBAIJ(Mat A,Mat B,MatStructure str)
1062 {
1063   PetscErrorCode ierr;
1064 
1065   PetscFunctionBegin;
1066   /* If the two matrices have the same copy implementation, use fast copy. */
1067   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
1068     Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1069     Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data;
1070 
1071     if (a->i[A->rmap.N] != b->i[B->rmap.N]) {
1072       SETERRQ(PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different");
1073     }
1074     ierr = PetscMemcpy(b->a,a->a,(a->i[A->rmap.N])*sizeof(PetscScalar));CHKERRQ(ierr);
1075   } else {
1076     ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
1077     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1078     ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
1079   }
1080   PetscFunctionReturn(0);
1081 }
1082 
1083 #undef __FUNCT__
1084 #define __FUNCT__ "MatSetUpPreallocation_SeqSBAIJ"
1085 PetscErrorCode MatSetUpPreallocation_SeqSBAIJ(Mat A)
1086 {
1087   PetscErrorCode ierr;
1088 
1089   PetscFunctionBegin;
1090   ierr =  MatSeqSBAIJSetPreallocation_SeqSBAIJ(A,PetscMax(A->rmap.bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr);
1091   PetscFunctionReturn(0);
1092 }
1093 
1094 #undef __FUNCT__
1095 #define __FUNCT__ "MatGetArray_SeqSBAIJ"
1096 PetscErrorCode MatGetArray_SeqSBAIJ(Mat A,PetscScalar *array[])
1097 {
1098   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1099   PetscFunctionBegin;
1100   *array = a->a;
1101   PetscFunctionReturn(0);
1102 }
1103 
1104 #undef __FUNCT__
1105 #define __FUNCT__ "MatRestoreArray_SeqSBAIJ"
1106 PetscErrorCode MatRestoreArray_SeqSBAIJ(Mat A,PetscScalar *array[])
1107 {
1108   PetscFunctionBegin;
1109   PetscFunctionReturn(0);
1110  }
1111 
1112 #include "petscblaslapack.h"
1113 #undef __FUNCT__
1114 #define __FUNCT__ "MatAXPY_SeqSBAIJ"
1115 PetscErrorCode MatAXPY_SeqSBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1116 {
1117   Mat_SeqSBAIJ   *x=(Mat_SeqSBAIJ *)X->data, *y=(Mat_SeqSBAIJ *)Y->data;
1118   PetscErrorCode ierr;
1119   PetscInt       i,bs=Y->rmap.bs,bs2,j;
1120   PetscBLASInt   bnz = (PetscBLASInt)x->nz,one = 1;
1121 
1122   PetscFunctionBegin;
1123   if (str == SAME_NONZERO_PATTERN) {
1124     PetscScalar alpha = a;
1125     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1126   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1127     if (y->xtoy && y->XtoY != X) {
1128       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1129       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1130     }
1131     if (!y->xtoy) { /* get xtoy */
1132       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
1133       y->XtoY = X;
1134     }
1135     bs2 = bs*bs;
1136     for (i=0; i<x->nz; i++) {
1137       j = 0;
1138       while (j < bs2){
1139         y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]);
1140         j++;
1141       }
1142     }
1143     ierr = PetscInfo3(0,"ratio of nnz_s(X)/nnz_s(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr);
1144   } else {
1145     ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr);
1146     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1147     ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr);
1148   }
1149   PetscFunctionReturn(0);
1150 }
1151 
1152 #undef __FUNCT__
1153 #define __FUNCT__ "MatIsSymmetric_SeqSBAIJ"
1154 PetscErrorCode MatIsSymmetric_SeqSBAIJ(Mat A,PetscReal tol,PetscTruth *flg)
1155 {
1156   PetscFunctionBegin;
1157   *flg = PETSC_TRUE;
1158   PetscFunctionReturn(0);
1159 }
1160 
1161 #undef __FUNCT__
1162 #define __FUNCT__ "MatIsStructurallySymmetric_SeqSBAIJ"
1163 PetscErrorCode MatIsStructurallySymmetric_SeqSBAIJ(Mat A,PetscTruth *flg)
1164 {
1165    PetscFunctionBegin;
1166    *flg = PETSC_TRUE;
1167    PetscFunctionReturn(0);
1168 }
1169 
1170 #undef __FUNCT__
1171 #define __FUNCT__ "MatIsHermitian_SeqSBAIJ"
1172 PetscErrorCode MatIsHermitian_SeqSBAIJ(Mat A,PetscReal tol,PetscTruth *flg)
1173  {
1174    PetscFunctionBegin;
1175    *flg = PETSC_FALSE;
1176    PetscFunctionReturn(0);
1177  }
1178 
1179 #undef __FUNCT__
1180 #define __FUNCT__ "MatRealPart_SeqSBAIJ"
1181 PetscErrorCode MatRealPart_SeqSBAIJ(Mat A)
1182 {
1183   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
1184   PetscInt       i,nz = a->bs2*a->i[a->mbs];
1185   PetscScalar    *aa = a->a;
1186 
1187   PetscFunctionBegin;
1188   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
1189   PetscFunctionReturn(0);
1190 }
1191 
1192 #undef __FUNCT__
1193 #define __FUNCT__ "MatImaginaryPart_SeqSBAIJ"
1194 PetscErrorCode MatImaginaryPart_SeqSBAIJ(Mat A)
1195 {
1196   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
1197   PetscInt       i,nz = a->bs2*a->i[a->mbs];
1198   PetscScalar    *aa = a->a;
1199 
1200   PetscFunctionBegin;
1201   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
1202   PetscFunctionReturn(0);
1203 }
1204 
1205 /* -------------------------------------------------------------------*/
1206 static struct _MatOps MatOps_Values = {MatSetValues_SeqSBAIJ,
1207        MatGetRow_SeqSBAIJ,
1208        MatRestoreRow_SeqSBAIJ,
1209        MatMult_SeqSBAIJ_N,
1210 /* 4*/ MatMultAdd_SeqSBAIJ_N,
1211        MatMult_SeqSBAIJ_N,       /* transpose versions are same as non-transpose versions */
1212        MatMultAdd_SeqSBAIJ_N,
1213        MatSolve_SeqSBAIJ_N,
1214        0,
1215        0,
1216 /*10*/ 0,
1217        0,
1218        MatCholeskyFactor_SeqSBAIJ,
1219        MatRelax_SeqSBAIJ,
1220        MatTranspose_SeqSBAIJ,
1221 /*15*/ MatGetInfo_SeqSBAIJ,
1222        MatEqual_SeqSBAIJ,
1223        MatGetDiagonal_SeqSBAIJ,
1224        MatDiagonalScale_SeqSBAIJ,
1225        MatNorm_SeqSBAIJ,
1226 /*20*/ 0,
1227        MatAssemblyEnd_SeqSBAIJ,
1228        0,
1229        MatSetOption_SeqSBAIJ,
1230        MatZeroEntries_SeqSBAIJ,
1231 /*25*/ 0,
1232        0,
1233        0,
1234        MatCholeskyFactorSymbolic_SeqSBAIJ,
1235        MatCholeskyFactorNumeric_SeqSBAIJ_N,
1236 /*30*/ MatSetUpPreallocation_SeqSBAIJ,
1237        0,
1238        MatICCFactorSymbolic_SeqSBAIJ,
1239        MatGetArray_SeqSBAIJ,
1240        MatRestoreArray_SeqSBAIJ,
1241 /*35*/ MatDuplicate_SeqSBAIJ,
1242        MatForwardSolve_SeqSBAIJ_N,
1243        MatBackwardSolve_SeqSBAIJ_N,
1244        0,
1245        MatICCFactor_SeqSBAIJ,
1246 /*40*/ MatAXPY_SeqSBAIJ,
1247        MatGetSubMatrices_SeqSBAIJ,
1248        MatIncreaseOverlap_SeqSBAIJ,
1249        MatGetValues_SeqSBAIJ,
1250        MatCopy_SeqSBAIJ,
1251 /*45*/ 0,
1252        MatScale_SeqSBAIJ,
1253        0,
1254        0,
1255        0,
1256 /*50*/ 0,
1257        MatGetRowIJ_SeqSBAIJ,
1258        MatRestoreRowIJ_SeqSBAIJ,
1259        0,
1260        0,
1261 /*55*/ 0,
1262        0,
1263        0,
1264        0,
1265        MatSetValuesBlocked_SeqSBAIJ,
1266 /*60*/ MatGetSubMatrix_SeqSBAIJ,
1267        0,
1268        0,
1269        0,
1270        0,
1271 /*65*/ 0,
1272        0,
1273        0,
1274        0,
1275        0,
1276 /*70*/ MatGetRowMaxAbs_SeqSBAIJ,
1277        0,
1278        0,
1279        0,
1280        0,
1281 /*75*/ 0,
1282        0,
1283        0,
1284        0,
1285        0,
1286 /*80*/ 0,
1287        0,
1288        0,
1289 #if !defined(PETSC_USE_COMPLEX)
1290        MatGetInertia_SeqSBAIJ,
1291 #else
1292        0,
1293 #endif
1294        MatLoad_SeqSBAIJ,
1295 /*85*/ MatIsSymmetric_SeqSBAIJ,
1296        MatIsHermitian_SeqSBAIJ,
1297        MatIsStructurallySymmetric_SeqSBAIJ,
1298        0,
1299        0,
1300 /*90*/ 0,
1301        0,
1302        0,
1303        0,
1304        0,
1305 /*95*/ 0,
1306        0,
1307        0,
1308        0,
1309        0,
1310 /*100*/0,
1311        0,
1312        0,
1313        0,
1314        0,
1315 /*105*/0,
1316        MatRealPart_SeqSBAIJ,
1317        MatImaginaryPart_SeqSBAIJ,
1318        MatGetRowUpperTriangular_SeqSBAIJ,
1319        MatRestoreRowUpperTriangular_SeqSBAIJ
1320 };
1321 
1322 EXTERN_C_BEGIN
1323 #undef __FUNCT__
1324 #define __FUNCT__ "MatStoreValues_SeqSBAIJ"
1325 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqSBAIJ(Mat mat)
1326 {
1327   Mat_SeqSBAIJ   *aij = (Mat_SeqSBAIJ *)mat->data;
1328   PetscInt       nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2;
1329   PetscErrorCode ierr;
1330 
1331   PetscFunctionBegin;
1332   if (aij->nonew != 1) {
1333     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
1334   }
1335 
1336   /* allocate space for values if not already there */
1337   if (!aij->saved_values) {
1338     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
1339   }
1340 
1341   /* copy values over */
1342   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
1343   PetscFunctionReturn(0);
1344 }
1345 EXTERN_C_END
1346 
1347 EXTERN_C_BEGIN
1348 #undef __FUNCT__
1349 #define __FUNCT__ "MatRetrieveValues_SeqSBAIJ"
1350 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqSBAIJ(Mat mat)
1351 {
1352   Mat_SeqSBAIJ   *aij = (Mat_SeqSBAIJ *)mat->data;
1353   PetscErrorCode ierr;
1354   PetscInt       nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2;
1355 
1356   PetscFunctionBegin;
1357   if (aij->nonew != 1) {
1358     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
1359   }
1360   if (!aij->saved_values) {
1361     SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
1362   }
1363 
1364   /* copy values over */
1365   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
1366   PetscFunctionReturn(0);
1367 }
1368 EXTERN_C_END
1369 
1370 EXTERN_C_BEGIN
1371 #undef __FUNCT__
1372 #define __FUNCT__ "MatSeqSBAIJSetPreallocation_SeqSBAIJ"
1373 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetPreallocation_SeqSBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
1374 {
1375   Mat_SeqSBAIJ   *b = (Mat_SeqSBAIJ*)B->data;
1376   PetscErrorCode ierr;
1377   PetscInt       i,mbs,bs2;
1378   PetscTruth     skipallocation = PETSC_FALSE,flg;
1379 
1380   PetscFunctionBegin;
1381   B->preallocated = PETSC_TRUE;
1382   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1383   B->rmap.bs = B->cmap.bs = bs;
1384   ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr);
1385   ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr);
1386 
1387   mbs  = B->rmap.N/bs;
1388   bs2  = bs*bs;
1389 
1390   if (mbs*bs != B->rmap.N) {
1391     SETERRQ(PETSC_ERR_ARG_SIZ,"Number rows, cols must be divisible by blocksize");
1392   }
1393 
1394   if (nz == MAT_SKIP_ALLOCATION) {
1395     skipallocation = PETSC_TRUE;
1396     nz             = 0;
1397   }
1398 
1399   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 3;
1400   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
1401   if (nnz) {
1402     for (i=0; i<mbs; i++) {
1403       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
1404       if (nnz[i] > mbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],mbs);
1405     }
1406   }
1407 
1408   ierr    = PetscOptionsHasName(B->prefix,"-mat_no_unroll",&flg);CHKERRQ(ierr);
1409   if (!flg) {
1410     switch (bs) {
1411     case 1:
1412       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1;
1413       B->ops->solve            = MatSolve_SeqSBAIJ_1;
1414       B->ops->solves           = MatSolves_SeqSBAIJ_1;
1415       B->ops->solvetranspose   = MatSolve_SeqSBAIJ_1;
1416       B->ops->mult             = MatMult_SeqSBAIJ_1;
1417       B->ops->multadd          = MatMultAdd_SeqSBAIJ_1;
1418       B->ops->multtranspose    = MatMult_SeqSBAIJ_1;
1419       B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_1;
1420       break;
1421     case 2:
1422       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2;
1423       B->ops->solve            = MatSolve_SeqSBAIJ_2;
1424       B->ops->solvetranspose   = MatSolve_SeqSBAIJ_2;
1425       B->ops->mult             = MatMult_SeqSBAIJ_2;
1426       B->ops->multadd          = MatMultAdd_SeqSBAIJ_2;
1427       B->ops->multtranspose    = MatMult_SeqSBAIJ_2;
1428       B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_2;
1429       break;
1430     case 3:
1431       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3;
1432       B->ops->solve            = MatSolve_SeqSBAIJ_3;
1433       B->ops->solvetranspose   = MatSolve_SeqSBAIJ_3;
1434       B->ops->mult             = MatMult_SeqSBAIJ_3;
1435       B->ops->multadd          = MatMultAdd_SeqSBAIJ_3;
1436       B->ops->multtranspose    = MatMult_SeqSBAIJ_3;
1437       B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_3;
1438       break;
1439     case 4:
1440       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4;
1441       B->ops->solve            = MatSolve_SeqSBAIJ_4;
1442       B->ops->solvetranspose   = MatSolve_SeqSBAIJ_4;
1443       B->ops->mult             = MatMult_SeqSBAIJ_4;
1444       B->ops->multadd          = MatMultAdd_SeqSBAIJ_4;
1445       B->ops->multtranspose    = MatMult_SeqSBAIJ_4;
1446       B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_4;
1447       break;
1448     case 5:
1449       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5;
1450       B->ops->solve            = MatSolve_SeqSBAIJ_5;
1451       B->ops->solvetranspose   = MatSolve_SeqSBAIJ_5;
1452       B->ops->mult             = MatMult_SeqSBAIJ_5;
1453       B->ops->multadd          = MatMultAdd_SeqSBAIJ_5;
1454       B->ops->multtranspose    = MatMult_SeqSBAIJ_5;
1455       B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_5;
1456       break;
1457     case 6:
1458       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6;
1459       B->ops->solve            = MatSolve_SeqSBAIJ_6;
1460       B->ops->solvetranspose   = MatSolve_SeqSBAIJ_6;
1461       B->ops->mult             = MatMult_SeqSBAIJ_6;
1462       B->ops->multadd          = MatMultAdd_SeqSBAIJ_6;
1463       B->ops->multtranspose    = MatMult_SeqSBAIJ_6;
1464       B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_6;
1465       break;
1466     case 7:
1467       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7;
1468       B->ops->solve            = MatSolve_SeqSBAIJ_7;
1469       B->ops->solvetranspose   = MatSolve_SeqSBAIJ_7;
1470       B->ops->mult             = MatMult_SeqSBAIJ_7;
1471       B->ops->multadd          = MatMultAdd_SeqSBAIJ_7;
1472       B->ops->multtranspose    = MatMult_SeqSBAIJ_7;
1473       B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_7;
1474       break;
1475     }
1476   }
1477 
1478   b->mbs = mbs;
1479   b->nbs = mbs;
1480   if (!skipallocation) {
1481     /* b->ilen will count nonzeros in each block row so far. */
1482     ierr   = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr);
1483     for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
1484     if (!nnz) {
1485       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
1486       else if (nz <= 0)        nz = 1;
1487       for (i=0; i<mbs; i++) {
1488         b->imax[i] = nz;
1489       }
1490       nz = nz*mbs; /* total nz */
1491     } else {
1492       nz = 0;
1493       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
1494     }
1495     /* nz=(nz+mbs)/2; */ /* total diagonal and superdiagonal nonzero blocks */
1496 
1497     /* allocate the matrix space */
1498     ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap.N+1,PetscInt,&b->i);CHKERRQ(ierr);
1499     ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1500     ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
1501     b->singlemalloc = PETSC_TRUE;
1502 
1503     /* pointer to beginning of each row */
1504     b->i[0] = 0;
1505     for (i=1; i<mbs+1; i++) {
1506       b->i[i] = b->i[i-1] + b->imax[i-1];
1507     }
1508     b->free_a     = PETSC_TRUE;
1509     b->free_ij    = PETSC_TRUE;
1510   } else {
1511     b->free_a     = PETSC_FALSE;
1512     b->free_ij    = PETSC_FALSE;
1513   }
1514 
1515   B->rmap.bs               = bs;
1516   b->bs2              = bs2;
1517   b->nz             = 0;
1518   b->maxnz          = nz*bs2;
1519 
1520   b->inew             = 0;
1521   b->jnew             = 0;
1522   b->anew             = 0;
1523   b->a2anew           = 0;
1524   b->permute          = PETSC_FALSE;
1525   PetscFunctionReturn(0);
1526 }
1527 EXTERN_C_END
1528 
1529 EXTERN_C_BEGIN
1530 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqSBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
1531 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqSBAIJ_SeqBAIJ(Mat, MatType,MatReuse,Mat*);
1532 EXTERN_C_END
1533 
1534 /*MC
1535   MATSEQSBAIJ - MATSEQSBAIJ = "seqsbaij" - A matrix type to be used for sequential symmetric block sparse matrices,
1536   based on block compressed sparse row format.  Only the upper triangular portion of the matrix is stored.
1537 
1538   Options Database Keys:
1539   . -mat_type seqsbaij - sets the matrix type to "seqsbaij" during a call to MatSetFromOptions()
1540 
1541   Level: beginner
1542 
1543   .seealso: MatCreateSeqSBAIJ
1544 M*/
1545 
1546 EXTERN_C_BEGIN
1547 #undef __FUNCT__
1548 #define __FUNCT__ "MatCreate_SeqSBAIJ"
1549 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqSBAIJ(Mat B)
1550 {
1551   Mat_SeqSBAIJ   *b;
1552   PetscErrorCode ierr;
1553   PetscMPIInt    size;
1554   PetscTruth     flg;
1555 
1556   PetscFunctionBegin;
1557   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
1558   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
1559 
1560   ierr    = PetscNewLog(B,Mat_SeqSBAIJ,&b);CHKERRQ(ierr);
1561   B->data = (void*)b;
1562   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1563   B->ops->destroy     = MatDestroy_SeqSBAIJ;
1564   B->ops->view        = MatView_SeqSBAIJ;
1565   B->factor           = 0;
1566   B->mapping          = 0;
1567   b->row              = 0;
1568   b->icol             = 0;
1569   b->reallocs         = 0;
1570   b->saved_values     = 0;
1571 
1572 
1573   b->roworiented      = PETSC_TRUE;
1574   b->nonew            = 0;
1575   b->diag             = 0;
1576   b->solve_work       = 0;
1577   b->mult_work        = 0;
1578   B->spptr            = 0;
1579   b->keepzeroedrows   = PETSC_FALSE;
1580   b->xtoy             = 0;
1581   b->XtoY             = 0;
1582 
1583   b->inew             = 0;
1584   b->jnew             = 0;
1585   b->anew             = 0;
1586   b->a2anew           = 0;
1587   b->permute          = PETSC_FALSE;
1588 
1589   b->ignore_ltriangular = PETSC_FALSE;
1590   ierr = PetscOptionsHasName(PETSC_NULL,"-mat_ignore_lower_triangular",&flg);CHKERRQ(ierr);
1591   if (flg) b->ignore_ltriangular = PETSC_TRUE;
1592 
1593   b->getrow_utriangular = PETSC_FALSE;
1594   ierr = PetscOptionsHasName(PETSC_NULL,"-mat_getrow_uppertriangular",&flg);CHKERRQ(ierr);
1595   if (flg) b->getrow_utriangular = PETSC_TRUE;
1596 
1597   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1598                                      "MatStoreValues_SeqSBAIJ",
1599                                      MatStoreValues_SeqSBAIJ);CHKERRQ(ierr);
1600   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1601                                      "MatRetrieveValues_SeqSBAIJ",
1602                                      (void*)MatRetrieveValues_SeqSBAIJ);CHKERRQ(ierr);
1603   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetColumnIndices_C",
1604                                      "MatSeqSBAIJSetColumnIndices_SeqSBAIJ",
1605                                      MatSeqSBAIJSetColumnIndices_SeqSBAIJ);CHKERRQ(ierr);
1606   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqaij_C",
1607                                      "MatConvert_SeqSBAIJ_SeqAIJ",
1608                                       MatConvert_SeqSBAIJ_SeqAIJ);CHKERRQ(ierr);
1609   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqbaij_C",
1610                                      "MatConvert_SeqSBAIJ_SeqBAIJ",
1611                                       MatConvert_SeqSBAIJ_SeqBAIJ);CHKERRQ(ierr);
1612   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetPreallocation_C",
1613                                      "MatSeqSBAIJSetPreallocation_SeqSBAIJ",
1614                                      MatSeqSBAIJSetPreallocation_SeqSBAIJ);CHKERRQ(ierr);
1615 
1616   B->symmetric                  = PETSC_TRUE;
1617   B->structurally_symmetric     = PETSC_TRUE;
1618   B->symmetric_set              = PETSC_TRUE;
1619   B->structurally_symmetric_set = PETSC_TRUE;
1620   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQSBAIJ);CHKERRQ(ierr);
1621   PetscFunctionReturn(0);
1622 }
1623 EXTERN_C_END
1624 
1625 #undef __FUNCT__
1626 #define __FUNCT__ "MatSeqSBAIJSetPreallocation"
1627 /*@C
1628    MatSeqSBAIJSetPreallocation - Creates a sparse symmetric matrix in block AIJ (block
1629    compressed row) format.  For good matrix assembly performance the
1630    user should preallocate the matrix storage by setting the parameter nz
1631    (or the array nnz).  By setting these parameters accurately, performance
1632    during matrix assembly can be increased by more than a factor of 50.
1633 
1634    Collective on Mat
1635 
1636    Input Parameters:
1637 +  A - the symmetric matrix
1638 .  bs - size of block
1639 .  nz - number of block nonzeros per block row (same for all rows)
1640 -  nnz - array containing the number of block nonzeros in the upper triangular plus
1641          diagonal portion of each block (possibly different for each block row) or PETSC_NULL
1642 
1643    Options Database Keys:
1644 .   -mat_no_unroll - uses code that does not unroll the loops in the
1645                      block calculations (much slower)
1646 .    -mat_block_size - size of the blocks to use
1647 
1648    Level: intermediate
1649 
1650    Notes:
1651    Specify the preallocated storage with either nz or nnz (not both).
1652    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1653    allocation.  For additional details, see the users manual chapter on
1654    matrices.
1655 
1656    If the nnz parameter is given then the nz parameter is ignored
1657 
1658 
1659 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ()
1660 @*/
1661 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
1662 {
1663   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]);
1664 
1665   PetscFunctionBegin;
1666   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqSBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1667   if (f) {
1668     ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr);
1669   }
1670   PetscFunctionReturn(0);
1671 }
1672 
1673 #undef __FUNCT__
1674 #define __FUNCT__ "MatCreateSeqSBAIJ"
1675 /*@C
1676    MatCreateSeqSBAIJ - Creates a sparse symmetric matrix in block AIJ (block
1677    compressed row) format.  For good matrix assembly performance the
1678    user should preallocate the matrix storage by setting the parameter nz
1679    (or the array nnz).  By setting these parameters accurately, performance
1680    during matrix assembly can be increased by more than a factor of 50.
1681 
1682    Collective on MPI_Comm
1683 
1684    Input Parameters:
1685 +  comm - MPI communicator, set to PETSC_COMM_SELF
1686 .  bs - size of block
1687 .  m - number of rows, or number of columns
1688 .  nz - number of block nonzeros per block row (same for all rows)
1689 -  nnz - array containing the number of block nonzeros in the upper triangular plus
1690          diagonal portion of each block (possibly different for each block row) or PETSC_NULL
1691 
1692    Output Parameter:
1693 .  A - the symmetric matrix
1694 
1695    Options Database Keys:
1696 .   -mat_no_unroll - uses code that does not unroll the loops in the
1697                      block calculations (much slower)
1698 .    -mat_block_size - size of the blocks to use
1699 
1700    Level: intermediate
1701 
1702    Notes:
1703    The number of rows and columns must be divisible by blocksize.
1704    This matrix type does not support complex Hermitian operation.
1705 
1706    Specify the preallocated storage with either nz or nnz (not both).
1707    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1708    allocation.  For additional details, see the users manual chapter on
1709    matrices.
1710 
1711    If the nnz parameter is given then the nz parameter is ignored
1712 
1713 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ()
1714 @*/
1715 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
1716 {
1717   PetscErrorCode ierr;
1718 
1719   PetscFunctionBegin;
1720   ierr = MatCreate(comm,A);CHKERRQ(ierr);
1721   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
1722   ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
1723   ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
1724   PetscFunctionReturn(0);
1725 }
1726 
1727 #undef __FUNCT__
1728 #define __FUNCT__ "MatDuplicate_SeqSBAIJ"
1729 PetscErrorCode MatDuplicate_SeqSBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
1730 {
1731   Mat            C;
1732   Mat_SeqSBAIJ   *c,*a = (Mat_SeqSBAIJ*)A->data;
1733   PetscErrorCode ierr;
1734   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 =a->bs2;
1735 
1736   PetscFunctionBegin;
1737   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
1738 
1739   *B = 0;
1740   ierr = MatCreate(A->comm,&C);CHKERRQ(ierr);
1741   ierr = MatSetSizes(C,A->rmap.N,A->cmap.n,A->rmap.N,A->cmap.n);CHKERRQ(ierr);
1742   ierr = MatSetType(C,A->type_name);CHKERRQ(ierr);
1743   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1744   c    = (Mat_SeqSBAIJ*)C->data;
1745 
1746   C->preallocated   = PETSC_TRUE;
1747   C->factor         = A->factor;
1748   c->row            = 0;
1749   c->icol           = 0;
1750   c->saved_values   = 0;
1751   c->keepzeroedrows = a->keepzeroedrows;
1752   C->assembled      = PETSC_TRUE;
1753 
1754   ierr = PetscMapCopy(A->comm,&A->rmap,&C->rmap);CHKERRQ(ierr);
1755   ierr = PetscMapCopy(A->comm,&A->cmap,&C->cmap);CHKERRQ(ierr);
1756   c->bs2  = a->bs2;
1757   c->mbs  = a->mbs;
1758   c->nbs  = a->nbs;
1759 
1760   ierr = PetscMalloc2((mbs+1),PetscInt,&c->imax,(mbs+1),PetscInt,&c->ilen);CHKERRQ(ierr);
1761   for (i=0; i<mbs; i++) {
1762     c->imax[i] = a->imax[i];
1763     c->ilen[i] = a->ilen[i];
1764   }
1765 
1766   /* allocate the matrix space */
1767   ierr = PetscMalloc3(bs2*nz,MatScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr);
1768   c->singlemalloc = PETSC_TRUE;
1769   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1770   ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt) + nz*(bs2*sizeof(MatScalar) + sizeof(PetscInt)));CHKERRQ(ierr);
1771   if (mbs > 0) {
1772     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
1773     if (cpvalues == MAT_COPY_VALUES) {
1774       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1775     } else {
1776       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1777     }
1778   }
1779 
1780   c->roworiented = a->roworiented;
1781   c->nonew       = a->nonew;
1782 
1783   if (a->diag) {
1784     ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr);
1785     ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1786     for (i=0; i<mbs; i++) {
1787       c->diag[i] = a->diag[i];
1788     }
1789   } else c->diag  = 0;
1790   c->nz           = a->nz;
1791   c->maxnz        = a->maxnz;
1792   c->solve_work   = 0;
1793   c->mult_work    = 0;
1794   c->free_a       = PETSC_TRUE;
1795   c->free_ij      = PETSC_TRUE;
1796   *B = C;
1797   ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr);
1798   PetscFunctionReturn(0);
1799 }
1800 
1801 #undef __FUNCT__
1802 #define __FUNCT__ "MatLoad_SeqSBAIJ"
1803 PetscErrorCode MatLoad_SeqSBAIJ(PetscViewer viewer, MatType type,Mat *A)
1804 {
1805   Mat_SeqSBAIJ   *a;
1806   Mat            B;
1807   PetscErrorCode ierr;
1808   int            fd;
1809   PetscMPIInt    size;
1810   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs=1;
1811   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*s_browlengths,maskcount;
1812   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows;
1813   PetscInt       *masked,nmask,tmp,bs2,ishift;
1814   PetscScalar    *aa;
1815   MPI_Comm       comm = ((PetscObject)viewer)->comm;
1816 
1817   PetscFunctionBegin;
1818   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1819   bs2  = bs*bs;
1820 
1821   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1822   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
1823   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1824   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
1825   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
1826   M = header[1]; N = header[2]; nz = header[3];
1827 
1828   if (header[3] < 0) {
1829     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqSBAIJ");
1830   }
1831 
1832   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
1833 
1834   /*
1835      This code adds extra rows to make sure the number of rows is
1836     divisible by the blocksize
1837   */
1838   mbs        = M/bs;
1839   extra_rows = bs - M + bs*(mbs);
1840   if (extra_rows == bs) extra_rows = 0;
1841   else                  mbs++;
1842   if (extra_rows) {
1843     ierr = PetscInfo(0,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
1844   }
1845 
1846   /* read in row lengths */
1847   ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
1848   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1849   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
1850 
1851   /* read in column indices */
1852   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr);
1853   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
1854   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
1855 
1856   /* loop over row lengths determining block row lengths */
1857   ierr     = PetscMalloc(mbs*sizeof(PetscInt),&s_browlengths);CHKERRQ(ierr);
1858   ierr     = PetscMemzero(s_browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr);
1859   ierr     = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr);
1860   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
1861   masked   = mask + mbs;
1862   rowcount = 0; nzcount = 0;
1863   for (i=0; i<mbs; i++) {
1864     nmask = 0;
1865     for (j=0; j<bs; j++) {
1866       kmax = rowlengths[rowcount];
1867       for (k=0; k<kmax; k++) {
1868         tmp = jj[nzcount++]/bs;   /* block col. index */
1869         if (!mask[tmp] && tmp >= i) {masked[nmask++] = tmp; mask[tmp] = 1;}
1870       }
1871       rowcount++;
1872     }
1873     s_browlengths[i] += nmask;
1874 
1875     /* zero out the mask elements we set */
1876     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1877   }
1878 
1879   /* create our matrix */
1880   ierr = MatCreate(comm,&B);CHKERRQ(ierr);
1881   ierr = MatSetSizes(B,M+extra_rows,N+extra_rows,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
1882   ierr = MatSetType(B,type);CHKERRQ(ierr);
1883   ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(B,bs,0,s_browlengths);CHKERRQ(ierr);
1884   a = (Mat_SeqSBAIJ*)B->data;
1885 
1886   /* set matrix "i" values */
1887   a->i[0] = 0;
1888   for (i=1; i<= mbs; i++) {
1889     a->i[i]      = a->i[i-1] + s_browlengths[i-1];
1890     a->ilen[i-1] = s_browlengths[i-1];
1891   }
1892   a->nz = a->i[mbs];
1893 
1894   /* read in nonzero values */
1895   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1896   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
1897   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
1898 
1899   /* set "a" and "j" values into matrix */
1900   nzcount = 0; jcount = 0;
1901   for (i=0; i<mbs; i++) {
1902     nzcountb = nzcount;
1903     nmask    = 0;
1904     for (j=0; j<bs; j++) {
1905       kmax = rowlengths[i*bs+j];
1906       for (k=0; k<kmax; k++) {
1907         tmp = jj[nzcount++]/bs; /* block col. index */
1908         if (!mask[tmp] && tmp >= i) { masked[nmask++] = tmp; mask[tmp] = 1;}
1909       }
1910     }
1911     /* sort the masked values */
1912     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
1913 
1914     /* set "j" values into matrix */
1915     maskcount = 1;
1916     for (j=0; j<nmask; j++) {
1917       a->j[jcount++]  = masked[j];
1918       mask[masked[j]] = maskcount++;
1919     }
1920 
1921     /* set "a" values into matrix */
1922     ishift = bs2*a->i[i];
1923     for (j=0; j<bs; j++) {
1924       kmax = rowlengths[i*bs+j];
1925       for (k=0; k<kmax; k++) {
1926         tmp       = jj[nzcountb]/bs ; /* block col. index */
1927         if (tmp >= i){
1928           block     = mask[tmp] - 1;
1929           point     = jj[nzcountb] - bs*tmp;
1930           idx       = ishift + bs2*block + j + bs*point;
1931           a->a[idx] = aa[nzcountb];
1932         }
1933         nzcountb++;
1934       }
1935     }
1936     /* zero out the mask elements we set */
1937     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1938   }
1939   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
1940 
1941   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1942   ierr = PetscFree(s_browlengths);CHKERRQ(ierr);
1943   ierr = PetscFree(aa);CHKERRQ(ierr);
1944   ierr = PetscFree(jj);CHKERRQ(ierr);
1945   ierr = PetscFree(mask);CHKERRQ(ierr);
1946 
1947   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1948   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1949   ierr = MatView_Private(B);CHKERRQ(ierr);
1950   *A = B;
1951   PetscFunctionReturn(0);
1952 }
1953 
1954 #undef __FUNCT__
1955 #define __FUNCT__ "MatRelax_SeqSBAIJ"
1956 PetscErrorCode MatRelax_SeqSBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1957 {
1958   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
1959   MatScalar      *aa=a->a,*v,*v1;
1960   PetscScalar    *x,*b,*t,sum,d;
1961   PetscErrorCode ierr;
1962   PetscInt       m=a->mbs,bs=A->rmap.bs,*ai=a->i,*aj=a->j;
1963   PetscInt       nz,nz1,*vj,*vj1,i;
1964 
1965   PetscFunctionBegin;
1966   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
1967 
1968   its = its*lits;
1969   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
1970 
1971   if (bs > 1)
1972     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
1973 
1974   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1975   if (xx != bb) {
1976     ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1977   } else {
1978     b = x;
1979   }
1980 
1981   if (!a->relax_work) {
1982     ierr = PetscMalloc(m*sizeof(PetscScalar),&a->relax_work);CHKERRQ(ierr);
1983   }
1984   t = a->relax_work;
1985 
1986   if (flag & SOR_ZERO_INITIAL_GUESS) {
1987     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
1988       ierr = PetscMemcpy(t,b,m*sizeof(PetscScalar));CHKERRQ(ierr);
1989 
1990       for (i=0; i<m; i++){
1991         d  = *(aa + ai[i]);  /* diag[i] */
1992         v  = aa + ai[i] + 1;
1993         vj = aj + ai[i] + 1;
1994         nz = ai[i+1] - ai[i] - 1;
1995         ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr);
1996         x[i] = omega*t[i]/d;
1997         while (nz--) t[*vj++] -= x[i]*(*v++); /* update rhs */
1998       }
1999     }
2000 
2001     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
2002       if (!(flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP)){
2003         t = b;
2004       }
2005 
2006       for (i=m-1; i>=0; i--){
2007         d  = *(aa + ai[i]);
2008         v  = aa + ai[i] + 1;
2009         vj = aj + ai[i] + 1;
2010         nz = ai[i+1] - ai[i] - 1;
2011         ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr);
2012         sum = t[i];
2013         while (nz--) sum -= x[*vj++]*(*v++);
2014         x[i] =   (1-omega)*x[i] + omega*sum/d;
2015       }
2016       t = a->relax_work;
2017     }
2018     its--;
2019   }
2020 
2021   while (its--) {
2022     /*
2023        forward sweep:
2024        for i=0,...,m-1:
2025          sum[i] = (b[i] - U(i,:)x )/d[i];
2026          x[i]   = (1-omega)x[i] + omega*sum[i];
2027          b      = b - x[i]*U^T(i,:);
2028 
2029     */
2030     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
2031       ierr = PetscMemcpy(t,b,m*sizeof(PetscScalar));CHKERRQ(ierr);
2032 
2033       for (i=0; i<m; i++){
2034         d  = *(aa + ai[i]);  /* diag[i] */
2035         v  = aa + ai[i] + 1; v1=v;
2036         vj = aj + ai[i] + 1; vj1=vj;
2037         nz = ai[i+1] - ai[i] - 1; nz1=nz;
2038         sum = t[i];
2039         ierr = PetscLogFlops(4*nz-2);CHKERRQ(ierr);
2040         while (nz1--) sum -= (*v1++)*x[*vj1++];
2041         x[i] = (1-omega)*x[i] + omega*sum/d;
2042         while (nz--) t[*vj++] -= x[i]*(*v++);
2043       }
2044     }
2045 
2046     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
2047       /*
2048        backward sweep:
2049        b = b - x[i]*U^T(i,:), i=0,...,n-2
2050        for i=m-1,...,0:
2051          sum[i] = (b[i] - U(i,:)x )/d[i];
2052          x[i]   = (1-omega)x[i] + omega*sum[i];
2053       */
2054       /* if there was a forward sweep done above then I thing the next two for loops are not needed */
2055       ierr = PetscMemcpy(t,b,m*sizeof(PetscScalar));CHKERRQ(ierr);
2056 
2057       for (i=0; i<m-1; i++){  /* update rhs */
2058         v  = aa + ai[i] + 1;
2059         vj = aj + ai[i] + 1;
2060         nz = ai[i+1] - ai[i] - 1;
2061         ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr);
2062         while (nz--) t[*vj++] -= x[i]*(*v++);
2063       }
2064       for (i=m-1; i>=0; i--){
2065         d  = *(aa + ai[i]);
2066         v  = aa + ai[i] + 1;
2067         vj = aj + ai[i] + 1;
2068         nz = ai[i+1] - ai[i] - 1;
2069         ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr);
2070         sum = t[i];
2071         while (nz--) sum -= x[*vj++]*(*v++);
2072         x[i] =   (1-omega)*x[i] + omega*sum/d;
2073       }
2074     }
2075   }
2076 
2077   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2078   if (bb != xx) {
2079     ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
2080   }
2081   PetscFunctionReturn(0);
2082 }
2083 
2084 #undef __FUNCT__
2085 #define __FUNCT__ "MatCreateSeqSBAIJWithArrays"
2086 /*@
2087      MatCreateSeqSBAIJWithArrays - Creates an sequential SBAIJ matrix using matrix elements
2088               (upper triangular entries in CSR format) provided by the user.
2089 
2090      Collective on MPI_Comm
2091 
2092    Input Parameters:
2093 +  comm - must be an MPI communicator of size 1
2094 .  bs - size of block
2095 .  m - number of rows
2096 .  n - number of columns
2097 .  i - row indices
2098 .  j - column indices
2099 -  a - matrix values
2100 
2101    Output Parameter:
2102 .  mat - the matrix
2103 
2104    Level: intermediate
2105 
2106    Notes:
2107        The i, j, and a arrays are not copied by this routine, the user must free these arrays
2108     once the matrix is destroyed
2109 
2110        You cannot set new nonzero locations into this matrix, that will generate an error.
2111 
2112        The i and j indices are 0 based
2113 
2114 .seealso: MatCreate(), MatCreateMPISBAIJ(), MatCreateSeqSBAIJ()
2115 
2116 @*/
2117 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqSBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat)
2118 {
2119   PetscErrorCode ierr;
2120   PetscInt       ii;
2121   Mat_SeqSBAIJ   *sbaij;
2122 
2123   PetscFunctionBegin;
2124   if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
2125   if (i[0]) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
2126 
2127   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
2128   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
2129   ierr = MatSetType(*mat,MATSEQSBAIJ);CHKERRQ(ierr);
2130   ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
2131   sbaij = (Mat_SeqSBAIJ*)(*mat)->data;
2132   ierr = PetscMalloc2(m,PetscInt,&sbaij->imax,m,PetscInt,&sbaij->ilen);CHKERRQ(ierr);
2133 
2134   sbaij->i = i;
2135   sbaij->j = j;
2136   sbaij->a = a;
2137   sbaij->singlemalloc = PETSC_FALSE;
2138   sbaij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
2139   sbaij->free_a       = PETSC_FALSE;
2140   sbaij->free_ij      = PETSC_FALSE;
2141 
2142   for (ii=0; ii<m; ii++) {
2143     sbaij->ilen[ii] = sbaij->imax[ii] = i[ii+1] - i[ii];
2144 #if defined(PETSC_USE_DEBUG)
2145     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
2146 #endif
2147   }
2148 #if defined(PETSC_USE_DEBUG)
2149   for (ii=0; ii<sbaij->i[m]; ii++) {
2150     if (j[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
2151     if (j[ii] > n - 1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
2152   }
2153 #endif
2154 
2155   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2156   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2157   PetscFunctionReturn(0);
2158 }
2159 
2160 
2161 
2162 
2163 
2164