xref: /petsc/src/mat/impls/sbaij/seq/sbaij.c (revision ce0a2cd1da0658c2b28aad1be2e2c8e41567bece)
1 #define PETSCMAT_DLL
2 
3 /*
4     Defines the basic matrix operations for the SBAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "src/mat/impls/baij/seq/baij.h"         /*I "petscmat.h" I*/
8 #include "src/inline/spops.h"
9 #include "src/mat/impls/sbaij/seq/sbaij.h"
10 
11 #define CHUNKSIZE  10
12 
13 /*
14      Checks for missing diagonals
15 */
16 #undef __FUNCT__
17 #define __FUNCT__ "MatMissingDiagonal_SeqSBAIJ"
18 PetscErrorCode MatMissingDiagonal_SeqSBAIJ(Mat A,PetscTruth *missing,PetscInt *dd)
19 {
20   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
21   PetscErrorCode ierr;
22   PetscInt       *diag,*jj = a->j,i;
23 
24   PetscFunctionBegin;
25   ierr = MatMarkDiagonal_SeqSBAIJ(A);CHKERRQ(ierr);
26   diag = a->diag;
27   *missing = PETSC_FALSE;
28   for (i=0; i<a->mbs; i++) {
29     if (jj[diag[i]] != i) {
30       *missing    = PETSC_TRUE;
31       if (dd) *dd = i;
32       break;
33     }
34   }
35   PetscFunctionReturn(0);
36 }
37 
38 #undef __FUNCT__
39 #define __FUNCT__ "MatMarkDiagonal_SeqSBAIJ"
40 PetscErrorCode MatMarkDiagonal_SeqSBAIJ(Mat A)
41 {
42   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
43   PetscErrorCode ierr;
44   PetscInt       i;
45 
46   PetscFunctionBegin;
47   if (!a->diag) {
48     ierr = PetscMalloc(a->mbs*sizeof(PetscInt),&a->diag);CHKERRQ(ierr);
49   }
50   for (i=0; i<a->mbs; i++) a->diag[i] = a->i[i];
51   PetscFunctionReturn(0);
52 }
53 
54 #undef __FUNCT__
55 #define __FUNCT__ "MatGetRowIJ_SeqSBAIJ"
56 static PetscErrorCode MatGetRowIJ_SeqSBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
57 {
58   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
59   PetscInt     i,j,n = a->mbs,nz = a->i[n],bs = A->rmap.bs;
60   PetscErrorCode ierr;
61 
62   PetscFunctionBegin;
63   *nn = n;
64   if (!ia) PetscFunctionReturn(0);
65   if (!blockcompressed) {
66     /* malloc & create the natural set of indices */
67     ierr = PetscMalloc2((n+1)*bs,PetscInt,ia,nz*bs,PetscInt,ja);CHKERRQ(ierr);
68     for (i=0; i<n+1; i++) {
69       for (j=0; j<bs; j++) {
70         *ia[i*bs+j] = a->i[i]*bs+j+oshift;
71       }
72     }
73     for (i=0; i<nz; i++) {
74       for (j=0; j<bs; j++) {
75         *ja[i*bs+j] = a->j[i]*bs+j+oshift;
76       }
77     }
78   } else { /* blockcompressed */
79     if (oshift == 1) {
80       /* temporarily add 1 to i and j indices */
81       for (i=0; i<nz; i++) a->j[i]++;
82       for (i=0; i<n+1; i++) a->i[i]++;
83     }
84     *ia = a->i; *ja = a->j;
85   }
86 
87   PetscFunctionReturn(0);
88 }
89 
90 #undef __FUNCT__
91 #define __FUNCT__ "MatRestoreRowIJ_SeqSBAIJ"
92 static PetscErrorCode MatRestoreRowIJ_SeqSBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
93 {
94   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
95   PetscInt     i,n = a->mbs,nz = a->i[n];
96   PetscErrorCode ierr;
97 
98   PetscFunctionBegin;
99   if (!ia) PetscFunctionReturn(0);
100 
101   if (!blockcompressed) {
102     ierr = PetscFree2(*ia,*ja);CHKERRQ(ierr);
103   } else if (oshift == 1) { /* blockcompressed */
104     for (i=0; i<nz; i++) a->j[i]--;
105     for (i=0; i<n+1; i++) a->i[i]--;
106   }
107 
108   PetscFunctionReturn(0);
109 }
110 
111 #undef __FUNCT__
112 #define __FUNCT__ "MatDestroy_SeqSBAIJ"
113 PetscErrorCode MatDestroy_SeqSBAIJ(Mat A)
114 {
115   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
116   PetscErrorCode ierr;
117 
118   PetscFunctionBegin;
119 #if defined(PETSC_USE_LOG)
120   PetscLogObjectState((PetscObject)A,"Rows=%D, NZ=%D",A->rmap.N,a->nz);
121 #endif
122   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
123   if (a->row) {ierr = ISDestroy(a->row);CHKERRQ(ierr);}
124   if (a->col){ierr = ISDestroy(a->col);CHKERRQ(ierr);}
125   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
126   ierr = PetscFree(a->diag);CHKERRQ(ierr);
127   ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);
128   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
129   ierr = PetscFree(a->relax_work);CHKERRQ(ierr);
130   ierr = PetscFree(a->solves_work);CHKERRQ(ierr);
131   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
132   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
133   ierr = PetscFree(a->xtoy);CHKERRQ(ierr);
134 
135   ierr = PetscFree(a->inew);CHKERRQ(ierr);
136   ierr = PetscFree(a);CHKERRQ(ierr);
137 
138   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
139   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
140   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
141   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr);
142   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqsbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
143   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqsbaij_seqbaij_C","",PETSC_NULL);CHKERRQ(ierr);
144   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
145   PetscFunctionReturn(0);
146 }
147 
148 #undef __FUNCT__
149 #define __FUNCT__ "MatSetOption_SeqSBAIJ"
150 PetscErrorCode MatSetOption_SeqSBAIJ(Mat A,MatOption op,PetscTruth flg)
151 {
152   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
153   PetscErrorCode ierr;
154 
155   PetscFunctionBegin;
156   switch (op) {
157   case MAT_ROW_ORIENTED:
158     a->roworiented = flg;
159     break;
160   case MAT_KEEP_ZEROED_ROWS:
161     a->keepzeroedrows = flg;
162     break;
163   case MAT_NEW_NONZERO_LOCATIONS:
164     a->nonew = (flg ? 0 : 1);
165     break;
166   case MAT_NEW_NONZERO_LOCATION_ERR:
167     a->nonew = (flg ? -1 : 0);
168     break;
169   case MAT_NEW_NONZERO_ALLOCATION_ERR:
170     a->nonew = (flg ? -2 : 0);
171     break;
172   case MAT_NEW_DIAGONALS:
173   case MAT_IGNORE_OFF_PROC_ENTRIES:
174   case MAT_USE_HASH_TABLE:
175     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
176     break;
177   case MAT_HERMITIAN:
178     if (flg) SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric");
179   case MAT_SYMMETRIC:
180   case MAT_STRUCTURALLY_SYMMETRIC:
181   case MAT_SYMMETRY_ETERNAL:
182     if (!flg) SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric");
183     ierr = PetscInfo1(A,"Option %s not relevent\n",MatOptions[op]);CHKERRQ(ierr);
184     break;
185   case MAT_IGNORE_LOWER_TRIANGULAR:
186     a->ignore_ltriangular = flg;
187     break;
188   case MAT_ERROR_LOWER_TRIANGULAR:
189     a->ignore_ltriangular = flg;
190     break;
191   case MAT_GETROW_UPPERTRIANGULAR:
192     a->getrow_utriangular = flg;
193     break;
194   default:
195     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
196   }
197   PetscFunctionReturn(0);
198 }
199 
200 #undef __FUNCT__
201 #define __FUNCT__ "MatGetRow_SeqSBAIJ"
202 PetscErrorCode MatGetRow_SeqSBAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **v)
203 {
204   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
205   PetscErrorCode ierr;
206   PetscInt       itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*cols_i,bs2;
207   MatScalar      *aa,*aa_i;
208   PetscScalar    *v_i;
209 
210   PetscFunctionBegin;
211   if (A && !a->getrow_utriangular) SETERRQ(PETSC_ERR_SUP,"MatGetRow is not supported for SBAIJ matrix format. Getting the upper triangular part of row, run with -mat_getrow_uppertriangular, call MatSetOption(mat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE) or MatGetRowUpperTriangular()");
212   /* Get the upper triangular part of the row */
213   bs  = A->rmap.bs;
214   ai  = a->i;
215   aj  = a->j;
216   aa  = a->a;
217   bs2 = a->bs2;
218 
219   if (row < 0 || row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE, "Row %D out of range", row);
220 
221   bn  = row/bs;   /* Block number */
222   bp  = row % bs; /* Block position */
223   M   = ai[bn+1] - ai[bn];
224   *ncols = bs*M;
225 
226   if (v) {
227     *v = 0;
228     if (*ncols) {
229       ierr = PetscMalloc((*ncols+row)*sizeof(PetscScalar),v);CHKERRQ(ierr);
230       for (i=0; i<M; i++) { /* for each block in the block row */
231         v_i  = *v + i*bs;
232         aa_i = aa + bs2*(ai[bn] + i);
233         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
234       }
235     }
236   }
237 
238   if (cols) {
239     *cols = 0;
240     if (*ncols) {
241       ierr = PetscMalloc((*ncols+row)*sizeof(PetscInt),cols);CHKERRQ(ierr);
242       for (i=0; i<M; i++) { /* for each block in the block row */
243         cols_i = *cols + i*bs;
244         itmp  = bs*aj[ai[bn] + i];
245         for (j=0; j<bs; j++) {cols_i[j] = itmp++;}
246       }
247     }
248   }
249 
250   /*search column A(0:row-1,row) (=A(row,0:row-1)). Could be expensive! */
251   /* this segment is currently removed, so only entries in the upper triangle are obtained */
252 #ifdef column_search
253   v_i    = *v    + M*bs;
254   cols_i = *cols + M*bs;
255   for (i=0; i<bn; i++){ /* for each block row */
256     M = ai[i+1] - ai[i];
257     for (j=0; j<M; j++){
258       itmp = aj[ai[i] + j];    /* block column value */
259       if (itmp == bn){
260         aa_i   = aa    + bs2*(ai[i] + j) + bs*bp;
261         for (k=0; k<bs; k++) {
262           *cols_i++ = i*bs+k;
263           *v_i++    = aa_i[k];
264         }
265         *ncols += bs;
266         break;
267       }
268     }
269   }
270 #endif
271   PetscFunctionReturn(0);
272 }
273 
274 #undef __FUNCT__
275 #define __FUNCT__ "MatRestoreRow_SeqSBAIJ"
276 PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
277 {
278   PetscErrorCode ierr;
279 
280   PetscFunctionBegin;
281   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
282   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
283   PetscFunctionReturn(0);
284 }
285 
286 #undef __FUNCT__
287 #define __FUNCT__ "MatGetRowUpperTriangular_SeqSBAIJ"
288 PetscErrorCode MatGetRowUpperTriangular_SeqSBAIJ(Mat A)
289 {
290   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
291 
292   PetscFunctionBegin;
293   a->getrow_utriangular = PETSC_TRUE;
294   PetscFunctionReturn(0);
295 }
296 #undef __FUNCT__
297 #define __FUNCT__ "MatRestoreRowUpperTriangular_SeqSBAIJ"
298 PetscErrorCode MatRestoreRowUpperTriangular_SeqSBAIJ(Mat A)
299 {
300   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
301 
302   PetscFunctionBegin;
303   a->getrow_utriangular = PETSC_FALSE;
304   PetscFunctionReturn(0);
305 }
306 
307 #undef __FUNCT__
308 #define __FUNCT__ "MatTranspose_SeqSBAIJ"
309 PetscErrorCode MatTranspose_SeqSBAIJ(Mat A,Mat *B)
310 {
311   PetscErrorCode ierr;
312   PetscFunctionBegin;
313   ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
314   PetscFunctionReturn(0);
315 }
316 
317 #undef __FUNCT__
318 #define __FUNCT__ "MatView_SeqSBAIJ_ASCII"
319 static PetscErrorCode MatView_SeqSBAIJ_ASCII(Mat A,PetscViewer viewer)
320 {
321   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
322   PetscErrorCode    ierr;
323   PetscInt          i,j,bs = A->rmap.bs,k,l,bs2=a->bs2;
324   const char        *name;
325   PetscViewerFormat format;
326 
327   PetscFunctionBegin;
328   ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr);
329   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
330   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
331     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
332   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
333     Mat aij;
334 
335     if (A->factor && bs>1){
336       ierr = PetscPrintf(PETSC_COMM_SELF,"Warning: matrix is factored with bs>1. MatView() with PETSC_VIEWER_ASCII_MATLAB is not supported and ignored!\n");CHKERRQ(ierr);
337       PetscFunctionReturn(0);
338     }
339     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
340     ierr = MatView(aij,viewer);CHKERRQ(ierr);
341     ierr = MatDestroy(aij);CHKERRQ(ierr);
342   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
343     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
344     for (i=0; i<a->mbs; i++) {
345       for (j=0; j<bs; j++) {
346         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
347         for (k=a->i[i]; k<a->i[i+1]; k++) {
348           for (l=0; l<bs; l++) {
349 #if defined(PETSC_USE_COMPLEX)
350             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
351               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l,
352                                             PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
353             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
354               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l,
355                                             PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
356             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
357               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
358             }
359 #else
360             if (a->a[bs2*k + l*bs + j] != 0.0) {
361               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
362             }
363 #endif
364           }
365         }
366         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
367       }
368     }
369     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
370   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
371      PetscFunctionReturn(0);
372   } else {
373     if (A->factor && bs>1){
374       ierr = PetscPrintf(PETSC_COMM_SELF,"Warning: matrix is factored. MatView_SeqSBAIJ_ASCII() may not display complete or logically correct entries!\n");CHKERRQ(ierr);
375     }
376     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
377     for (i=0; i<a->mbs; i++) {
378       for (j=0; j<bs; j++) {
379         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
380         for (k=a->i[i]; k<a->i[i+1]; k++) {
381           for (l=0; l<bs; l++) {
382 #if defined(PETSC_USE_COMPLEX)
383             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
384               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l,
385                                             PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
386             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
387               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l,
388                                             PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
389             } else {
390               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
391             }
392 #else
393             ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
394 #endif
395           }
396         }
397         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
398       }
399     }
400     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
401   }
402   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
403   PetscFunctionReturn(0);
404 }
405 
406 #undef __FUNCT__
407 #define __FUNCT__ "MatView_SeqSBAIJ_Draw_Zoom"
408 static PetscErrorCode MatView_SeqSBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
409 {
410   Mat            A = (Mat) Aa;
411   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
412   PetscErrorCode ierr;
413   PetscInt       row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap.bs,bs2=a->bs2;
414   PetscMPIInt    rank;
415   PetscReal      xl,yl,xr,yr,x_l,x_r,y_l,y_r;
416   MatScalar      *aa;
417   MPI_Comm       comm;
418   PetscViewer    viewer;
419 
420   PetscFunctionBegin;
421   /*
422     This is nasty. If this is called from an originally parallel matrix
423     then all processes call this,but only the first has the matrix so the
424     rest should return immediately.
425   */
426   ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr);
427   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
428   if (rank) PetscFunctionReturn(0);
429 
430   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
431 
432   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
433   PetscDrawString(draw, .3*(xl+xr), .3*(yl+yr), PETSC_DRAW_BLACK, "symmetric");
434 
435   /* loop over matrix elements drawing boxes */
436   color = PETSC_DRAW_BLUE;
437   for (i=0,row=0; i<mbs; i++,row+=bs) {
438     for (j=a->i[i]; j<a->i[i+1]; j++) {
439       y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0;
440       x_l = a->j[j]*bs; x_r = x_l + 1.0;
441       aa = a->a + j*bs2;
442       for (k=0; k<bs; k++) {
443         for (l=0; l<bs; l++) {
444           if (PetscRealPart(*aa++) >=  0.) continue;
445           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
446         }
447       }
448     }
449   }
450   color = PETSC_DRAW_CYAN;
451   for (i=0,row=0; i<mbs; i++,row+=bs) {
452     for (j=a->i[i]; j<a->i[i+1]; j++) {
453       y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0;
454       x_l = a->j[j]*bs; x_r = x_l + 1.0;
455       aa = a->a + j*bs2;
456       for (k=0; k<bs; k++) {
457         for (l=0; l<bs; l++) {
458           if (PetscRealPart(*aa++) != 0.) continue;
459           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
460         }
461       }
462     }
463   }
464 
465   color = PETSC_DRAW_RED;
466   for (i=0,row=0; i<mbs; i++,row+=bs) {
467     for (j=a->i[i]; j<a->i[i+1]; j++) {
468       y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0;
469       x_l = a->j[j]*bs; x_r = x_l + 1.0;
470       aa = a->a + j*bs2;
471       for (k=0; k<bs; k++) {
472         for (l=0; l<bs; l++) {
473           if (PetscRealPart(*aa++) <= 0.) continue;
474           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
475         }
476       }
477     }
478   }
479   PetscFunctionReturn(0);
480 }
481 
482 #undef __FUNCT__
483 #define __FUNCT__ "MatView_SeqSBAIJ_Draw"
484 static PetscErrorCode MatView_SeqSBAIJ_Draw(Mat A,PetscViewer viewer)
485 {
486   PetscErrorCode ierr;
487   PetscReal      xl,yl,xr,yr,w,h;
488   PetscDraw      draw;
489   PetscTruth     isnull;
490 
491   PetscFunctionBegin;
492   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
493   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
494 
495   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
496   xr  = A->rmap.N; yr = A->rmap.N; h = yr/10.0; w = xr/10.0;
497   xr += w;    yr += h;  xl = -w;     yl = -h;
498   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
499   ierr = PetscDrawZoom(draw,MatView_SeqSBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
500   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
501   PetscFunctionReturn(0);
502 }
503 
504 #undef __FUNCT__
505 #define __FUNCT__ "MatView_SeqSBAIJ"
506 PetscErrorCode MatView_SeqSBAIJ(Mat A,PetscViewer viewer)
507 {
508   PetscErrorCode ierr;
509   PetscTruth     iascii,isdraw;
510 
511   PetscFunctionBegin;
512   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
513   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
514   if (iascii){
515     ierr = MatView_SeqSBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
516   } else if (isdraw) {
517     ierr = MatView_SeqSBAIJ_Draw(A,viewer);CHKERRQ(ierr);
518   } else {
519     Mat B;
520     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
521     ierr = MatView(B,viewer);CHKERRQ(ierr);
522     ierr = MatDestroy(B);CHKERRQ(ierr);
523   }
524   PetscFunctionReturn(0);
525 }
526 
527 
528 #undef __FUNCT__
529 #define __FUNCT__ "MatGetValues_SeqSBAIJ"
530 PetscErrorCode MatGetValues_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
531 {
532   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
533   PetscInt     *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
534   PetscInt     *ai = a->i,*ailen = a->ilen;
535   PetscInt     brow,bcol,ridx,cidx,bs=A->rmap.bs,bs2=a->bs2;
536   MatScalar    *ap,*aa = a->a;
537 
538   PetscFunctionBegin;
539   for (k=0; k<m; k++) { /* loop over rows */
540     row  = im[k]; brow = row/bs;
541     if (row < 0) {v += n; continue;} /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",row); */
542     if (row >= A->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap.N-1);
543     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
544     nrow = ailen[brow];
545     for (l=0; l<n; l++) { /* loop over columns */
546       if (in[l] < 0) {v++; continue;} /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",in[l]); */
547       if (in[l] >= A->cmap.n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap.n-1);
548       col  = in[l] ;
549       bcol = col/bs;
550       cidx = col%bs;
551       ridx = row%bs;
552       high = nrow;
553       low  = 0; /* assume unsorted */
554       while (high-low > 5) {
555         t = (low+high)/2;
556         if (rp[t] > bcol) high = t;
557         else             low  = t;
558       }
559       for (i=low; i<high; i++) {
560         if (rp[i] > bcol) break;
561         if (rp[i] == bcol) {
562           *v++ = ap[bs2*i+bs*cidx+ridx];
563           goto finished;
564         }
565       }
566       *v++ = 0.0;
567       finished:;
568     }
569   }
570   PetscFunctionReturn(0);
571 }
572 
573 
574 #undef __FUNCT__
575 #define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ"
576 PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
577 {
578   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
579   PetscErrorCode    ierr;
580   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
581   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
582   PetscInt          *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap.bs,stepval;
583   PetscTruth        roworiented=a->roworiented;
584   const PetscScalar *value = v;
585   MatScalar         *ap,*aa = a->a,*bap;
586 
587   PetscFunctionBegin;
588   if (roworiented) {
589     stepval = (n-1)*bs;
590   } else {
591     stepval = (m-1)*bs;
592   }
593   for (k=0; k<m; k++) { /* loop over added rows */
594     row  = im[k];
595     if (row < 0) continue;
596 #if defined(PETSC_USE_DEBUG)
597     if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
598 #endif
599     rp   = aj + ai[row];
600     ap   = aa + bs2*ai[row];
601     rmax = imax[row];
602     nrow = ailen[row];
603     low  = 0;
604     high = nrow;
605     for (l=0; l<n; l++) { /* loop over added columns */
606       if (in[l] < 0) continue;
607       col = in[l];
608 #if defined(PETSC_USE_DEBUG)
609       if (col >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",col,a->nbs-1);
610 #endif
611       if (col < row) continue; /* ignore lower triangular block */
612       if (roworiented) {
613         value = v + k*(stepval+bs)*bs + l*bs;
614       } else {
615         value = v + l*(stepval+bs)*bs + k*bs;
616       }
617       if (col <= lastcol) low = 0; else high = nrow;
618       lastcol = col;
619       while (high-low > 7) {
620         t = (low+high)/2;
621         if (rp[t] > col) high = t;
622         else             low  = t;
623       }
624       for (i=low; i<high; i++) {
625         if (rp[i] > col) break;
626         if (rp[i] == col) {
627           bap  = ap +  bs2*i;
628           if (roworiented) {
629             if (is == ADD_VALUES) {
630               for (ii=0; ii<bs; ii++,value+=stepval) {
631                 for (jj=ii; jj<bs2; jj+=bs) {
632                   bap[jj] += *value++;
633                 }
634               }
635             } else {
636               for (ii=0; ii<bs; ii++,value+=stepval) {
637                 for (jj=ii; jj<bs2; jj+=bs) {
638                   bap[jj] = *value++;
639                 }
640                }
641             }
642           } else {
643             if (is == ADD_VALUES) {
644               for (ii=0; ii<bs; ii++,value+=stepval) {
645                 for (jj=0; jj<bs; jj++) {
646                   *bap++ += *value++;
647                 }
648               }
649             } else {
650               for (ii=0; ii<bs; ii++,value+=stepval) {
651                 for (jj=0; jj<bs; jj++) {
652                   *bap++  = *value++;
653                 }
654               }
655             }
656           }
657           goto noinsert2;
658         }
659       }
660       if (nonew == 1) goto noinsert2;
661       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
662       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
663       N = nrow++ - 1; high++;
664       /* shift up all the later entries in this row */
665       for (ii=N; ii>=i; ii--) {
666         rp[ii+1] = rp[ii];
667         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
668       }
669       if (N >= i) {
670         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
671       }
672       rp[i] = col;
673       bap   = ap +  bs2*i;
674       if (roworiented) {
675         for (ii=0; ii<bs; ii++,value+=stepval) {
676           for (jj=ii; jj<bs2; jj+=bs) {
677             bap[jj] = *value++;
678           }
679         }
680       } else {
681         for (ii=0; ii<bs; ii++,value+=stepval) {
682           for (jj=0; jj<bs; jj++) {
683             *bap++  = *value++;
684           }
685         }
686        }
687     noinsert2:;
688       low = i;
689     }
690     ailen[row] = nrow;
691   }
692    PetscFunctionReturn(0);
693 }
694 
695 #undef __FUNCT__
696 #define __FUNCT__ "MatAssemblyEnd_SeqSBAIJ"
697 PetscErrorCode MatAssemblyEnd_SeqSBAIJ(Mat A,MatAssemblyType mode)
698 {
699   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
700   PetscErrorCode ierr;
701   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
702   PetscInt       m = A->rmap.N,*ip,N,*ailen = a->ilen;
703   PetscInt       mbs = a->mbs,bs2 = a->bs2,rmax = 0;
704   MatScalar      *aa = a->a,*ap;
705 
706   PetscFunctionBegin;
707   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
708 
709   if (m) rmax = ailen[0];
710   for (i=1; i<mbs; i++) {
711     /* move each row back by the amount of empty slots (fshift) before it*/
712     fshift += imax[i-1] - ailen[i-1];
713      rmax   = PetscMax(rmax,ailen[i]);
714      if (fshift) {
715        ip = aj + ai[i]; ap = aa + bs2*ai[i];
716        N = ailen[i];
717        for (j=0; j<N; j++) {
718          ip[j-fshift] = ip[j];
719          ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
720        }
721      }
722      ai[i] = ai[i-1] + ailen[i-1];
723   }
724   if (mbs) {
725     fshift += imax[mbs-1] - ailen[mbs-1];
726      ai[mbs] = ai[mbs-1] + ailen[mbs-1];
727   }
728   /* reset ilen and imax for each row */
729   for (i=0; i<mbs; i++) {
730     ailen[i] = imax[i] = ai[i+1] - ai[i];
731   }
732   a->nz = ai[mbs];
733 
734   /* diagonals may have moved, reset it */
735   if (a->diag) {
736     ierr = PetscMemcpy(a->diag,ai,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
737   }
738   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->rmap.N,A->rmap.bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
739   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
740   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
741   a->reallocs          = 0;
742   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
743   PetscFunctionReturn(0);
744 }
745 
746 /*
747    This function returns an array of flags which indicate the locations of contiguous
748    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
749    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
750    Assume: sizes should be long enough to hold all the values.
751 */
752 #undef __FUNCT__
753 #define __FUNCT__ "MatZeroRows_SeqSBAIJ_Check_Blocks"
754 PetscErrorCode MatZeroRows_SeqSBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
755 {
756   PetscInt   i,j,k,row;
757   PetscTruth flg;
758 
759   PetscFunctionBegin;
760    for (i=0,j=0; i<n; j++) {
761      row = idx[i];
762      if (row%bs!=0) { /* Not the begining of a block */
763        sizes[j] = 1;
764        i++;
765      } else if (i+bs > n) { /* Beginning of a block, but complete block doesn't exist (at idx end) */
766        sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
767        i++;
768      } else { /* Begining of the block, so check if the complete block exists */
769        flg = PETSC_TRUE;
770        for (k=1; k<bs; k++) {
771          if (row+k != idx[i+k]) { /* break in the block */
772            flg = PETSC_FALSE;
773            break;
774          }
775        }
776        if (flg) { /* No break in the bs */
777          sizes[j] = bs;
778          i+= bs;
779        } else {
780          sizes[j] = 1;
781          i++;
782        }
783      }
784    }
785    *bs_max = j;
786    PetscFunctionReturn(0);
787 }
788 
789 
790 /* Only add/insert a(i,j) with i<=j (blocks).
791    Any a(i,j) with i>j input by user is ingored.
792 */
793 
794 #undef __FUNCT__
795 #define __FUNCT__ "MatSetValues_SeqSBAIJ"
796 PetscErrorCode MatSetValues_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
797 {
798   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
799   PetscErrorCode ierr;
800   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
801   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented;
802   PetscInt       *aj=a->j,nonew=a->nonew,bs=A->rmap.bs,brow,bcol;
803   PetscInt       ridx,cidx,bs2=a->bs2;
804   MatScalar      *ap,value,*aa=a->a,*bap;
805 
806   PetscFunctionBegin;
807   for (k=0; k<m; k++) { /* loop over added rows */
808     row  = im[k];       /* row number */
809     brow = row/bs;      /* block row number */
810     if (row < 0) continue;
811 #if defined(PETSC_USE_DEBUG)
812     if (row >= A->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap.N-1);
813 #endif
814     rp   = aj + ai[brow]; /*ptr to beginning of column value of the row block*/
815     ap   = aa + bs2*ai[brow]; /*ptr to beginning of element value of the row block*/
816     rmax = imax[brow];  /* maximum space allocated for this row */
817     nrow = ailen[brow]; /* actual length of this row */
818     low  = 0;
819 
820     for (l=0; l<n; l++) { /* loop over added columns */
821       if (in[l] < 0) continue;
822 #if defined(PETSC_USE_DEBUG)
823       if (in[l] >= A->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->rmap.N-1);
824 #endif
825       col = in[l];
826       bcol = col/bs;              /* block col number */
827 
828       if (brow > bcol) {
829         if (a->ignore_ltriangular){
830           continue; /* ignore lower triangular values */
831         } else {
832           SETERRQ(PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
833         }
834       }
835 
836       ridx = row % bs; cidx = col % bs; /*row and col index inside the block */
837       if ((brow==bcol && ridx<=cidx) || (brow<bcol)){
838         /* element value a(k,l) */
839         if (roworiented) {
840           value = v[l + k*n];
841         } else {
842           value = v[k + l*m];
843         }
844 
845         /* move pointer bap to a(k,l) quickly and add/insert value */
846         if (col <= lastcol) low = 0; high = nrow;
847         lastcol = col;
848         while (high-low > 7) {
849           t = (low+high)/2;
850           if (rp[t] > bcol) high = t;
851           else              low  = t;
852         }
853         for (i=low; i<high; i++) {
854           if (rp[i] > bcol) break;
855           if (rp[i] == bcol) {
856             bap  = ap +  bs2*i + bs*cidx + ridx;
857             if (is == ADD_VALUES) *bap += value;
858             else                  *bap  = value;
859             /* for diag block, add/insert its symmetric element a(cidx,ridx) */
860             if (brow == bcol && ridx < cidx){
861               bap  = ap +  bs2*i + bs*ridx + cidx;
862               if (is == ADD_VALUES) *bap += value;
863               else                  *bap  = value;
864             }
865             goto noinsert1;
866           }
867         }
868 
869         if (nonew == 1) goto noinsert1;
870         if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
871         MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
872 
873         N = nrow++ - 1; high++;
874         /* shift up all the later entries in this row */
875         for (ii=N; ii>=i; ii--) {
876           rp[ii+1] = rp[ii];
877           ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
878         }
879         if (N>=i) {
880           ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
881         }
882         rp[i]                      = bcol;
883         ap[bs2*i + bs*cidx + ridx] = value;
884       noinsert1:;
885         low = i;
886       }
887     }   /* end of loop over added columns */
888     ailen[brow] = nrow;
889   }   /* end of loop over added rows */
890   PetscFunctionReturn(0);
891 }
892 
893 #undef __FUNCT__
894 #define __FUNCT__ "MatICCFactor_SeqSBAIJ"
895 PetscErrorCode MatICCFactor_SeqSBAIJ(Mat inA,IS row,MatFactorInfo *info)
896 {
897   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)inA->data;
898   Mat            outA;
899   PetscErrorCode ierr;
900   PetscTruth     row_identity;
901 
902   PetscFunctionBegin;
903   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels=0 is supported for in-place icc");
904   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
905   if (!row_identity) SETERRQ(PETSC_ERR_SUP,"Matrix reordering is not supported");
906   if (inA->rmap.bs != 1) SETERRQ1(PETSC_ERR_SUP,"Matrix block size %D is not supported",inA->rmap.bs); /* Need to replace MatCholeskyFactorSymbolic_SeqSBAIJ_MSR()! */
907 
908   outA        = inA;
909   inA->factor = FACTOR_CHOLESKY;
910 
911   ierr = MatMarkDiagonal_SeqSBAIJ(inA);CHKERRQ(ierr);
912   /*
913     Blocksize < 8 have a special faster factorization/solver
914     for ICC(0) factorization with natural ordering
915   */
916   switch (inA->rmap.bs){ /* Note: row_identity = PETSC_TRUE! */
917   case 1:
918     inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering;
919     inA->ops->solve            = MatSolve_SeqSBAIJ_1_NaturalOrdering;
920     inA->ops->solvetranspose   = MatSolve_SeqSBAIJ_1_NaturalOrdering;
921     inA->ops->solves           = MatSolves_SeqSBAIJ_1;
922     inA->ops->forwardsolve     = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering;
923     inA->ops->backwardsolve    = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering;
924     ierr = PetscInfo(inA,"Using special in-place natural ordering solvetrans BS=1\n");CHKERRQ(ierr);
925     break;
926   case 2:
927     inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering;
928     inA->ops->solve           = MatSolve_SeqSBAIJ_2_NaturalOrdering;
929     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_2_NaturalOrdering;
930     inA->ops->forwardsolve     = MatForwardSolve_SeqSBAIJ_2_NaturalOrdering;
931     inA->ops->backwardsolve    = MatBackwardSolve_SeqSBAIJ_2_NaturalOrdering;
932     ierr = PetscInfo(inA,"Using special in-place natural ordering factor and solve BS=2\n");CHKERRQ(ierr);
933     break;
934   case 3:
935      inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering;
936      inA->ops->solve           = MatSolve_SeqSBAIJ_3_NaturalOrdering;
937      inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_3_NaturalOrdering;
938      inA->ops->forwardsolve    = MatForwardSolve_SeqSBAIJ_3_NaturalOrdering;
939      inA->ops->backwardsolve   = MatBackwardSolve_SeqSBAIJ_3_NaturalOrdering;
940      ierr = PetscInfo(inA,"Using special in-place natural ordering factor and solve BS=3\n");CHKERRQ(ierr);
941      break;
942   case 4:
943     inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering;
944     inA->ops->solve           = MatSolve_SeqSBAIJ_4_NaturalOrdering;
945     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_4_NaturalOrdering;
946     inA->ops->forwardsolve    = MatForwardSolve_SeqSBAIJ_4_NaturalOrdering;
947     inA->ops->backwardsolve   = MatBackwardSolve_SeqSBAIJ_4_NaturalOrdering;
948     ierr = PetscInfo(inA,"Using special in-place natural ordering factor and solve BS=4\n");CHKERRQ(ierr);
949     break;
950   case 5:
951     inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering;
952     inA->ops->solve           = MatSolve_SeqSBAIJ_5_NaturalOrdering;
953     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_5_NaturalOrdering;
954     inA->ops->forwardsolve    = MatForwardSolve_SeqSBAIJ_5_NaturalOrdering;
955     inA->ops->backwardsolve   = MatBackwardSolve_SeqSBAIJ_5_NaturalOrdering;
956     ierr = PetscInfo(inA,"Using special in-place natural ordering factor and solve BS=5\n");CHKERRQ(ierr);
957     break;
958   case 6:
959     inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering;
960     inA->ops->solve           = MatSolve_SeqSBAIJ_6_NaturalOrdering;
961     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_6_NaturalOrdering;
962     inA->ops->forwardsolve    = MatForwardSolve_SeqSBAIJ_6_NaturalOrdering;
963     inA->ops->backwardsolve   = MatBackwardSolve_SeqSBAIJ_6_NaturalOrdering;
964     ierr = PetscInfo(inA,"Using special in-place natural ordering factor and solve BS=6\n");CHKERRQ(ierr);
965     break;
966   case 7:
967     inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering;
968     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_7_NaturalOrdering;
969     inA->ops->solve           = MatSolve_SeqSBAIJ_7_NaturalOrdering;
970     inA->ops->forwardsolve    = MatForwardSolve_SeqSBAIJ_7_NaturalOrdering;
971     inA->ops->backwardsolve   = MatBackwardSolve_SeqSBAIJ_7_NaturalOrdering;
972     ierr = PetscInfo(inA,"Using special in-place natural ordering factor and solve BS=7\n");CHKERRQ(ierr);
973     break;
974   default:
975     inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering;
976     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_N_NaturalOrdering;
977     inA->ops->solve           = MatSolve_SeqSBAIJ_N_NaturalOrdering;
978     inA->ops->forwardsolve    = MatForwardSolve_SeqSBAIJ_N_NaturalOrdering;
979     inA->ops->backwardsolve   = MatBackwardSolve_SeqSBAIJ_N_NaturalOrdering;
980     break;
981   }
982 
983   ierr   = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
984   if (a->row) { ierr = ISDestroy(a->row);CHKERRQ(ierr); }
985   a->row = row;
986   ierr   = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
987   if (a->col) { ierr = ISDestroy(a->col);CHKERRQ(ierr); }
988   a->col = row;
989 
990   /* Create the invert permutation so that it can be used in MatCholeskyFactorNumeric() */
991   if (a->icol) {ierr = ISInvertPermutation(row,PETSC_DECIDE, &a->icol);CHKERRQ(ierr);}
992   ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr);
993 
994   if (!a->solve_work) {
995     ierr = PetscMalloc((inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
996     ierr = PetscLogObjectMemory(inA,(inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar));CHKERRQ(ierr);
997   }
998 
999   ierr = MatCholeskyFactorNumeric(inA,info,&outA);CHKERRQ(ierr);
1000   PetscFunctionReturn(0);
1001 }
1002 
1003 EXTERN_C_BEGIN
1004 #undef __FUNCT__
1005 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices_SeqSBAIJ"
1006 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetColumnIndices_SeqSBAIJ(Mat mat,PetscInt *indices)
1007 {
1008   Mat_SeqSBAIJ *baij = (Mat_SeqSBAIJ *)mat->data;
1009   PetscInt     i,nz,n;
1010 
1011   PetscFunctionBegin;
1012   nz = baij->maxnz;
1013   n  = mat->cmap.n;
1014   for (i=0; i<nz; i++) {
1015     baij->j[i] = indices[i];
1016   }
1017    baij->nz = nz;
1018    for (i=0; i<n; i++) {
1019      baij->ilen[i] = baij->imax[i];
1020    }
1021    PetscFunctionReturn(0);
1022 }
1023 EXTERN_C_END
1024 
1025 #undef __FUNCT__
1026 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices"
1027 /*@
1028   MatSeqSBAIJSetColumnIndices - Set the column indices for all the rows
1029   in the matrix.
1030 
1031   Input Parameters:
1032   +  mat     - the SeqSBAIJ matrix
1033   -  indices - the column indices
1034 
1035   Level: advanced
1036 
1037   Notes:
1038   This can be called if you have precomputed the nonzero structure of the
1039   matrix and want to provide it to the matrix object to improve the performance
1040   of the MatSetValues() operation.
1041 
1042   You MUST have set the correct numbers of nonzeros per row in the call to
1043   MatCreateSeqSBAIJ(), and the columns indices MUST be sorted.
1044 
1045   MUST be called before any calls to MatSetValues()
1046 
1047   .seealso: MatCreateSeqSBAIJ
1048 @*/
1049 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetColumnIndices(Mat mat,PetscInt *indices)
1050 {
1051   PetscErrorCode ierr,(*f)(Mat,PetscInt *);
1052 
1053   PetscFunctionBegin;
1054   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
1055   PetscValidPointer(indices,2);
1056   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqSBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr);
1057   if (f) {
1058     ierr = (*f)(mat,indices);CHKERRQ(ierr);
1059   } else {
1060     SETERRQ(PETSC_ERR_SUP,"Wrong type of matrix to set column indices");
1061   }
1062   PetscFunctionReturn(0);
1063 }
1064 
1065 #undef __FUNCT__
1066 #define __FUNCT__ "MatCopy_SeqSBAIJ"
1067 PetscErrorCode MatCopy_SeqSBAIJ(Mat A,Mat B,MatStructure str)
1068 {
1069   PetscErrorCode ierr;
1070 
1071   PetscFunctionBegin;
1072   /* If the two matrices have the same copy implementation, use fast copy. */
1073   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
1074     Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1075     Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data;
1076 
1077     if (a->i[A->rmap.N] != b->i[B->rmap.N]) {
1078       SETERRQ(PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different");
1079     }
1080     ierr = PetscMemcpy(b->a,a->a,(a->i[A->rmap.N])*sizeof(PetscScalar));CHKERRQ(ierr);
1081   } else {
1082     ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
1083     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1084     ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
1085   }
1086   PetscFunctionReturn(0);
1087 }
1088 
1089 #undef __FUNCT__
1090 #define __FUNCT__ "MatSetUpPreallocation_SeqSBAIJ"
1091 PetscErrorCode MatSetUpPreallocation_SeqSBAIJ(Mat A)
1092 {
1093   PetscErrorCode ierr;
1094 
1095   PetscFunctionBegin;
1096   ierr =  MatSeqSBAIJSetPreallocation_SeqSBAIJ(A,PetscMax(A->rmap.bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr);
1097   PetscFunctionReturn(0);
1098 }
1099 
1100 #undef __FUNCT__
1101 #define __FUNCT__ "MatGetArray_SeqSBAIJ"
1102 PetscErrorCode MatGetArray_SeqSBAIJ(Mat A,PetscScalar *array[])
1103 {
1104   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1105   PetscFunctionBegin;
1106   *array = a->a;
1107   PetscFunctionReturn(0);
1108 }
1109 
1110 #undef __FUNCT__
1111 #define __FUNCT__ "MatRestoreArray_SeqSBAIJ"
1112 PetscErrorCode MatRestoreArray_SeqSBAIJ(Mat A,PetscScalar *array[])
1113 {
1114   PetscFunctionBegin;
1115   PetscFunctionReturn(0);
1116  }
1117 
1118 #include "petscblaslapack.h"
1119 #undef __FUNCT__
1120 #define __FUNCT__ "MatAXPY_SeqSBAIJ"
1121 PetscErrorCode MatAXPY_SeqSBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1122 {
1123   Mat_SeqSBAIJ   *x=(Mat_SeqSBAIJ *)X->data, *y=(Mat_SeqSBAIJ *)Y->data;
1124   PetscErrorCode ierr;
1125   PetscInt       i,bs=Y->rmap.bs,bs2,j;
1126   PetscBLASInt   one = 1,bnz = PetscBLASIntCast(x->nz);
1127 
1128   PetscFunctionBegin;
1129   if (str == SAME_NONZERO_PATTERN) {
1130     PetscScalar alpha = a;
1131     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1132   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1133     if (y->xtoy && y->XtoY != X) {
1134       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1135       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1136     }
1137     if (!y->xtoy) { /* get xtoy */
1138       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
1139       y->XtoY = X;
1140     }
1141     bs2 = bs*bs;
1142     for (i=0; i<x->nz; i++) {
1143       j = 0;
1144       while (j < bs2){
1145         y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]);
1146         j++;
1147       }
1148     }
1149     ierr = PetscInfo3(Y,"ratio of nnz_s(X)/nnz_s(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr);
1150   } else {
1151     ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr);
1152     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1153     ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr);
1154   }
1155   PetscFunctionReturn(0);
1156 }
1157 
1158 #undef __FUNCT__
1159 #define __FUNCT__ "MatIsSymmetric_SeqSBAIJ"
1160 PetscErrorCode MatIsSymmetric_SeqSBAIJ(Mat A,PetscReal tol,PetscTruth *flg)
1161 {
1162   PetscFunctionBegin;
1163   *flg = PETSC_TRUE;
1164   PetscFunctionReturn(0);
1165 }
1166 
1167 #undef __FUNCT__
1168 #define __FUNCT__ "MatIsStructurallySymmetric_SeqSBAIJ"
1169 PetscErrorCode MatIsStructurallySymmetric_SeqSBAIJ(Mat A,PetscTruth *flg)
1170 {
1171    PetscFunctionBegin;
1172    *flg = PETSC_TRUE;
1173    PetscFunctionReturn(0);
1174 }
1175 
1176 #undef __FUNCT__
1177 #define __FUNCT__ "MatIsHermitian_SeqSBAIJ"
1178 PetscErrorCode MatIsHermitian_SeqSBAIJ(Mat A,PetscReal tol,PetscTruth *flg)
1179  {
1180    PetscFunctionBegin;
1181    *flg = PETSC_FALSE;
1182    PetscFunctionReturn(0);
1183  }
1184 
1185 #undef __FUNCT__
1186 #define __FUNCT__ "MatRealPart_SeqSBAIJ"
1187 PetscErrorCode MatRealPart_SeqSBAIJ(Mat A)
1188 {
1189   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
1190   PetscInt       i,nz = a->bs2*a->i[a->mbs];
1191   MatScalar      *aa = a->a;
1192 
1193   PetscFunctionBegin;
1194   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
1195   PetscFunctionReturn(0);
1196 }
1197 
1198 #undef __FUNCT__
1199 #define __FUNCT__ "MatImaginaryPart_SeqSBAIJ"
1200 PetscErrorCode MatImaginaryPart_SeqSBAIJ(Mat A)
1201 {
1202   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
1203   PetscInt       i,nz = a->bs2*a->i[a->mbs];
1204   MatScalar      *aa = a->a;
1205 
1206   PetscFunctionBegin;
1207   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
1208   PetscFunctionReturn(0);
1209 }
1210 
1211 /* -------------------------------------------------------------------*/
1212 static struct _MatOps MatOps_Values = {MatSetValues_SeqSBAIJ,
1213        MatGetRow_SeqSBAIJ,
1214        MatRestoreRow_SeqSBAIJ,
1215        MatMult_SeqSBAIJ_N,
1216 /* 4*/ MatMultAdd_SeqSBAIJ_N,
1217        MatMult_SeqSBAIJ_N,       /* transpose versions are same as non-transpose versions */
1218        MatMultAdd_SeqSBAIJ_N,
1219        MatSolve_SeqSBAIJ_N,
1220        0,
1221        0,
1222 /*10*/ 0,
1223        0,
1224        MatCholeskyFactor_SeqSBAIJ,
1225        MatRelax_SeqSBAIJ,
1226        MatTranspose_SeqSBAIJ,
1227 /*15*/ MatGetInfo_SeqSBAIJ,
1228        MatEqual_SeqSBAIJ,
1229        MatGetDiagonal_SeqSBAIJ,
1230        MatDiagonalScale_SeqSBAIJ,
1231        MatNorm_SeqSBAIJ,
1232 /*20*/ 0,
1233        MatAssemblyEnd_SeqSBAIJ,
1234        0,
1235        MatSetOption_SeqSBAIJ,
1236        MatZeroEntries_SeqSBAIJ,
1237 /*25*/ 0,
1238        0,
1239        0,
1240        MatCholeskyFactorSymbolic_SeqSBAIJ,
1241        MatCholeskyFactorNumeric_SeqSBAIJ_N,
1242 /*30*/ MatSetUpPreallocation_SeqSBAIJ,
1243        0,
1244        MatICCFactorSymbolic_SeqSBAIJ,
1245        MatGetArray_SeqSBAIJ,
1246        MatRestoreArray_SeqSBAIJ,
1247 /*35*/ MatDuplicate_SeqSBAIJ,
1248        MatForwardSolve_SeqSBAIJ_N,
1249        MatBackwardSolve_SeqSBAIJ_N,
1250        0,
1251        MatICCFactor_SeqSBAIJ,
1252 /*40*/ MatAXPY_SeqSBAIJ,
1253        MatGetSubMatrices_SeqSBAIJ,
1254        MatIncreaseOverlap_SeqSBAIJ,
1255        MatGetValues_SeqSBAIJ,
1256        MatCopy_SeqSBAIJ,
1257 /*45*/ 0,
1258        MatScale_SeqSBAIJ,
1259        0,
1260        0,
1261        0,
1262 /*50*/ 0,
1263        MatGetRowIJ_SeqSBAIJ,
1264        MatRestoreRowIJ_SeqSBAIJ,
1265        0,
1266        0,
1267 /*55*/ 0,
1268        0,
1269        0,
1270        0,
1271        MatSetValuesBlocked_SeqSBAIJ,
1272 /*60*/ MatGetSubMatrix_SeqSBAIJ,
1273        0,
1274        0,
1275        0,
1276        0,
1277 /*65*/ 0,
1278        0,
1279        0,
1280        0,
1281        0,
1282 /*70*/ MatGetRowMaxAbs_SeqSBAIJ,
1283        0,
1284        0,
1285        0,
1286        0,
1287 /*75*/ 0,
1288        0,
1289        0,
1290        0,
1291        0,
1292 /*80*/ 0,
1293        0,
1294        0,
1295 #if !defined(PETSC_USE_COMPLEX)
1296        MatGetInertia_SeqSBAIJ,
1297 #else
1298        0,
1299 #endif
1300        MatLoad_SeqSBAIJ,
1301 /*85*/ MatIsSymmetric_SeqSBAIJ,
1302        MatIsHermitian_SeqSBAIJ,
1303        MatIsStructurallySymmetric_SeqSBAIJ,
1304        0,
1305        0,
1306 /*90*/ 0,
1307        0,
1308        0,
1309        0,
1310        0,
1311 /*95*/ 0,
1312        0,
1313        0,
1314        0,
1315        0,
1316 /*100*/0,
1317        0,
1318        0,
1319        0,
1320        0,
1321 /*105*/0,
1322        MatRealPart_SeqSBAIJ,
1323        MatImaginaryPart_SeqSBAIJ,
1324        MatGetRowUpperTriangular_SeqSBAIJ,
1325        MatRestoreRowUpperTriangular_SeqSBAIJ,
1326 /*110*/0,
1327        0,
1328        0,
1329        0,
1330        MatMissingDiagonal_SeqSBAIJ
1331 };
1332 
1333 EXTERN_C_BEGIN
1334 #undef __FUNCT__
1335 #define __FUNCT__ "MatStoreValues_SeqSBAIJ"
1336 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqSBAIJ(Mat mat)
1337 {
1338   Mat_SeqSBAIJ   *aij = (Mat_SeqSBAIJ *)mat->data;
1339   PetscInt       nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2;
1340   PetscErrorCode ierr;
1341 
1342   PetscFunctionBegin;
1343   if (aij->nonew != 1) {
1344     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
1345   }
1346 
1347   /* allocate space for values if not already there */
1348   if (!aij->saved_values) {
1349     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
1350   }
1351 
1352   /* copy values over */
1353   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
1354   PetscFunctionReturn(0);
1355 }
1356 EXTERN_C_END
1357 
1358 EXTERN_C_BEGIN
1359 #undef __FUNCT__
1360 #define __FUNCT__ "MatRetrieveValues_SeqSBAIJ"
1361 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqSBAIJ(Mat mat)
1362 {
1363   Mat_SeqSBAIJ   *aij = (Mat_SeqSBAIJ *)mat->data;
1364   PetscErrorCode ierr;
1365   PetscInt       nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2;
1366 
1367   PetscFunctionBegin;
1368   if (aij->nonew != 1) {
1369     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
1370   }
1371   if (!aij->saved_values) {
1372     SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
1373   }
1374 
1375   /* copy values over */
1376   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
1377   PetscFunctionReturn(0);
1378 }
1379 EXTERN_C_END
1380 
1381 EXTERN_C_BEGIN
1382 #undef __FUNCT__
1383 #define __FUNCT__ "MatSeqSBAIJSetPreallocation_SeqSBAIJ"
1384 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetPreallocation_SeqSBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
1385 {
1386   Mat_SeqSBAIJ   *b = (Mat_SeqSBAIJ*)B->data;
1387   PetscErrorCode ierr;
1388   PetscInt       i,mbs,bs2;
1389   PetscTruth     skipallocation = PETSC_FALSE,flg;
1390 
1391   PetscFunctionBegin;
1392   B->preallocated = PETSC_TRUE;
1393   ierr = PetscOptionsGetInt(((PetscObject)B)->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1394   B->rmap.bs = B->cmap.bs = bs;
1395   ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr);
1396   ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr);
1397 
1398   mbs  = B->rmap.N/bs;
1399   bs2  = bs*bs;
1400 
1401   if (mbs*bs != B->rmap.N) {
1402     SETERRQ(PETSC_ERR_ARG_SIZ,"Number rows, cols must be divisible by blocksize");
1403   }
1404 
1405   if (nz == MAT_SKIP_ALLOCATION) {
1406     skipallocation = PETSC_TRUE;
1407     nz             = 0;
1408   }
1409 
1410   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 3;
1411   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
1412   if (nnz) {
1413     for (i=0; i<mbs; i++) {
1414       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
1415       if (nnz[i] > mbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],mbs);
1416     }
1417   }
1418 
1419   ierr    = PetscOptionsHasName(((PetscObject)B)->prefix,"-mat_no_unroll",&flg);CHKERRQ(ierr);
1420   if (!flg) {
1421     switch (bs) {
1422     case 1:
1423       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1;
1424       B->ops->solve            = MatSolve_SeqSBAIJ_1;
1425       B->ops->solves           = MatSolves_SeqSBAIJ_1;
1426       B->ops->solvetranspose   = MatSolve_SeqSBAIJ_1;
1427       B->ops->mult             = MatMult_SeqSBAIJ_1;
1428       B->ops->multadd          = MatMultAdd_SeqSBAIJ_1;
1429       B->ops->multtranspose    = MatMult_SeqSBAIJ_1;
1430       B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_1;
1431       break;
1432     case 2:
1433       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2;
1434       B->ops->solve            = MatSolve_SeqSBAIJ_2;
1435       B->ops->solvetranspose   = MatSolve_SeqSBAIJ_2;
1436       B->ops->mult             = MatMult_SeqSBAIJ_2;
1437       B->ops->multadd          = MatMultAdd_SeqSBAIJ_2;
1438       B->ops->multtranspose    = MatMult_SeqSBAIJ_2;
1439       B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_2;
1440       break;
1441     case 3:
1442       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3;
1443       B->ops->solve            = MatSolve_SeqSBAIJ_3;
1444       B->ops->solvetranspose   = MatSolve_SeqSBAIJ_3;
1445       B->ops->mult             = MatMult_SeqSBAIJ_3;
1446       B->ops->multadd          = MatMultAdd_SeqSBAIJ_3;
1447       B->ops->multtranspose    = MatMult_SeqSBAIJ_3;
1448       B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_3;
1449       break;
1450     case 4:
1451       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4;
1452       B->ops->solve            = MatSolve_SeqSBAIJ_4;
1453       B->ops->solvetranspose   = MatSolve_SeqSBAIJ_4;
1454       B->ops->mult             = MatMult_SeqSBAIJ_4;
1455       B->ops->multadd          = MatMultAdd_SeqSBAIJ_4;
1456       B->ops->multtranspose    = MatMult_SeqSBAIJ_4;
1457       B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_4;
1458       break;
1459     case 5:
1460       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5;
1461       B->ops->solve            = MatSolve_SeqSBAIJ_5;
1462       B->ops->solvetranspose   = MatSolve_SeqSBAIJ_5;
1463       B->ops->mult             = MatMult_SeqSBAIJ_5;
1464       B->ops->multadd          = MatMultAdd_SeqSBAIJ_5;
1465       B->ops->multtranspose    = MatMult_SeqSBAIJ_5;
1466       B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_5;
1467       break;
1468     case 6:
1469       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6;
1470       B->ops->solve            = MatSolve_SeqSBAIJ_6;
1471       B->ops->solvetranspose   = MatSolve_SeqSBAIJ_6;
1472       B->ops->mult             = MatMult_SeqSBAIJ_6;
1473       B->ops->multadd          = MatMultAdd_SeqSBAIJ_6;
1474       B->ops->multtranspose    = MatMult_SeqSBAIJ_6;
1475       B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_6;
1476       break;
1477     case 7:
1478       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7;
1479       B->ops->solve            = MatSolve_SeqSBAIJ_7;
1480       B->ops->solvetranspose   = MatSolve_SeqSBAIJ_7;
1481       B->ops->mult             = MatMult_SeqSBAIJ_7;
1482       B->ops->multadd          = MatMultAdd_SeqSBAIJ_7;
1483       B->ops->multtranspose    = MatMult_SeqSBAIJ_7;
1484       B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_7;
1485       break;
1486     }
1487   }
1488 
1489   b->mbs = mbs;
1490   b->nbs = mbs;
1491   if (!skipallocation) {
1492     if (!b->imax) {
1493       ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr);
1494       ierr = PetscLogObjectMemory(B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
1495     }
1496     if (!nnz) {
1497       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
1498       else if (nz <= 0)        nz = 1;
1499       for (i=0; i<mbs; i++) {
1500         b->imax[i] = nz;
1501       }
1502       nz = nz*mbs; /* total nz */
1503     } else {
1504       nz = 0;
1505       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
1506     }
1507     /* b->ilen will count nonzeros in each block row so far. */
1508     for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
1509     /* nz=(nz+mbs)/2; */ /* total diagonal and superdiagonal nonzero blocks */
1510 
1511     /* allocate the matrix space */
1512     ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr);
1513     ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap.N+1,PetscInt,&b->i);CHKERRQ(ierr);
1514     ierr = PetscLogObjectMemory(B,(B->rmap.N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
1515     ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1516     ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
1517     b->singlemalloc = PETSC_TRUE;
1518 
1519     /* pointer to beginning of each row */
1520     b->i[0] = 0;
1521     for (i=1; i<mbs+1; i++) {
1522       b->i[i] = b->i[i-1] + b->imax[i-1];
1523     }
1524     b->free_a     = PETSC_TRUE;
1525     b->free_ij    = PETSC_TRUE;
1526   } else {
1527     b->free_a     = PETSC_FALSE;
1528     b->free_ij    = PETSC_FALSE;
1529   }
1530 
1531   B->rmap.bs               = bs;
1532   b->bs2              = bs2;
1533   b->nz             = 0;
1534   b->maxnz          = nz*bs2;
1535 
1536   b->inew             = 0;
1537   b->jnew             = 0;
1538   b->anew             = 0;
1539   b->a2anew           = 0;
1540   b->permute          = PETSC_FALSE;
1541   PetscFunctionReturn(0);
1542 }
1543 EXTERN_C_END
1544 
1545 EXTERN_C_BEGIN
1546 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqSBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
1547 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqSBAIJ_SeqBAIJ(Mat, MatType,MatReuse,Mat*);
1548 EXTERN_C_END
1549 
1550 /*MC
1551   MATSEQSBAIJ - MATSEQSBAIJ = "seqsbaij" - A matrix type to be used for sequential symmetric block sparse matrices,
1552   based on block compressed sparse row format.  Only the upper triangular portion of the matrix is stored.
1553 
1554   Options Database Keys:
1555   . -mat_type seqsbaij - sets the matrix type to "seqsbaij" during a call to MatSetFromOptions()
1556 
1557   Level: beginner
1558 
1559   .seealso: MatCreateSeqSBAIJ
1560 M*/
1561 
1562 EXTERN_C_BEGIN
1563 #undef __FUNCT__
1564 #define __FUNCT__ "MatCreate_SeqSBAIJ"
1565 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqSBAIJ(Mat B)
1566 {
1567   Mat_SeqSBAIJ   *b;
1568   PetscErrorCode ierr;
1569   PetscMPIInt    size;
1570   PetscTruth     flg;
1571 
1572   PetscFunctionBegin;
1573   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
1574   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
1575 
1576   ierr    = PetscNewLog(B,Mat_SeqSBAIJ,&b);CHKERRQ(ierr);
1577   B->data = (void*)b;
1578   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1579   B->ops->destroy     = MatDestroy_SeqSBAIJ;
1580   B->ops->view        = MatView_SeqSBAIJ;
1581   B->factor           = 0;
1582   B->mapping          = 0;
1583   b->row              = 0;
1584   b->icol             = 0;
1585   b->reallocs         = 0;
1586   b->saved_values     = 0;
1587 
1588 
1589   b->roworiented      = PETSC_TRUE;
1590   b->nonew            = 0;
1591   b->diag             = 0;
1592   b->solve_work       = 0;
1593   b->mult_work        = 0;
1594   B->spptr            = 0;
1595   b->keepzeroedrows   = PETSC_FALSE;
1596   b->xtoy             = 0;
1597   b->XtoY             = 0;
1598 
1599   b->inew             = 0;
1600   b->jnew             = 0;
1601   b->anew             = 0;
1602   b->a2anew           = 0;
1603   b->permute          = PETSC_FALSE;
1604 
1605   b->ignore_ltriangular = PETSC_FALSE;
1606   ierr = PetscOptionsHasName(PETSC_NULL,"-mat_ignore_lower_triangular",&flg);CHKERRQ(ierr);
1607   if (flg) b->ignore_ltriangular = PETSC_TRUE;
1608 
1609   b->getrow_utriangular = PETSC_FALSE;
1610   ierr = PetscOptionsHasName(PETSC_NULL,"-mat_getrow_uppertriangular",&flg);CHKERRQ(ierr);
1611   if (flg) b->getrow_utriangular = PETSC_TRUE;
1612 
1613   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1614                                      "MatStoreValues_SeqSBAIJ",
1615                                      MatStoreValues_SeqSBAIJ);CHKERRQ(ierr);
1616   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1617                                      "MatRetrieveValues_SeqSBAIJ",
1618                                      (void*)MatRetrieveValues_SeqSBAIJ);CHKERRQ(ierr);
1619   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetColumnIndices_C",
1620                                      "MatSeqSBAIJSetColumnIndices_SeqSBAIJ",
1621                                      MatSeqSBAIJSetColumnIndices_SeqSBAIJ);CHKERRQ(ierr);
1622   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqaij_C",
1623                                      "MatConvert_SeqSBAIJ_SeqAIJ",
1624                                       MatConvert_SeqSBAIJ_SeqAIJ);CHKERRQ(ierr);
1625   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqbaij_C",
1626                                      "MatConvert_SeqSBAIJ_SeqBAIJ",
1627                                       MatConvert_SeqSBAIJ_SeqBAIJ);CHKERRQ(ierr);
1628   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetPreallocation_C",
1629                                      "MatSeqSBAIJSetPreallocation_SeqSBAIJ",
1630                                      MatSeqSBAIJSetPreallocation_SeqSBAIJ);CHKERRQ(ierr);
1631 
1632   B->symmetric                  = PETSC_TRUE;
1633   B->structurally_symmetric     = PETSC_TRUE;
1634   B->symmetric_set              = PETSC_TRUE;
1635   B->structurally_symmetric_set = PETSC_TRUE;
1636   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQSBAIJ);CHKERRQ(ierr);
1637   PetscFunctionReturn(0);
1638 }
1639 EXTERN_C_END
1640 
1641 #undef __FUNCT__
1642 #define __FUNCT__ "MatSeqSBAIJSetPreallocation"
1643 /*@C
1644    MatSeqSBAIJSetPreallocation - Creates a sparse symmetric matrix in block AIJ (block
1645    compressed row) format.  For good matrix assembly performance the
1646    user should preallocate the matrix storage by setting the parameter nz
1647    (or the array nnz).  By setting these parameters accurately, performance
1648    during matrix assembly can be increased by more than a factor of 50.
1649 
1650    Collective on Mat
1651 
1652    Input Parameters:
1653 +  A - the symmetric matrix
1654 .  bs - size of block
1655 .  nz - number of block nonzeros per block row (same for all rows)
1656 -  nnz - array containing the number of block nonzeros in the upper triangular plus
1657          diagonal portion of each block (possibly different for each block row) or PETSC_NULL
1658 
1659    Options Database Keys:
1660 .   -mat_no_unroll - uses code that does not unroll the loops in the
1661                      block calculations (much slower)
1662 .    -mat_block_size - size of the blocks to use
1663 
1664    Level: intermediate
1665 
1666    Notes:
1667    Specify the preallocated storage with either nz or nnz (not both).
1668    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1669    allocation.  For additional details, see the users manual chapter on
1670    matrices.
1671 
1672    You can call MatGetInfo() to get information on how effective the preallocation was;
1673    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
1674    You can also run with the option -info and look for messages with the string
1675    malloc in them to see if additional memory allocation was needed.
1676 
1677    If the nnz parameter is given then the nz parameter is ignored
1678 
1679 
1680 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ()
1681 @*/
1682 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
1683 {
1684   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]);
1685 
1686   PetscFunctionBegin;
1687   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqSBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1688   if (f) {
1689     ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr);
1690   }
1691   PetscFunctionReturn(0);
1692 }
1693 
1694 #undef __FUNCT__
1695 #define __FUNCT__ "MatCreateSeqSBAIJ"
1696 /*@C
1697    MatCreateSeqSBAIJ - Creates a sparse symmetric matrix in block AIJ (block
1698    compressed row) format.  For good matrix assembly performance the
1699    user should preallocate the matrix storage by setting the parameter nz
1700    (or the array nnz).  By setting these parameters accurately, performance
1701    during matrix assembly can be increased by more than a factor of 50.
1702 
1703    Collective on MPI_Comm
1704 
1705    Input Parameters:
1706 +  comm - MPI communicator, set to PETSC_COMM_SELF
1707 .  bs - size of block
1708 .  m - number of rows, or number of columns
1709 .  nz - number of block nonzeros per block row (same for all rows)
1710 -  nnz - array containing the number of block nonzeros in the upper triangular plus
1711          diagonal portion of each block (possibly different for each block row) or PETSC_NULL
1712 
1713    Output Parameter:
1714 .  A - the symmetric matrix
1715 
1716    Options Database Keys:
1717 .   -mat_no_unroll - uses code that does not unroll the loops in the
1718                      block calculations (much slower)
1719 .    -mat_block_size - size of the blocks to use
1720 
1721    Level: intermediate
1722 
1723    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
1724    MatXXXXSetPreallocation() paradgm instead of this routine directly. This is definitely
1725    true if you plan to use the external direct solvers such as SuperLU, MUMPS or Spooles.
1726    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
1727 
1728    Notes:
1729    The number of rows and columns must be divisible by blocksize.
1730    This matrix type does not support complex Hermitian operation.
1731 
1732    Specify the preallocated storage with either nz or nnz (not both).
1733    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1734    allocation.  For additional details, see the users manual chapter on
1735    matrices.
1736 
1737    If the nnz parameter is given then the nz parameter is ignored
1738 
1739 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ()
1740 @*/
1741 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
1742 {
1743   PetscErrorCode ierr;
1744 
1745   PetscFunctionBegin;
1746   ierr = MatCreate(comm,A);CHKERRQ(ierr);
1747   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
1748   ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
1749   ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
1750   PetscFunctionReturn(0);
1751 }
1752 
1753 #undef __FUNCT__
1754 #define __FUNCT__ "MatDuplicate_SeqSBAIJ"
1755 PetscErrorCode MatDuplicate_SeqSBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
1756 {
1757   Mat            C;
1758   Mat_SeqSBAIJ   *c,*a = (Mat_SeqSBAIJ*)A->data;
1759   PetscErrorCode ierr;
1760   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 =a->bs2;
1761 
1762   PetscFunctionBegin;
1763   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
1764 
1765   *B = 0;
1766   ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr);
1767   ierr = MatSetSizes(C,A->rmap.N,A->cmap.n,A->rmap.N,A->cmap.n);CHKERRQ(ierr);
1768   ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
1769   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1770   c    = (Mat_SeqSBAIJ*)C->data;
1771 
1772   C->preallocated   = PETSC_TRUE;
1773   C->factor         = A->factor;
1774   c->row            = 0;
1775   c->icol           = 0;
1776   c->saved_values   = 0;
1777   c->keepzeroedrows = a->keepzeroedrows;
1778   C->assembled      = PETSC_TRUE;
1779 
1780   ierr = PetscMapCopy(((PetscObject)A)->comm,&A->rmap,&C->rmap);CHKERRQ(ierr);
1781   ierr = PetscMapCopy(((PetscObject)A)->comm,&A->cmap,&C->cmap);CHKERRQ(ierr);
1782   c->bs2  = a->bs2;
1783   c->mbs  = a->mbs;
1784   c->nbs  = a->nbs;
1785 
1786   ierr = PetscMalloc2((mbs+1),PetscInt,&c->imax,(mbs+1),PetscInt,&c->ilen);CHKERRQ(ierr);
1787   for (i=0; i<mbs; i++) {
1788     c->imax[i] = a->imax[i];
1789     c->ilen[i] = a->ilen[i];
1790   }
1791 
1792   /* allocate the matrix space */
1793   ierr = PetscMalloc3(bs2*nz,MatScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr);
1794   c->singlemalloc = PETSC_TRUE;
1795   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1796   ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt) + nz*(bs2*sizeof(MatScalar) + sizeof(PetscInt)));CHKERRQ(ierr);
1797   if (mbs > 0) {
1798     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
1799     if (cpvalues == MAT_COPY_VALUES) {
1800       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1801     } else {
1802       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1803     }
1804   }
1805 
1806   c->roworiented = a->roworiented;
1807   c->nonew       = a->nonew;
1808 
1809   if (a->diag) {
1810     ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr);
1811     ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1812     for (i=0; i<mbs; i++) {
1813       c->diag[i] = a->diag[i];
1814     }
1815   } else c->diag  = 0;
1816   c->nz           = a->nz;
1817   c->maxnz        = a->maxnz;
1818   c->solve_work   = 0;
1819   c->mult_work    = 0;
1820   c->free_a       = PETSC_TRUE;
1821   c->free_ij      = PETSC_TRUE;
1822   *B = C;
1823   ierr = PetscFListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr);
1824   PetscFunctionReturn(0);
1825 }
1826 
1827 #undef __FUNCT__
1828 #define __FUNCT__ "MatLoad_SeqSBAIJ"
1829 PetscErrorCode MatLoad_SeqSBAIJ(PetscViewer viewer, MatType type,Mat *A)
1830 {
1831   Mat_SeqSBAIJ   *a;
1832   Mat            B;
1833   PetscErrorCode ierr;
1834   int            fd;
1835   PetscMPIInt    size;
1836   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs=1;
1837   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*s_browlengths,maskcount;
1838   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows;
1839   PetscInt       *masked,nmask,tmp,bs2,ishift;
1840   PetscScalar    *aa;
1841   MPI_Comm       comm = ((PetscObject)viewer)->comm;
1842 
1843   PetscFunctionBegin;
1844   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1845   bs2  = bs*bs;
1846 
1847   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1848   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
1849   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1850   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
1851   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
1852   M = header[1]; N = header[2]; nz = header[3];
1853 
1854   if (header[3] < 0) {
1855     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqSBAIJ");
1856   }
1857 
1858   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
1859 
1860   /*
1861      This code adds extra rows to make sure the number of rows is
1862     divisible by the blocksize
1863   */
1864   mbs        = M/bs;
1865   extra_rows = bs - M + bs*(mbs);
1866   if (extra_rows == bs) extra_rows = 0;
1867   else                  mbs++;
1868   if (extra_rows) {
1869     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
1870   }
1871 
1872   /* read in row lengths */
1873   ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
1874   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1875   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
1876 
1877   /* read in column indices */
1878   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr);
1879   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
1880   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
1881 
1882   /* loop over row lengths determining block row lengths */
1883   ierr     = PetscMalloc(mbs*sizeof(PetscInt),&s_browlengths);CHKERRQ(ierr);
1884   ierr     = PetscMemzero(s_browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr);
1885   ierr     = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr);
1886   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
1887   masked   = mask + mbs;
1888   rowcount = 0; nzcount = 0;
1889   for (i=0; i<mbs; i++) {
1890     nmask = 0;
1891     for (j=0; j<bs; j++) {
1892       kmax = rowlengths[rowcount];
1893       for (k=0; k<kmax; k++) {
1894         tmp = jj[nzcount++]/bs;   /* block col. index */
1895         if (!mask[tmp] && tmp >= i) {masked[nmask++] = tmp; mask[tmp] = 1;}
1896       }
1897       rowcount++;
1898     }
1899     s_browlengths[i] += nmask;
1900 
1901     /* zero out the mask elements we set */
1902     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1903   }
1904 
1905   /* create our matrix */
1906   ierr = MatCreate(comm,&B);CHKERRQ(ierr);
1907   ierr = MatSetSizes(B,M+extra_rows,N+extra_rows,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
1908   ierr = MatSetType(B,type);CHKERRQ(ierr);
1909   ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(B,bs,0,s_browlengths);CHKERRQ(ierr);
1910   a = (Mat_SeqSBAIJ*)B->data;
1911 
1912   /* set matrix "i" values */
1913   a->i[0] = 0;
1914   for (i=1; i<= mbs; i++) {
1915     a->i[i]      = a->i[i-1] + s_browlengths[i-1];
1916     a->ilen[i-1] = s_browlengths[i-1];
1917   }
1918   a->nz = a->i[mbs];
1919 
1920   /* read in nonzero values */
1921   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1922   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
1923   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
1924 
1925   /* set "a" and "j" values into matrix */
1926   nzcount = 0; jcount = 0;
1927   for (i=0; i<mbs; i++) {
1928     nzcountb = nzcount;
1929     nmask    = 0;
1930     for (j=0; j<bs; j++) {
1931       kmax = rowlengths[i*bs+j];
1932       for (k=0; k<kmax; k++) {
1933         tmp = jj[nzcount++]/bs; /* block col. index */
1934         if (!mask[tmp] && tmp >= i) { masked[nmask++] = tmp; mask[tmp] = 1;}
1935       }
1936     }
1937     /* sort the masked values */
1938     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
1939 
1940     /* set "j" values into matrix */
1941     maskcount = 1;
1942     for (j=0; j<nmask; j++) {
1943       a->j[jcount++]  = masked[j];
1944       mask[masked[j]] = maskcount++;
1945     }
1946 
1947     /* set "a" values into matrix */
1948     ishift = bs2*a->i[i];
1949     for (j=0; j<bs; j++) {
1950       kmax = rowlengths[i*bs+j];
1951       for (k=0; k<kmax; k++) {
1952         tmp       = jj[nzcountb]/bs ; /* block col. index */
1953         if (tmp >= i){
1954           block     = mask[tmp] - 1;
1955           point     = jj[nzcountb] - bs*tmp;
1956           idx       = ishift + bs2*block + j + bs*point;
1957           a->a[idx] = aa[nzcountb];
1958         }
1959         nzcountb++;
1960       }
1961     }
1962     /* zero out the mask elements we set */
1963     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1964   }
1965   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
1966 
1967   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1968   ierr = PetscFree(s_browlengths);CHKERRQ(ierr);
1969   ierr = PetscFree(aa);CHKERRQ(ierr);
1970   ierr = PetscFree(jj);CHKERRQ(ierr);
1971   ierr = PetscFree(mask);CHKERRQ(ierr);
1972 
1973   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1974   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1975   ierr = MatView_Private(B);CHKERRQ(ierr);
1976   *A = B;
1977   PetscFunctionReturn(0);
1978 }
1979 
1980 #undef __FUNCT__
1981 #define __FUNCT__ "MatRelax_SeqSBAIJ"
1982 PetscErrorCode MatRelax_SeqSBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1983 {
1984   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
1985   MatScalar      *aa=a->a,*v,*v1;
1986   PetscScalar    *x,*b,*t,sum,d;
1987   PetscErrorCode ierr;
1988   PetscInt       m=a->mbs,bs=A->rmap.bs,*ai=a->i,*aj=a->j;
1989   PetscInt       nz,nz1,*vj,*vj1,i;
1990 
1991   PetscFunctionBegin;
1992   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
1993 
1994   its = its*lits;
1995   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
1996 
1997   if (bs > 1)
1998     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
1999 
2000   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2001   if (xx != bb) {
2002     ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
2003   } else {
2004     b = x;
2005   }
2006 
2007   if (!a->relax_work) {
2008     ierr = PetscMalloc(m*sizeof(PetscScalar),&a->relax_work);CHKERRQ(ierr);
2009   }
2010   t = a->relax_work;
2011 
2012   if (flag & SOR_ZERO_INITIAL_GUESS) {
2013     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
2014       ierr = PetscMemcpy(t,b,m*sizeof(PetscScalar));CHKERRQ(ierr);
2015 
2016       for (i=0; i<m; i++){
2017         d  = *(aa + ai[i]);  /* diag[i] */
2018         v  = aa + ai[i] + 1;
2019         vj = aj + ai[i] + 1;
2020         nz = ai[i+1] - ai[i] - 1;
2021         ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr);
2022         x[i] = omega*t[i]/d;
2023         while (nz--) t[*vj++] -= x[i]*(*v++); /* update rhs */
2024       }
2025     }
2026 
2027     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
2028       if (!(flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP)){
2029         t = b;
2030       }
2031 
2032       for (i=m-1; i>=0; i--){
2033         d  = *(aa + ai[i]);
2034         v  = aa + ai[i] + 1;
2035         vj = aj + ai[i] + 1;
2036         nz = ai[i+1] - ai[i] - 1;
2037         ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr);
2038         sum = t[i];
2039         while (nz--) sum -= x[*vj++]*(*v++);
2040         x[i] =   (1-omega)*x[i] + omega*sum/d;
2041       }
2042       t = a->relax_work;
2043     }
2044     its--;
2045   }
2046 
2047   while (its--) {
2048     /*
2049        forward sweep:
2050        for i=0,...,m-1:
2051          sum[i] = (b[i] - U(i,:)x )/d[i];
2052          x[i]   = (1-omega)x[i] + omega*sum[i];
2053          b      = b - x[i]*U^T(i,:);
2054 
2055     */
2056     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
2057       ierr = PetscMemcpy(t,b,m*sizeof(PetscScalar));CHKERRQ(ierr);
2058 
2059       for (i=0; i<m; i++){
2060         d  = *(aa + ai[i]);  /* diag[i] */
2061         v  = aa + ai[i] + 1; v1=v;
2062         vj = aj + ai[i] + 1; vj1=vj;
2063         nz = ai[i+1] - ai[i] - 1; nz1=nz;
2064         sum = t[i];
2065         ierr = PetscLogFlops(4*nz-2);CHKERRQ(ierr);
2066         while (nz1--) sum -= (*v1++)*x[*vj1++];
2067         x[i] = (1-omega)*x[i] + omega*sum/d;
2068         while (nz--) t[*vj++] -= x[i]*(*v++);
2069       }
2070     }
2071 
2072     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
2073       /*
2074        backward sweep:
2075        b = b - x[i]*U^T(i,:), i=0,...,n-2
2076        for i=m-1,...,0:
2077          sum[i] = (b[i] - U(i,:)x )/d[i];
2078          x[i]   = (1-omega)x[i] + omega*sum[i];
2079       */
2080       /* if there was a forward sweep done above then I thing the next two for loops are not needed */
2081       ierr = PetscMemcpy(t,b,m*sizeof(PetscScalar));CHKERRQ(ierr);
2082 
2083       for (i=0; i<m-1; i++){  /* update rhs */
2084         v  = aa + ai[i] + 1;
2085         vj = aj + ai[i] + 1;
2086         nz = ai[i+1] - ai[i] - 1;
2087         ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr);
2088         while (nz--) t[*vj++] -= x[i]*(*v++);
2089       }
2090       for (i=m-1; i>=0; i--){
2091         d  = *(aa + ai[i]);
2092         v  = aa + ai[i] + 1;
2093         vj = aj + ai[i] + 1;
2094         nz = ai[i+1] - ai[i] - 1;
2095         ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr);
2096         sum = t[i];
2097         while (nz--) sum -= x[*vj++]*(*v++);
2098         x[i] =   (1-omega)*x[i] + omega*sum/d;
2099       }
2100     }
2101   }
2102 
2103   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2104   if (bb != xx) {
2105     ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
2106   }
2107   PetscFunctionReturn(0);
2108 }
2109 
2110 #undef __FUNCT__
2111 #define __FUNCT__ "MatCreateSeqSBAIJWithArrays"
2112 /*@
2113      MatCreateSeqSBAIJWithArrays - Creates an sequential SBAIJ matrix using matrix elements
2114               (upper triangular entries in CSR format) provided by the user.
2115 
2116      Collective on MPI_Comm
2117 
2118    Input Parameters:
2119 +  comm - must be an MPI communicator of size 1
2120 .  bs - size of block
2121 .  m - number of rows
2122 .  n - number of columns
2123 .  i - row indices
2124 .  j - column indices
2125 -  a - matrix values
2126 
2127    Output Parameter:
2128 .  mat - the matrix
2129 
2130    Level: intermediate
2131 
2132    Notes:
2133        The i, j, and a arrays are not copied by this routine, the user must free these arrays
2134     once the matrix is destroyed
2135 
2136        You cannot set new nonzero locations into this matrix, that will generate an error.
2137 
2138        The i and j indices are 0 based
2139 
2140 .seealso: MatCreate(), MatCreateMPISBAIJ(), MatCreateSeqSBAIJ()
2141 
2142 @*/
2143 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqSBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat)
2144 {
2145   PetscErrorCode ierr;
2146   PetscInt       ii;
2147   Mat_SeqSBAIJ   *sbaij;
2148 
2149   PetscFunctionBegin;
2150   if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
2151   if (i[0]) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
2152 
2153   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
2154   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
2155   ierr = MatSetType(*mat,MATSEQSBAIJ);CHKERRQ(ierr);
2156   ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
2157   sbaij = (Mat_SeqSBAIJ*)(*mat)->data;
2158   ierr = PetscMalloc2(m,PetscInt,&sbaij->imax,m,PetscInt,&sbaij->ilen);CHKERRQ(ierr);
2159 
2160   sbaij->i = i;
2161   sbaij->j = j;
2162   sbaij->a = a;
2163   sbaij->singlemalloc = PETSC_FALSE;
2164   sbaij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
2165   sbaij->free_a       = PETSC_FALSE;
2166   sbaij->free_ij      = PETSC_FALSE;
2167 
2168   for (ii=0; ii<m; ii++) {
2169     sbaij->ilen[ii] = sbaij->imax[ii] = i[ii+1] - i[ii];
2170 #if defined(PETSC_USE_DEBUG)
2171     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
2172 #endif
2173   }
2174 #if defined(PETSC_USE_DEBUG)
2175   for (ii=0; ii<sbaij->i[m]; ii++) {
2176     if (j[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
2177     if (j[ii] > n - 1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
2178   }
2179 #endif
2180 
2181   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2182   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2183   PetscFunctionReturn(0);
2184 }
2185 
2186 
2187 
2188 
2189 
2190