xref: /petsc/src/mat/impls/blockmat/seq/blockmat.c (revision 4fb89dddf56594b92bdd2ca7e24874fafe134f45)
1 
2 /*
3    This provides a matrix that consists of Mats
4 */
5 
6 #include <petsc/private/matimpl.h>              /*I "petscmat.h" I*/
7 #include <../src/mat/impls/baij/seq/baij.h>    /* use the common AIJ data-structure */
8 
typedef struct {
  SEQAIJHEADER(Mat);                           /* CSR structure (i, j, ilen, imax, ...) whose entries are Mat blocks */
  SEQBAIJHEADER;                               /* block-size related fields shared with SeqBAIJ */
  Mat *diags;                                  /* factored diagonal blocks; built lazily by the MatSOR routines */

  Vec left,right,middle,workb;                 /* dummy vectors to perform local parts of product */
} Mat_BlockMat;
16 
/*
   Symmetric block SOR: the matrix stores only the upper triangular blocks
   (see MatSetOption(A,MAT_SYMMETRIC,PETSC_TRUE)), so after solving for each
   block of x the transpose of the off-diagonal blocks is used to update the
   remaining right hand side (the SeqSBAIJ approach).

   Supported only for omega == 1.0, fshift == 0, no Eisenstat variant, and a
   backward sweep must be combined with a forward sweep.
*/
static PetscErrorCode MatSOR_BlockMat_Symmetric(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_BlockMat      *a = (Mat_BlockMat*)A->data;
  PetscScalar       *x;
  const Mat         *v;
  const PetscScalar *b;
  PetscInt          n = A->cmap->n,i,mbs = n/A->rmap->bs,j,bs = A->rmap->bs;
  const PetscInt    *idx;
  IS                row,col;
  MatFactorInfo     info;
  Vec               left = a->left,right = a->right, middle = a->middle;
  Mat               *diag;

  PetscFunctionBegin;
  its = its*lits;
  PetscCheck(its > 0,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive",its,lits);
  PetscCheck(!(flag & SOR_EISENSTAT),PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
  PetscCheck(omega == 1.0,PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for omega not equal to 1.0");
  PetscCheck(!fshift,PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for fshift");
  if ((flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) && !(flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP)) {
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot do backward sweep without forward sweep");
  }

  /* First use: Cholesky-factor every diagonal block and cache the factors in
     a->diags; they are destroyed in MatDestroy_BlockMat().
     NOTE(review): a->diags[i] is passed to MatCholeskyFactorSymbolic() without
     first being created (e.g. via MatGetFactor()) -- confirm the factor
     routines in use tolerate this. */
  if (!a->diags) {
    PetscCall(PetscMalloc1(mbs,&a->diags));
    PetscCall(MatFactorInfoInitialize(&info));
    for (i=0; i<mbs; i++) {
      PetscCall(MatGetOrdering(a->a[a->diag[i]], MATORDERINGND,&row,&col));
      PetscCall(MatCholeskyFactorSymbolic(a->diags[i],a->a[a->diag[i]],row,&info));
      PetscCall(MatCholeskyFactorNumeric(a->diags[i],a->a[a->diag[i]],&info));
      PetscCall(ISDestroy(&row));
      PetscCall(ISDestroy(&col));
    }
    PetscCall(VecDuplicate(bb,&a->workb));
  }
  diag = a->diags;

  PetscCall(VecSet(xx,0.0));
  PetscCall(VecGetArray(xx,&x));
  /* copy right hand side because it must be modified during iteration */
  PetscCall(VecCopy(bb,a->workb));
  /* note: the forward sweep below writes through this "read" array (via the
     middle work vector); that is why the copy a->workb is used, not bb */
  PetscCall(VecGetArrayRead(a->workb,&b));

  /* need to add code for when initial guess is zero, see MatSOR_SeqAIJ */
  while (its--) {
    if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {

      for (i=0; i<mbs; i++) {
        n   = a->i[i+1] - a->i[i] - 1;   /* number of blocks strictly above the diagonal */
        idx = a->j + a->i[i] + 1;        /* their block-column indices */
        v   = a->a + a->i[i] + 1;        /* the blocks themselves */

        /* left = sum over upper-triangular blocks of row i applied to x */
        PetscCall(VecSet(left,0.0));
        for (j=0; j<n; j++) {
          PetscCall(VecPlaceArray(right,x + idx[j]*bs));
          PetscCall(MatMultAdd(v[j],right,left,left));
          PetscCall(VecResetArray(right));
        }
        /* left = b_i - left */
        PetscCall(VecPlaceArray(right,b + i*bs));
        PetscCall(VecAYPX(left,-1.0,right));
        PetscCall(VecResetArray(right));

        /* x_i = D_ii^{-1} (b_i - left) via the cached Cholesky factor */
        PetscCall(VecPlaceArray(right,x + i*bs));
        PetscCall(MatSolve(diag[i],left,right));

        /* now adjust right hand side, see MatSOR_SeqSBAIJ */
        for (j=0; j<n; j++) {
          PetscCall(MatMultTranspose(v[j],right,left));
          PetscCall(VecPlaceArray(middle,b + idx[j]*bs));
          PetscCall(VecAXPY(middle,-1.0,left));
          PetscCall(VecResetArray(middle));
        }
        PetscCall(VecResetArray(right));

      }
    }
    if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {

      /* same as the forward sweep but in reverse row order; the right hand
         side is not re-adjusted here (it was fully updated going forward) */
      for (i=mbs-1; i>=0; i--) {
        n   = a->i[i+1] - a->i[i] - 1;
        idx = a->j + a->i[i] + 1;
        v   = a->a + a->i[i] + 1;

        PetscCall(VecSet(left,0.0));
        for (j=0; j<n; j++) {
          PetscCall(VecPlaceArray(right,x + idx[j]*bs));
          PetscCall(MatMultAdd(v[j],right,left,left));
          PetscCall(VecResetArray(right));
        }
        PetscCall(VecPlaceArray(right,b + i*bs));
        PetscCall(VecAYPX(left,-1.0,right));
        PetscCall(VecResetArray(right));

        PetscCall(VecPlaceArray(right,x + i*bs));
        PetscCall(MatSolve(diag[i],left,right));
        PetscCall(VecResetArray(right));

      }
    }
  }
  PetscCall(VecRestoreArray(xx,&x));
  PetscCall(VecRestoreArrayRead(a->workb,&b));
  PetscFunctionReturn(0);
}
121 
/*
   Block SOR for the nonsymmetric (full) storage format: each block row holds
   all of its blocks, so the right hand side bb never needs to be modified
   (unlike MatSOR_BlockMat_Symmetric above).

   Supported only for omega == 1.0, fshift == 0, and no Eisenstat variant.
*/
static PetscErrorCode MatSOR_BlockMat(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_BlockMat      *a = (Mat_BlockMat*)A->data;
  PetscScalar       *x;
  const Mat         *v;
  const PetscScalar *b;
  PetscInt          n = A->cmap->n,i,mbs = n/A->rmap->bs,j,bs = A->rmap->bs;
  const PetscInt    *idx;
  IS                row,col;
  MatFactorInfo     info;
  Vec               left = a->left,right = a->right;
  Mat               *diag;

  PetscFunctionBegin;
  its = its*lits;
  PetscCheck(its > 0,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive",its,lits);
  PetscCheck(!(flag & SOR_EISENSTAT),PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
  PetscCheck(omega == 1.0,PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for omega not equal to 1.0");
  PetscCheck(!fshift,PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for fshift");

  /* First use: LU-factor every diagonal block and cache the factors.
     NOTE(review): as in the symmetric variant, a->diags[i] is used by
     MatLUFactorSymbolic() without first being created -- verify. */
  if (!a->diags) {
    PetscCall(PetscMalloc1(mbs,&a->diags));
    PetscCall(MatFactorInfoInitialize(&info));
    for (i=0; i<mbs; i++) {
      PetscCall(MatGetOrdering(a->a[a->diag[i]], MATORDERINGND,&row,&col));
      PetscCall(MatLUFactorSymbolic(a->diags[i],a->a[a->diag[i]],row,col,&info));
      PetscCall(MatLUFactorNumeric(a->diags[i],a->a[a->diag[i]],&info));
      PetscCall(ISDestroy(&row));
      PetscCall(ISDestroy(&col));
    }
  }
  diag = a->diags;

  PetscCall(VecSet(xx,0.0));
  PetscCall(VecGetArray(xx,&x));
  PetscCall(VecGetArrayRead(bb,&b));

  /* need to add code for when initial guess is zero, see MatSOR_SeqAIJ */
  while (its--) {
    if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {

      for (i=0; i<mbs; i++) {
        n   = a->i[i+1] - a->i[i];   /* all blocks of block row i */
        idx = a->j + a->i[i];
        v   = a->a + a->i[i];

        /* left = sum of off-diagonal blocks of row i applied to x */
        PetscCall(VecSet(left,0.0));
        for (j=0; j<n; j++) {
          if (idx[j] != i) {         /* skip the diagonal block */
            PetscCall(VecPlaceArray(right,x + idx[j]*bs));
            PetscCall(MatMultAdd(v[j],right,left,left));
            PetscCall(VecResetArray(right));
          }
        }
        /* left = b_i - left */
        PetscCall(VecPlaceArray(right,b + i*bs));
        PetscCall(VecAYPX(left,-1.0,right));
        PetscCall(VecResetArray(right));

        /* x_i = D_ii^{-1} (b_i - left) via the cached LU factor */
        PetscCall(VecPlaceArray(right,x + i*bs));
        PetscCall(MatSolve(diag[i],left,right));
        PetscCall(VecResetArray(right));
      }
    }
    if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {

      /* identical update in reverse row order */
      for (i=mbs-1; i>=0; i--) {
        n   = a->i[i+1] - a->i[i];
        idx = a->j + a->i[i];
        v   = a->a + a->i[i];

        PetscCall(VecSet(left,0.0));
        for (j=0; j<n; j++) {
          if (idx[j] != i) {
            PetscCall(VecPlaceArray(right,x + idx[j]*bs));
            PetscCall(MatMultAdd(v[j],right,left,left));
            PetscCall(VecResetArray(right));
          }
        }
        PetscCall(VecPlaceArray(right,b + i*bs));
        PetscCall(VecAYPX(left,-1.0,right));
        PetscCall(VecResetArray(right));

        PetscCall(VecPlaceArray(right,x + i*bs));
        PetscCall(MatSolve(diag[i],left,right));
        PetscCall(VecResetArray(right));

      }
    }
  }
  PetscCall(VecRestoreArray(xx,&x));
  PetscCall(VecRestoreArrayRead(bb,&b));
  PetscFunctionReturn(0);
}
215 
/*
   MatSetValues for MATBLOCKMAT: indices are in terms of scalar rows/columns;
   each (row,col) is mapped to a block (brow,bcol) plus an offset (ridx,cidx)
   within that block.  New blocks are inserted into the CSR-of-Mats structure
   (binary search + shift, exactly as in SeqAIJ) and created lazily as bs-by-bs
   SeqAIJ matrices; the scalar value is then forwarded to the block's own
   MatSetValues().
*/
static PetscErrorCode MatSetValues_BlockMat(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
{
  Mat_BlockMat   *a = (Mat_BlockMat*)A->data;
  PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
  PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
  PetscInt       *aj  =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
  PetscInt       ridx,cidx;
  PetscBool      roworiented=a->roworiented;
  MatScalar      value;
  Mat            *ap,*aa = a->a;

  PetscFunctionBegin;
  for (k=0; k<m; k++) { /* loop over added rows */
    row  = im[k];
    brow = row/bs;
    if (row < 0) continue;    /* negative rows are silently ignored (standard PETSc convention) */
    PetscCheck(row < A->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,row,A->rmap->N-1);
    rp   = aj + ai[brow];     /* block-column indices of block row brow */
    ap   = aa + ai[brow];     /* Mat blocks of block row brow */
    rmax = imax[brow];        /* allocated slots in this block row */
    nrow = ailen[brow];       /* slots currently in use */
    low  = 0;
    high = nrow;
    for (l=0; l<n; l++) { /* loop over added columns */
      if (in[l] < 0) continue;
      PetscCheck(in[l] < A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,in[l],A->cmap->n-1);
      col = in[l]; bcol = col/bs;
      /* symmetric storage keeps only the upper triangular blocks */
      if (A->symmetric && brow > bcol) continue;
      ridx = row % bs; cidx = col % bs;
      if (roworiented) value = v[l + k*n];
      else value = v[k + l*m];

      /* shrink the binary-search window when columns arrive in order */
      if (col <= lastcol) low = 0;
      else high = nrow;
      lastcol = col;
      while (high-low > 7) {
        t = (low+high)/2;
        if (rp[t] > bcol) high = t;
        else              low  = t;
      }
      for (i=low; i<high; i++) {
        if (rp[i] > bcol) break;
        if (rp[i] == bcol) goto noinsert1;   /* block already present */
      }
      if (nonew == 1) goto noinsert1;        /* MAT_NEW_NONZERO_LOCATIONS == FALSE: drop silently */
      PetscCheck(nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col);
      MatSeqXAIJReallocateAIJ(A,a->mbs,1,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,Mat);
      N = nrow++ - 1; high++;
      /* shift up all the later entries in this row */
      for (ii=N; ii>=i; ii--) {
        rp[ii+1] = rp[ii];
        ap[ii+1] = ap[ii];
      }
      /* the vacated slot gets a NULL block so it is created fresh below */
      if (N>=i) ap[i] = NULL;
      rp[i] = bcol;
      a->nz++;
      A->nonzerostate++;
noinsert1:;
      /* create the bs x bs block on first touch */
      if (!*(ap+i)) {
        PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF,bs,bs,0,NULL,ap+i));
      }
      PetscCall(MatSetValues(ap[i],1,&ridx,1,&cidx,&value,is));
      low  = i;
    }
    ailen[brow] = nrow;
  }
  PetscFunctionReturn(0);
}
284 
285 static PetscErrorCode MatLoad_BlockMat(Mat newmat, PetscViewer viewer)
286 {
287   Mat               tmpA;
288   PetscInt          i,j,m,n,bs = 1,ncols,*lens,currentcol,mbs,**ii,*ilens,nextcol,*llens,cnt = 0;
289   const PetscInt    *cols;
290   const PetscScalar *values;
291   PetscBool         flg = PETSC_FALSE,notdone;
292   Mat_SeqAIJ        *a;
293   Mat_BlockMat      *amat;
294 
295   PetscFunctionBegin;
296   /* force binary viewer to load .info file if it has not yet done so */
297   PetscCall(PetscViewerSetUp(viewer));
298   PetscCall(MatCreate(PETSC_COMM_SELF,&tmpA));
299   PetscCall(MatSetType(tmpA,MATSEQAIJ));
300   PetscCall(MatLoad_SeqAIJ(tmpA,viewer));
301 
302   PetscCall(MatGetLocalSize(tmpA,&m,&n));
303   PetscOptionsBegin(PETSC_COMM_SELF,NULL,"Options for loading BlockMat matrix 1","Mat");
304   PetscCall(PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL));
305   PetscCall(PetscOptionsBool("-matload_symmetric","Store the matrix as symmetric","MatLoad",flg,&flg,NULL));
306   PetscOptionsEnd();
307 
308   /* Determine number of nonzero blocks for each block row */
309   a    = (Mat_SeqAIJ*) tmpA->data;
310   mbs  = m/bs;
311   PetscCall(PetscMalloc3(mbs,&lens,bs,&ii,bs,&ilens));
312   PetscCall(PetscArrayzero(lens,mbs));
313 
314   for (i=0; i<mbs; i++) {
315     for (j=0; j<bs; j++) {
316       ii[j]    = a->j + a->i[i*bs + j];
317       ilens[j] = a->i[i*bs + j + 1] - a->i[i*bs + j];
318     }
319 
320     currentcol = -1;
321     while (PETSC_TRUE) {
322       notdone = PETSC_FALSE;
323       nextcol = 1000000000;
324       for (j=0; j<bs; j++) {
325         while ((ilens[j] > 0 && ii[j][0]/bs <= currentcol)) {
326           ii[j]++;
327           ilens[j]--;
328         }
329         if (ilens[j] > 0) {
330           notdone = PETSC_TRUE;
331           nextcol = PetscMin(nextcol,ii[j][0]/bs);
332         }
333       }
334       if (!notdone) break;
335       if (!flg || (nextcol >= i)) lens[i]++;
336       currentcol = nextcol;
337     }
338   }
339 
340   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) {
341     PetscCall(MatSetSizes(newmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE));
342   }
343   PetscCall(MatBlockMatSetPreallocation(newmat,bs,0,lens));
344   if (flg) PetscCall(MatSetOption(newmat,MAT_SYMMETRIC,PETSC_TRUE));
345   amat = (Mat_BlockMat*)(newmat)->data;
346 
347   /* preallocate the submatrices */
348   PetscCall(PetscMalloc1(bs,&llens));
349   for (i=0; i<mbs; i++) { /* loops for block rows */
350     for (j=0; j<bs; j++) {
351       ii[j]    = a->j + a->i[i*bs + j];
352       ilens[j] = a->i[i*bs + j + 1] - a->i[i*bs + j];
353     }
354 
355     currentcol = 1000000000;
356     for (j=0; j<bs; j++) { /* loop over rows in block finding first nonzero block */
357       if (ilens[j] > 0) {
358         currentcol = PetscMin(currentcol,ii[j][0]/bs);
359       }
360     }
361 
362     while (PETSC_TRUE) {  /* loops over blocks in block row */
363       notdone = PETSC_FALSE;
364       nextcol = 1000000000;
365       PetscCall(PetscArrayzero(llens,bs));
366       for (j=0; j<bs; j++) { /* loop over rows in block */
367         while ((ilens[j] > 0 && ii[j][0]/bs <= currentcol)) { /* loop over columns in row */
368           ii[j]++;
369           ilens[j]--;
370           llens[j]++;
371         }
372         if (ilens[j] > 0) {
373           notdone = PETSC_TRUE;
374           nextcol = PetscMin(nextcol,ii[j][0]/bs);
375         }
376       }
377       PetscCheck(cnt < amat->maxnz,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Number of blocks found greater than expected %" PetscInt_FMT,cnt);
378       if (!flg || currentcol >= i) {
379         amat->j[cnt] = currentcol;
380         PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF,bs,bs,0,llens,amat->a+cnt++));
381       }
382 
383       if (!notdone) break;
384       currentcol = nextcol;
385     }
386     amat->ilen[i] = lens[i];
387   }
388 
389   PetscCall(PetscFree3(lens,ii,ilens));
390   PetscCall(PetscFree(llens));
391 
392   /* copy over the matrix, one row at a time */
393   for (i=0; i<m; i++) {
394     PetscCall(MatGetRow(tmpA,i,&ncols,&cols,&values));
395     PetscCall(MatSetValues(newmat,1,&i,ncols,cols,values,INSERT_VALUES));
396     PetscCall(MatRestoreRow(tmpA,i,&ncols,&cols,&values));
397   }
398   PetscCall(MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY));
399   PetscCall(MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY));
400   PetscFunctionReturn(0);
401 }
402 
403 static PetscErrorCode MatView_BlockMat(Mat A,PetscViewer viewer)
404 {
405   Mat_BlockMat      *a = (Mat_BlockMat*)A->data;
406   const char        *name;
407   PetscViewerFormat format;
408 
409   PetscFunctionBegin;
410   PetscCall(PetscObjectGetName((PetscObject)A,&name));
411   PetscCall(PetscViewerGetFormat(viewer,&format));
412   if (format == PETSC_VIEWER_ASCII_FACTOR_INFO || format == PETSC_VIEWER_ASCII_INFO) {
413     PetscCall(PetscViewerASCIIPrintf(viewer,"Nonzero block matrices = %" PetscInt_FMT " \n",a->nz));
414     if (A->symmetric) {
415       PetscCall(PetscViewerASCIIPrintf(viewer,"Only upper triangular part of symmetric matrix is stored\n"));
416     }
417   }
418   PetscFunctionReturn(0);
419 }
420 
421 static PetscErrorCode MatDestroy_BlockMat(Mat mat)
422 {
423   Mat_BlockMat   *bmat = (Mat_BlockMat*)mat->data;
424   PetscInt       i;
425 
426   PetscFunctionBegin;
427   PetscCall(VecDestroy(&bmat->right));
428   PetscCall(VecDestroy(&bmat->left));
429   PetscCall(VecDestroy(&bmat->middle));
430   PetscCall(VecDestroy(&bmat->workb));
431   if (bmat->diags) {
432     for (i=0; i<mat->rmap->n/mat->rmap->bs; i++) {
433       PetscCall(MatDestroy(&bmat->diags[i]));
434     }
435   }
436   if (bmat->a) {
437     for (i=0; i<bmat->nz; i++) {
438       PetscCall(MatDestroy(&bmat->a[i]));
439     }
440   }
441   PetscCall(MatSeqXAIJFreeAIJ(mat,(PetscScalar**)&bmat->a,&bmat->j,&bmat->i));
442   PetscCall(PetscFree(mat->data));
443   PetscFunctionReturn(0);
444 }
445 
446 static PetscErrorCode MatMult_BlockMat(Mat A,Vec x,Vec y)
447 {
448   Mat_BlockMat   *bmat = (Mat_BlockMat*)A->data;
449   PetscScalar    *xx,*yy;
450   PetscInt       *aj,i,*ii,jrow,m = A->rmap->n/A->rmap->bs,bs = A->rmap->bs,n,j;
451   Mat            *aa;
452 
453   PetscFunctionBegin;
454   /*
455      Standard CSR multiply except each entry is a Mat
456   */
457   PetscCall(VecGetArray(x,&xx));
458 
459   PetscCall(VecSet(y,0.0));
460   PetscCall(VecGetArray(y,&yy));
461   aj   = bmat->j;
462   aa   = bmat->a;
463   ii   = bmat->i;
464   for (i=0; i<m; i++) {
465     jrow = ii[i];
466     PetscCall(VecPlaceArray(bmat->left,yy + bs*i));
467     n    = ii[i+1] - jrow;
468     for (j=0; j<n; j++) {
469       PetscCall(VecPlaceArray(bmat->right,xx + bs*aj[jrow]));
470       PetscCall(MatMultAdd(aa[jrow],bmat->right,bmat->left,bmat->left));
471       PetscCall(VecResetArray(bmat->right));
472       jrow++;
473     }
474     PetscCall(VecResetArray(bmat->left));
475   }
476   PetscCall(VecRestoreArray(x,&xx));
477   PetscCall(VecRestoreArray(y,&yy));
478   PetscFunctionReturn(0);
479 }
480 
/*
   y = A*x when only the upper triangular blocks are stored: each stored block
   U_ij (j > i) contributes U_ij*x_j to y_i (upper part) and U_ij^T*x_i to y_j
   (implicit lower part).  bmat->middle aliases the x_i slice and bmat->left
   the y_i slice for the duration of a block row.
*/
PetscErrorCode MatMult_BlockMat_Symmetric(Mat A,Vec x,Vec y)
{
  Mat_BlockMat   *bmat = (Mat_BlockMat*)A->data;
  PetscScalar    *xx,*yy;
  PetscInt       *aj,i,*ii,jrow,m = A->rmap->n/A->rmap->bs,bs = A->rmap->bs,n,j;
  Mat            *aa;

  PetscFunctionBegin;
  /*
     Standard CSR multiply except each entry is a Mat
  */
  PetscCall(VecGetArray(x,&xx));

  PetscCall(VecSet(y,0.0));
  PetscCall(VecGetArray(y,&yy));
  aj   = bmat->j;
  aa   = bmat->a;
  ii   = bmat->i;
  for (i=0; i<m; i++) {
    jrow = ii[i];
    n    = ii[i+1] - jrow;
    PetscCall(VecPlaceArray(bmat->left,yy + bs*i));
    PetscCall(VecPlaceArray(bmat->middle,xx + bs*i));
    /* if we ALWAYS required a diagonal entry then could remove this if test */
    /* NOTE(review): aj[jrow] is read before checking n > 0; for an empty
       block row this reads one past the row -- confirm rows are never empty */
    if (aj[jrow] == i) {
      /* diagonal block contributes only once: y_i += D_ii x_i */
      PetscCall(VecPlaceArray(bmat->right,xx + bs*aj[jrow]));
      PetscCall(MatMultAdd(aa[jrow],bmat->right,bmat->left,bmat->left));
      PetscCall(VecResetArray(bmat->right));
      jrow++;
      n--;
    }
    for (j=0; j<n; j++) {
      PetscCall(VecPlaceArray(bmat->right,xx + bs*aj[jrow]));            /* upper triangular part */
      PetscCall(MatMultAdd(aa[jrow],bmat->right,bmat->left,bmat->left));
      PetscCall(VecResetArray(bmat->right));

      PetscCall(VecPlaceArray(bmat->right,yy + bs*aj[jrow]));            /* lower triangular part */
      PetscCall(MatMultTransposeAdd(aa[jrow],bmat->middle,bmat->right,bmat->right));
      PetscCall(VecResetArray(bmat->right));
      jrow++;
    }
    PetscCall(VecResetArray(bmat->left));
    PetscCall(VecResetArray(bmat->middle));
  }
  PetscCall(VecRestoreArray(x,&xx));
  PetscCall(VecRestoreArray(y,&yy));
  PetscFunctionReturn(0);
}
529 
/* NOTE(review): unimplemented stub -- silently returns success without
   computing z = y + A*x; callers receive no result */
static PetscErrorCode MatMultAdd_BlockMat(Mat A,Vec x,Vec y,Vec z)
{
  PetscFunctionBegin;
  PetscFunctionReturn(0);
}
535 
/* NOTE(review): unimplemented stub -- silently returns success without
   computing y = A^T x */
static PetscErrorCode MatMultTranspose_BlockMat(Mat A,Vec x,Vec y)
{
  PetscFunctionBegin;
  PetscFunctionReturn(0);
}
541 
/* NOTE(review): unimplemented stub -- silently returns success without
   computing z = y + A^T x */
static PetscErrorCode MatMultTransposeAdd_BlockMat(Mat A,Vec x,Vec y,Vec z)
{
  PetscFunctionBegin;
  PetscFunctionReturn(0);
}
547 
548 /*
549      Adds diagonal pointers to sparse matrix structure.
550 */
551 static PetscErrorCode MatMarkDiagonal_BlockMat(Mat A)
552 {
553   Mat_BlockMat   *a = (Mat_BlockMat*)A->data;
554   PetscInt       i,j,mbs = A->rmap->n/A->rmap->bs;
555 
556   PetscFunctionBegin;
557   if (!a->diag) {
558     PetscCall(PetscMalloc1(mbs,&a->diag));
559   }
560   for (i=0; i<mbs; i++) {
561     a->diag[i] = a->i[i+1];
562     for (j=a->i[i]; j<a->i[i+1]; j++) {
563       if (a->j[j] == i) {
564         a->diag[i] = j;
565         break;
566       }
567     }
568   }
569   PetscFunctionReturn(0);
570 }
571 
/*
   Extracts a scalar submatrix by picking entry (first,first) out of every
   stored block.  Only supported when isrow == iscol and the IS is a stride
   with step equal to the block size, i.e. one entry per block.  The result is
   SeqAIJ (or SeqSBAIJ when A is symmetric) with the same sparsity pattern as
   the block structure.
*/
static PetscErrorCode MatCreateSubMatrix_BlockMat(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
{
  Mat_BlockMat   *a = (Mat_BlockMat*)A->data;
  Mat_SeqAIJ     *c;
  PetscInt       i,k,first,step,lensi,nrows,ncols;
  PetscInt       *j_new,*i_new,*aj = a->j,*ailen = a->ilen;
  PetscScalar    *a_new;
  Mat            C,*aa = a->a;
  PetscBool      stride,equal;

  PetscFunctionBegin;
  PetscCall(ISEqual(isrow,iscol,&equal));
  PetscCheck(equal,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only for identical column and row indices");
  PetscCall(PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&stride));
  PetscCheck(stride,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only for stride indices");
  PetscCall(ISStrideGetInfo(iscol,&first,&step));
  PetscCheck(step == A->rmap->bs,PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only select one entry from each block");

  PetscCall(ISGetLocalSize(isrow,&nrows));
  ncols = nrows;

  /* create submatrix */
  if (scall == MAT_REUSE_MATRIX) {
    PetscInt n_cols,n_rows;
    C    = *B;
    PetscCall(MatGetSize(C,&n_rows,&n_cols));
    PetscCheck(n_rows == nrows && n_cols == ncols,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size");
    PetscCall(MatZeroEntries(C));
  } else {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&C));
    PetscCall(MatSetSizes(C,nrows,ncols,PETSC_DETERMINE,PETSC_DETERMINE));
    if (A->symmetric) {
      PetscCall(MatSetType(C,MATSEQSBAIJ));
    } else {
      PetscCall(MatSetType(C,MATSEQAIJ));
    }
    /* only the preallocation matching C's type takes effect; the other is a no-op */
    PetscCall(MatSeqAIJSetPreallocation(C,0,ailen));
    PetscCall(MatSeqSBAIJSetPreallocation(C,1,0,ailen));
  }
  /* NOTE(review): when C is SEQSBAIJ this cast relies on Mat_SeqSBAIJ sharing
     the leading a/i/j/ilen layout with Mat_SeqAIJ -- confirm */
  c = (Mat_SeqAIJ*)C->data;

  /* loop over rows inserting into submatrix */
  a_new = c->a;
  j_new = c->j;
  i_new = c->i;

  for (i=0; i<nrows; i++) {
    lensi = ailen[i];
    for (k=0; k<lensi; k++) {
      *j_new++ = *aj++;     /* block column index becomes scalar column index */
      PetscCall(MatGetValue(*aa++,first,first,a_new++));
    }
    i_new[i+1] = i_new[i] + lensi;
    c->ilen[i] = lensi;
  }

  PetscCall(MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY));
  *B   = C;
  PetscFunctionReturn(0);
}
633 
/*
   Final assembly: compacts the CSR-of-Mats structure by sliding every block
   row left over the unused preallocated slots, rebuilds i/ilen/imax to the
   packed layout, assembles every stored block, and records the diagonal block
   locations.  MAT_FLUSH_ASSEMBLY is a no-op.
*/
static PetscErrorCode MatAssemblyEnd_BlockMat(Mat A,MatAssemblyType mode)
{
  Mat_BlockMat   *a = (Mat_BlockMat*)A->data;
  PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
  PetscInt       m      = a->mbs,*ip,N,*ailen = a->ilen,rmax = 0;
  Mat            *aa    = a->a,*ap;

  PetscFunctionBegin;
  if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);

  if (m) rmax = ailen[0]; /* determine row with most nonzeros */
  for (i=1; i<m; i++) {
    /* move each row back by the amount of empty slots (fshift) before it*/
    fshift += imax[i-1] - ailen[i-1];
    rmax    = PetscMax(rmax,ailen[i]);
    if (fshift) {
      ip = aj + ai[i];
      ap = aa + ai[i];
      N  = ailen[i];
      for (j=0; j<N; j++) {
        ip[j-fshift] = ip[j];
        ap[j-fshift] = ap[j];
      }
    }
    ai[i] = ai[i-1] + ailen[i-1];   /* row start in the packed layout */
  }
  if (m) {
    fshift += imax[m-1] - ailen[m-1];
    ai[m]   = ai[m-1] + ailen[m-1];
  }
  /* reset ilen and imax for each row */
  for (i=0; i<m; i++) {
    ailen[i] = imax[i] = ai[i+1] - ai[i];
  }
  a->nz = ai[m];
  /* every surviving slot must hold a block; assemble each one */
  for (i=0; i<a->nz; i++) {
    PetscAssert(aa[i],PETSC_COMM_SELF,PETSC_ERR_PLIB,"Null matrix at location %" PetscInt_FMT " column %" PetscInt_FMT " nz %" PetscInt_FMT,i,aj[i],a->nz);
    PetscCall(MatAssemblyBegin(aa[i],MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(aa[i],MAT_FINAL_ASSEMBLY));
  }
  PetscCall(PetscInfo(A,"Matrix size: %" PetscInt_FMT " X %" PetscInt_FMT "; storage space: %" PetscInt_FMT " unneeded,%" PetscInt_FMT " used\n",m,A->cmap->n/A->cmap->bs,fshift,a->nz));
  PetscCall(PetscInfo(A,"Number of mallocs during MatSetValues() is %" PetscInt_FMT "\n",a->reallocs));
  PetscCall(PetscInfo(A,"Maximum nonzeros in any row is %" PetscInt_FMT "\n",rmax));

  A->info.mallocs    += a->reallocs;
  a->reallocs         = 0;
  A->info.nz_unneeded = (double)fshift;
  a->rmax             = rmax;
  PetscCall(MatMarkDiagonal_BlockMat(A));
  PetscFunctionReturn(0);
}
685 
686 static PetscErrorCode MatSetOption_BlockMat(Mat A,MatOption opt,PetscBool flg)
687 {
688   PetscFunctionBegin;
689   if (opt == MAT_SYMMETRIC && flg) {
690     A->ops->sor  = MatSOR_BlockMat_Symmetric;
691     A->ops->mult = MatMult_BlockMat_Symmetric;
692   } else {
693     PetscCall(PetscInfo(A,"Unused matrix option %s\n",MatOptions[opt]));
694   }
695   PetscFunctionReturn(0);
696 }
697 
/* Function table for MATBLOCKMAT; slot positions follow struct _MatOps in
   matimpl.h (the /\* nn *\/ markers give the slot index).  NULL slots are
   operations this type does not implement. */
static struct _MatOps MatOps_Values = {MatSetValues_BlockMat,
                                       NULL,
                                       NULL,
                                       MatMult_BlockMat,
                               /*  4*/ MatMultAdd_BlockMat,
                                       MatMultTranspose_BlockMat,
                                       MatMultTransposeAdd_BlockMat,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 10*/ NULL,
                                       NULL,
                                       NULL,
                                       MatSOR_BlockMat,
                                       NULL,
                               /* 15*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 20*/ NULL,
                                       MatAssemblyEnd_BlockMat,
                                       MatSetOption_BlockMat,
                                       NULL,
                               /* 24*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 29*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 34*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 39*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 44*/ NULL,
                                       NULL,
                                       MatShift_Basic,
                                       NULL,
                                       NULL,
                               /* 49*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 54*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 59*/ MatCreateSubMatrix_BlockMat,
                                       MatDestroy_BlockMat,
                                       MatView_BlockMat,
                                       NULL,
                                       NULL,
                               /* 64*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 69*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 74*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 79*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatLoad_BlockMat,
                               /* 84*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 89*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 94*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 99*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /*104*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /*109*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /*114*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /*119*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /*124*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /*129*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /*134*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /*139*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /*144*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL
};
849 
850 /*@C
851    MatBlockMatSetPreallocation - For good matrix assembly performance
852    the user should preallocate the matrix storage by setting the parameter nz
853    (or the array nnz).  By setting these parameters accurately, performance
854    during matrix assembly can be increased by more than a factor of 50.
855 
856    Collective
857 
858    Input Parameters:
859 +  B - The matrix
860 .  bs - size of each block in matrix
861 .  nz - number of nonzeros per block row (same for all rows)
862 -  nnz - array containing the number of nonzeros in the various block rows
863          (possibly different for each row) or NULL
864 
865    Notes:
866      If nnz is given then nz is ignored
867 
868    Specify the preallocated storage with either nz or nnz (not both).
869    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
870    allocation.  For large problems you MUST preallocate memory or you
871    will get TERRIBLE performance, see the users' manual chapter on matrices.
872 
873    Level: intermediate
874 
875 .seealso: `MatCreate()`, `MatCreateBlockMat()`, `MatSetValues()`
876 
877 @*/
PetscErrorCode  MatBlockMatSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
{
  PetscFunctionBegin;
  /* Forward to the type-specific implementation composed on B as
     "MatBlockMatSetPreallocation_C" (see MatCreate_BlockMat()).  By PETSc
     convention PetscTryMethod() silently does nothing when the method is not
     composed, so calling this on a non-MATBLOCKMAT matrix is harmless. */
  PetscTryMethod(B,"MatBlockMatSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));
  PetscFunctionReturn(0);
}
884 
885 static PetscErrorCode  MatBlockMatSetPreallocation_BlockMat(Mat A,PetscInt bs,PetscInt nz,PetscInt *nnz)
886 {
887   Mat_BlockMat   *bmat = (Mat_BlockMat*)A->data;
888   PetscInt       i;
889 
890   PetscFunctionBegin;
891   PetscCall(PetscLayoutSetBlockSize(A->rmap,bs));
892   PetscCall(PetscLayoutSetBlockSize(A->cmap,bs));
893   PetscCall(PetscLayoutSetUp(A->rmap));
894   PetscCall(PetscLayoutSetUp(A->cmap));
895   PetscCall(PetscLayoutGetBlockSize(A->rmap,&bs));
896 
897   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
898   PetscCheck(nz >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %" PetscInt_FMT,nz);
899   if (nnz) {
900     for (i=0; i<A->rmap->n/bs; i++) {
901       PetscCheck(nnz[i] >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %" PetscInt_FMT " value %" PetscInt_FMT,i,nnz[i]);
902       PetscCheck(nnz[i] <= A->cmap->n/bs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than row length: local row %" PetscInt_FMT " value %" PetscInt_FMT " rowlength %" PetscInt_FMT,i,nnz[i],A->cmap->n/bs);
903     }
904   }
905   bmat->mbs = A->rmap->n/bs;
906 
907   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs,NULL,&bmat->right));
908   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs,NULL,&bmat->middle));
909   PetscCall(VecCreateSeq(PETSC_COMM_SELF,bs,&bmat->left));
910 
911   if (!bmat->imax) {
912     PetscCall(PetscMalloc2(A->rmap->n,&bmat->imax,A->rmap->n,&bmat->ilen));
913     PetscCall(PetscLogObjectMemory((PetscObject)A,2*A->rmap->n*sizeof(PetscInt)));
914   }
915   if (PetscLikely(nnz)) {
916     nz = 0;
917     for (i=0; i<A->rmap->n/A->rmap->bs; i++) {
918       bmat->imax[i] = nnz[i];
919       nz           += nnz[i];
920     }
921   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Currently requires block row by row preallocation");
922 
923   /* bmat->ilen will count nonzeros in each row so far. */
924   PetscCall(PetscArrayzero(bmat->ilen,bmat->mbs));
925 
926   /* allocate the matrix space */
927   PetscCall(MatSeqXAIJFreeAIJ(A,(PetscScalar**)&bmat->a,&bmat->j,&bmat->i));
928   PetscCall(PetscMalloc3(nz,&bmat->a,nz,&bmat->j,A->rmap->n+1,&bmat->i));
929   PetscCall(PetscLogObjectMemory((PetscObject)A,(A->rmap->n+1)*sizeof(PetscInt)+nz*(sizeof(PetscScalar)+sizeof(PetscInt))));
930   bmat->i[0] = 0;
931   for (i=1; i<bmat->mbs+1; i++) {
932     bmat->i[i] = bmat->i[i-1] + bmat->imax[i-1];
933   }
934   bmat->singlemalloc = PETSC_TRUE;
935   bmat->free_a       = PETSC_TRUE;
936   bmat->free_ij      = PETSC_TRUE;
937 
938   bmat->nz            = 0;
939   bmat->maxnz         = nz;
940   A->info.nz_unneeded = (double)bmat->maxnz;
941   PetscCall(MatSetOption(A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE));
942   PetscFunctionReturn(0);
943 }
944 
945 /*MC
   MATBLOCKMAT - A matrix type defined by a set of Mat objects; it represents a sparse block matrix
                 whose blocks are (usually) themselves sparse matrices.
948 
949   Level: advanced
950 
951 .seealso: `MatCreateBlockMat()`
952 
953 M*/
954 
955 PETSC_EXTERN PetscErrorCode MatCreate_BlockMat(Mat A)
956 {
957   Mat_BlockMat   *b;
958 
959   PetscFunctionBegin;
960   PetscCall(PetscNewLog(A,&b));
961   A->data = (void*)b;
962   PetscCall(PetscMemcpy(A->ops,&MatOps_Values,sizeof(struct _MatOps)));
963 
964   A->assembled    = PETSC_TRUE;
965   A->preallocated = PETSC_FALSE;
966   PetscCall(PetscObjectChangeTypeName((PetscObject)A,MATBLOCKMAT));
967 
968   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatBlockMatSetPreallocation_C",MatBlockMatSetPreallocation_BlockMat));
969   PetscFunctionReturn(0);
970 }
971 
972 /*@C
973    MatCreateBlockMat - Creates a new matrix in which each block contains a uniform-size sequential Mat object
974 
975   Collective
976 
977    Input Parameters:
978 +  comm - MPI communicator
979 .  m - number of rows
980 .  n  - number of columns
981 .  bs - size of each submatrix
982 .  nz  - expected maximum number of nonzero blocks in row (use PETSC_DEFAULT if not known)
-  nnz - expected number of nonzeros per block row if known (use NULL otherwise)
984 
985    Output Parameter:
986 .  A - the matrix
987 
988    Level: intermediate
989 
990    Notes:
991     Matrices of this type are nominally-sparse matrices in which each "entry" is a Mat object.  Each Mat must
992    have the same size and be sequential.  The local and global sizes must be compatible with this decomposition.
993 
994    For matrices containing parallel submatrices and variable block sizes, see MATNEST.
995 
996 .seealso: `MATBLOCKMAT`, `MatCreateNest()`
997 @*/
PetscErrorCode  MatCreateBlockMat(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt bs,PetscInt nz,PetscInt *nnz, Mat *A)
{
  PetscFunctionBegin;
  /* Standard PETSc create / set-sizes / set-type / preallocate sequence; the
     call order is required by the Mat API. */
  PetscCall(MatCreate(comm,A));
  /* m,n are local sizes; global sizes are derived (PETSC_DETERMINE) */
  PetscCall(MatSetSizes(*A,m,n,PETSC_DETERMINE,PETSC_DETERMINE));
  PetscCall(MatSetType(*A,MATBLOCKMAT));
  PetscCall(MatBlockMatSetPreallocation(*A,bs,nz,nnz));
  PetscFunctionReturn(0);
}
1007