xref: /petsc/src/mat/impls/blockmat/seq/blockmat.c (revision 8fb5bd83c3955fefcf33a54e3bb66920a9fa884b)
1 
2 /*
3    This provides a matrix that consists of Mats
4 */
5 
6 #include <petsc/private/matimpl.h>              /*I "petscmat.h" I*/
7 #include <../src/mat/impls/baij/seq/baij.h>    /* use the common AIJ data-structure */
8 
typedef struct {
  SEQAIJHEADER(Mat);   /* CSR-style index arrays (i,j,a,ilen,imax,...) where each stored entry a[] is itself a Mat block */
  SEQBAIJHEADER;       /* block bookkeeping (mbs, diag, ...) shared with the SeqBAIJ format */
  Mat *diags;          /* factored diagonal blocks, created lazily by the MatSOR routines */

  Vec left,right,middle,workb;                 /* dummy vectors to perform local parts of product */
} Mat_BlockMat;
16 
/*
   MatSOR_BlockMat_Symmetric - SOR relaxation for a BlockMat that stores only the
   upper triangular blocks of a symmetric matrix.

   On first use the diagonal blocks are Cholesky factored (nested-dissection
   ordering) and cached in a->diags; a work copy of the right-hand side (a->workb)
   is also created because the symmetric sweep modifies the right-hand side as it
   eliminates each block row (see MatSOR_SeqSBAIJ).

   Restrictions enforced below: omega == 1.0, fshift == 0, no Eisenstat trick, and
   a backward sweep is only allowed in combination with a forward sweep.
*/
static PetscErrorCode MatSOR_BlockMat_Symmetric(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_BlockMat      *a = (Mat_BlockMat*)A->data;
  PetscScalar       *x;
  const Mat         *v;
  const PetscScalar *b;
  PetscInt          n = A->cmap->n,i,mbs = n/A->rmap->bs,j,bs = A->rmap->bs;
  const PetscInt    *idx;
  IS                row,col;
  MatFactorInfo     info;
  Vec               left = a->left,right = a->right, middle = a->middle;
  Mat               *diag;

  PetscFunctionBegin;
  its = its*lits;
  PetscCheck(its > 0,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive",its,lits);
  PetscCheck(!(flag & SOR_EISENSTAT),PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
  PetscCheck(omega == 1.0,PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for omega not equal to 1.0");
  PetscCheck(!fshift,PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for fshift");
  if ((flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) && !(flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP)) {
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot do backward sweep without forward sweep");
  }

  if (!a->diags) {
    /* lazily factor each diagonal block once; the factors are reused on later calls */
    PetscCall(PetscMalloc1(mbs,&a->diags));
    PetscCall(MatFactorInfoInitialize(&info));
    for (i=0; i<mbs; i++) {
      PetscCall(MatGetOrdering(a->a[a->diag[i]], MATORDERINGND,&row,&col));
      /* NOTE(review): a->diags[i] is passed to MatCholeskyFactorSymbolic() without having
         been created (PetscMalloc1() leaves the entries uninitialized); a MatGetFactor()
         call would normally be expected first — confirm against the PETSc factor API */
      PetscCall(MatCholeskyFactorSymbolic(a->diags[i],a->a[a->diag[i]],row,&info));
      PetscCall(MatCholeskyFactorNumeric(a->diags[i],a->a[a->diag[i]],&info));
      PetscCall(ISDestroy(&row));
      PetscCall(ISDestroy(&col));
    }
    PetscCall(VecDuplicate(bb,&a->workb));
  }
  diag = a->diags;

  PetscCall(VecSet(xx,0.0));
  PetscCall(VecGetArray(xx,&x));
  /* copy right hand side because it must be modified during iteration */
  PetscCall(VecCopy(bb,a->workb));
  /* NOTE(review): b is obtained read-only here, yet the sweep below writes through it
     via the "middle" vector (VecAXPY after VecPlaceArray(middle,b+...)); VecGetArray()
     on workb looks like the intended call — confirm */
  PetscCall(VecGetArrayRead(a->workb,&b));

  /* need to add code for when initial guess is zero, see MatSOR_SeqAIJ */
  while (its--) {
    if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {

      for (i=0; i<mbs; i++) {
        /* strictly upper-triangular part of block row i (skip the diagonal block,
           which is assumed stored first in the row) */
        n   = a->i[i+1] - a->i[i] - 1;
        idx = a->j + a->i[i] + 1;
        v   = a->a + a->i[i] + 1;

        /* left = sum_j U(i,j)*x(j) over the off-diagonal blocks */
        PetscCall(VecSet(left,0.0));
        for (j=0; j<n; j++) {
          PetscCall(VecPlaceArray(right,x + idx[j]*bs));
          PetscCall(MatMultAdd(v[j],right,left,left));
          PetscCall(VecResetArray(right));
        }
        /* left = b(i) - left */
        PetscCall(VecPlaceArray(right,b + i*bs));
        PetscCall(VecAYPX(left,-1.0,right));
        PetscCall(VecResetArray(right));

        /* x(i) = D(i)^{-1} left */
        PetscCall(VecPlaceArray(right,x + i*bs));
        PetscCall(MatSolve(diag[i],left,right));

        /* now adjust right hand side, see MatSOR_SeqSBAIJ */
        for (j=0; j<n; j++) {
          /* b(idx[j]) -= U(i,j)^T * x(i), accounting for the unstored lower triangle */
          PetscCall(MatMultTranspose(v[j],right,left));
          PetscCall(VecPlaceArray(middle,b + idx[j]*bs));
          PetscCall(VecAXPY(middle,-1.0,left));
          PetscCall(VecResetArray(middle));
        }
        PetscCall(VecResetArray(right));

      }
    }
    if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {

      /* backward sweep over the (already adjusted) right-hand side */
      for (i=mbs-1; i>=0; i--) {
        n   = a->i[i+1] - a->i[i] - 1;
        idx = a->j + a->i[i] + 1;
        v   = a->a + a->i[i] + 1;

        PetscCall(VecSet(left,0.0));
        for (j=0; j<n; j++) {
          PetscCall(VecPlaceArray(right,x + idx[j]*bs));
          PetscCall(MatMultAdd(v[j],right,left,left));
          PetscCall(VecResetArray(right));
        }
        PetscCall(VecPlaceArray(right,b + i*bs));
        PetscCall(VecAYPX(left,-1.0,right));
        PetscCall(VecResetArray(right));

        PetscCall(VecPlaceArray(right,x + i*bs));
        PetscCall(MatSolve(diag[i],left,right));
        PetscCall(VecResetArray(right));

      }
    }
  }
  PetscCall(VecRestoreArray(xx,&x));
  PetscCall(VecRestoreArrayRead(a->workb,&b));
  PetscFunctionReturn(0);
}
121 
/*
   MatSOR_BlockMat - SOR relaxation for a (nonsymmetric) BlockMat.

   On first use the diagonal blocks are LU factored (nested-dissection ordering)
   and cached in a->diags. Each sweep solves, block row by block row,
   D(i) x(i) = b(i) - sum_{j != i} A(i,j) x(j).

   Restrictions enforced below: omega == 1.0, fshift == 0, no Eisenstat trick.
*/
static PetscErrorCode MatSOR_BlockMat(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_BlockMat      *a = (Mat_BlockMat*)A->data;
  PetscScalar       *x;
  const Mat         *v;
  const PetscScalar *b;
  PetscInt          n = A->cmap->n,i,mbs = n/A->rmap->bs,j,bs = A->rmap->bs;
  const PetscInt    *idx;
  IS                row,col;
  MatFactorInfo     info;
  Vec               left = a->left,right = a->right;
  Mat               *diag;

  PetscFunctionBegin;
  its = its*lits;
  PetscCheck(its > 0,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive",its,lits);
  PetscCheck(!(flag & SOR_EISENSTAT),PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
  PetscCheck(omega == 1.0,PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for omega not equal to 1.0");
  PetscCheck(!fshift,PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for fshift");

  if (!a->diags) {
    /* lazily LU-factor each diagonal block once; reused on later calls */
    PetscCall(PetscMalloc1(mbs,&a->diags));
    PetscCall(MatFactorInfoInitialize(&info));
    for (i=0; i<mbs; i++) {
      PetscCall(MatGetOrdering(a->a[a->diag[i]], MATORDERINGND,&row,&col));
      /* NOTE(review): a->diags[i] is passed to MatLUFactorSymbolic() without having been
         created (PetscMalloc1() leaves the entries uninitialized); a MatGetFactor() call
         would normally be expected first — confirm against the PETSc factor API */
      PetscCall(MatLUFactorSymbolic(a->diags[i],a->a[a->diag[i]],row,col,&info));
      PetscCall(MatLUFactorNumeric(a->diags[i],a->a[a->diag[i]],&info));
      PetscCall(ISDestroy(&row));
      PetscCall(ISDestroy(&col));
    }
  }
  diag = a->diags;

  PetscCall(VecSet(xx,0.0));
  PetscCall(VecGetArray(xx,&x));
  PetscCall(VecGetArrayRead(bb,&b));

  /* need to add code for when initial guess is zero, see MatSOR_SeqAIJ */
  while (its--) {
    if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {

      for (i=0; i<mbs; i++) {
        /* whole block row i; the diagonal block is skipped inside the loop below */
        n   = a->i[i+1] - a->i[i];
        idx = a->j + a->i[i];
        v   = a->a + a->i[i];

        /* left = sum_{j != i} A(i,j)*x(j) */
        PetscCall(VecSet(left,0.0));
        for (j=0; j<n; j++) {
          if (idx[j] != i) {
            PetscCall(VecPlaceArray(right,x + idx[j]*bs));
            PetscCall(MatMultAdd(v[j],right,left,left));
            PetscCall(VecResetArray(right));
          }
        }
        /* left = b(i) - left */
        PetscCall(VecPlaceArray(right,b + i*bs));
        PetscCall(VecAYPX(left,-1.0,right));
        PetscCall(VecResetArray(right));

        /* x(i) = D(i)^{-1} left */
        PetscCall(VecPlaceArray(right,x + i*bs));
        PetscCall(MatSolve(diag[i],left,right));
        PetscCall(VecResetArray(right));
      }
    }
    if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {

      /* same update, block rows visited in reverse order */
      for (i=mbs-1; i>=0; i--) {
        n   = a->i[i+1] - a->i[i];
        idx = a->j + a->i[i];
        v   = a->a + a->i[i];

        PetscCall(VecSet(left,0.0));
        for (j=0; j<n; j++) {
          if (idx[j] != i) {
            PetscCall(VecPlaceArray(right,x + idx[j]*bs));
            PetscCall(MatMultAdd(v[j],right,left,left));
            PetscCall(VecResetArray(right));
          }
        }
        PetscCall(VecPlaceArray(right,b + i*bs));
        PetscCall(VecAYPX(left,-1.0,right));
        PetscCall(VecResetArray(right));

        PetscCall(VecPlaceArray(right,x + i*bs));
        PetscCall(MatSolve(diag[i],left,right));
        PetscCall(VecResetArray(right));

      }
    }
  }
  PetscCall(VecRestoreArray(xx,&x));
  PetscCall(VecRestoreArrayRead(bb,&b));
  PetscFunctionReturn(0);
}
215 
/*
   MatSetValues_BlockMat - Inserts scalar values one at a time; each scalar (row,col)
   entry is routed into position (row%bs, col%bs) of the bs-by-bs submatrix stored
   for block (row/bs, col/bs). Block submatrices are created lazily on first touch.

   For a symmetric matrix only blocks with brow <= bcol are stored; entries destined
   for the lower triangle are silently skipped.
*/
static PetscErrorCode MatSetValues_BlockMat(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
{
  Mat_BlockMat   *a = (Mat_BlockMat*)A->data;
  PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
  PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
  PetscInt       *aj  =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
  PetscInt       ridx,cidx;
  PetscBool      roworiented=a->roworiented;
  MatScalar      value;
  Mat            *ap,*aa = a->a;

  PetscFunctionBegin;
  for (k=0; k<m; k++) { /* loop over added rows */
    row  = im[k];
    brow = row/bs;
    if (row < 0) continue;
    PetscCheck(row < A->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,row,A->rmap->N-1);
    rp   = aj + ai[brow];     /* block-column indices of block row brow */
    ap   = aa + ai[brow];     /* block (Mat) entries of block row brow */
    rmax = imax[brow];        /* allocated slots in this block row */
    nrow = ailen[brow];       /* slots currently in use */
    low  = 0;
    high = nrow;
    for (l=0; l<n; l++) { /* loop over added columns */
      if (in[l] < 0) continue;
      PetscCheck(in[l] < A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,in[l],A->cmap->n-1);
      col = in[l]; bcol = col/bs;
      if (A->symmetric && brow > bcol) continue;
      ridx = row % bs; cidx = col % bs;
      if (roworiented) value = v[l + k*n];
      else value = v[k + l*m];

      /* narrow the search window: monotone column sequences reuse the last position */
      if (col <= lastcol) low = 0;
      else high = nrow;
      lastcol = col;
      /* binary search down to a window of <= 7, then finish linearly */
      while (high-low > 7) {
        t = (low+high)/2;
        if (rp[t] > bcol) high = t;
        else              low  = t;
      }
      for (i=low; i<high; i++) {
        if (rp[i] > bcol) break;
        if (rp[i] == bcol) goto noinsert1;   /* block already present */
      }
      if (nonew == 1) goto noinsert1;        /* new nonzeros ignored by request */
      PetscCheck(nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col);
      /* may reallocate the row storage; updates rp/ap/rmax/aa/aj in place */
      MatSeqXAIJReallocateAIJ(A,a->mbs,1,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,Mat);
      N = nrow++ - 1; high++;
      /* shift up all the later entries in this row */
      for (ii=N; ii>=i; ii--) {
        rp[ii+1] = rp[ii];
        ap[ii+1] = ap[ii];
      }
      if (N>=i) ap[i] = NULL;   /* freshly opened slot: block created below */
      rp[i] = bcol;
      a->nz++;
      A->nonzerostate++;
noinsert1:;
      if (!*(ap+i)) {
        /* lazily create the bs-by-bs submatrix for this block */
        PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF,bs,bs,0,NULL,ap+i));
      }
      PetscCall(MatSetValues(ap[i],1,&ridx,1,&cidx,&value,is));
      low  = i;
    }
    ailen[brow] = nrow;
  }
  PetscFunctionReturn(0);
}
284 
285 static PetscErrorCode MatLoad_BlockMat(Mat newmat, PetscViewer viewer)
286 {
287   Mat               tmpA;
288   PetscInt          i,j,m,n,bs = 1,ncols,*lens,currentcol,mbs,**ii,*ilens,nextcol,*llens,cnt = 0;
289   const PetscInt    *cols;
290   const PetscScalar *values;
291   PetscBool         flg = PETSC_FALSE,notdone;
292   Mat_SeqAIJ        *a;
293   Mat_BlockMat      *amat;
294 
295   PetscFunctionBegin;
296   /* force binary viewer to load .info file if it has not yet done so */
297   PetscCall(PetscViewerSetUp(viewer));
298   PetscCall(MatCreate(PETSC_COMM_SELF,&tmpA));
299   PetscCall(MatSetType(tmpA,MATSEQAIJ));
300   PetscCall(MatLoad_SeqAIJ(tmpA,viewer));
301 
302   PetscCall(MatGetLocalSize(tmpA,&m,&n));
303   PetscOptionsBegin(PETSC_COMM_SELF,NULL,"Options for loading BlockMat matrix 1","Mat");
304   PetscCall(PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL));
305   PetscCall(PetscOptionsBool("-matload_symmetric","Store the matrix as symmetric","MatLoad",flg,&flg,NULL));
306   PetscOptionsEnd();
307 
308   /* Determine number of nonzero blocks for each block row */
309   a    = (Mat_SeqAIJ*) tmpA->data;
310   mbs  = m/bs;
311   PetscCall(PetscMalloc3(mbs,&lens,bs,&ii,bs,&ilens));
312   PetscCall(PetscArrayzero(lens,mbs));
313 
314   for (i=0; i<mbs; i++) {
315     for (j=0; j<bs; j++) {
316       ii[j]    = a->j + a->i[i*bs + j];
317       ilens[j] = a->i[i*bs + j + 1] - a->i[i*bs + j];
318     }
319 
320     currentcol = -1;
321     while (PETSC_TRUE) {
322       notdone = PETSC_FALSE;
323       nextcol = 1000000000;
324       for (j=0; j<bs; j++) {
325         while ((ilens[j] > 0 && ii[j][0]/bs <= currentcol)) {
326           ii[j]++;
327           ilens[j]--;
328         }
329         if (ilens[j] > 0) {
330           notdone = PETSC_TRUE;
331           nextcol = PetscMin(nextcol,ii[j][0]/bs);
332         }
333       }
334       if (!notdone) break;
335       if (!flg || (nextcol >= i)) lens[i]++;
336       currentcol = nextcol;
337     }
338   }
339 
340   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) {
341     PetscCall(MatSetSizes(newmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE));
342   }
343   PetscCall(MatBlockMatSetPreallocation(newmat,bs,0,lens));
344   if (flg) PetscCall(MatSetOption(newmat,MAT_SYMMETRIC,PETSC_TRUE));
345   amat = (Mat_BlockMat*)(newmat)->data;
346 
347   /* preallocate the submatrices */
348   PetscCall(PetscMalloc1(bs,&llens));
349   for (i=0; i<mbs; i++) { /* loops for block rows */
350     for (j=0; j<bs; j++) {
351       ii[j]    = a->j + a->i[i*bs + j];
352       ilens[j] = a->i[i*bs + j + 1] - a->i[i*bs + j];
353     }
354 
355     currentcol = 1000000000;
356     for (j=0; j<bs; j++) { /* loop over rows in block finding first nonzero block */
357       if (ilens[j] > 0) {
358         currentcol = PetscMin(currentcol,ii[j][0]/bs);
359       }
360     }
361 
362     while (PETSC_TRUE) {  /* loops over blocks in block row */
363       notdone = PETSC_FALSE;
364       nextcol = 1000000000;
365       PetscCall(PetscArrayzero(llens,bs));
366       for (j=0; j<bs; j++) { /* loop over rows in block */
367         while ((ilens[j] > 0 && ii[j][0]/bs <= currentcol)) { /* loop over columns in row */
368           ii[j]++;
369           ilens[j]--;
370           llens[j]++;
371         }
372         if (ilens[j] > 0) {
373           notdone = PETSC_TRUE;
374           nextcol = PetscMin(nextcol,ii[j][0]/bs);
375         }
376       }
377       PetscCheck(cnt < amat->maxnz,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Number of blocks found greater than expected %" PetscInt_FMT,cnt);
378       if (!flg || currentcol >= i) {
379         amat->j[cnt] = currentcol;
380         PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF,bs,bs,0,llens,amat->a+cnt++));
381       }
382 
383       if (!notdone) break;
384       currentcol = nextcol;
385     }
386     amat->ilen[i] = lens[i];
387   }
388 
389   PetscCall(PetscFree3(lens,ii,ilens));
390   PetscCall(PetscFree(llens));
391 
392   /* copy over the matrix, one row at a time */
393   for (i=0; i<m; i++) {
394     PetscCall(MatGetRow(tmpA,i,&ncols,&cols,&values));
395     PetscCall(MatSetValues(newmat,1,&i,ncols,cols,values,INSERT_VALUES));
396     PetscCall(MatRestoreRow(tmpA,i,&ncols,&cols,&values));
397   }
398   PetscCall(MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY));
399   PetscCall(MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY));
400   PetscFunctionReturn(0);
401 }
402 
403 static PetscErrorCode MatView_BlockMat(Mat A,PetscViewer viewer)
404 {
405   Mat_BlockMat      *a = (Mat_BlockMat*)A->data;
406   const char        *name;
407   PetscViewerFormat format;
408 
409   PetscFunctionBegin;
410   PetscCall(PetscObjectGetName((PetscObject)A,&name));
411   PetscCall(PetscViewerGetFormat(viewer,&format));
412   if (format == PETSC_VIEWER_ASCII_FACTOR_INFO || format == PETSC_VIEWER_ASCII_INFO) {
413     PetscCall(PetscViewerASCIIPrintf(viewer,"Nonzero block matrices = %" PetscInt_FMT " \n",a->nz));
414     if (A->symmetric) {
415       PetscCall(PetscViewerASCIIPrintf(viewer,"Only upper triangular part of symmetric matrix is stored\n"));
416     }
417   }
418   PetscFunctionReturn(0);
419 }
420 
421 static PetscErrorCode MatDestroy_BlockMat(Mat mat)
422 {
423   Mat_BlockMat   *bmat = (Mat_BlockMat*)mat->data;
424   PetscInt       i;
425 
426   PetscFunctionBegin;
427   PetscCall(VecDestroy(&bmat->right));
428   PetscCall(VecDestroy(&bmat->left));
429   PetscCall(VecDestroy(&bmat->middle));
430   PetscCall(VecDestroy(&bmat->workb));
431   if (bmat->diags) {
432     for (i=0; i<mat->rmap->n/mat->rmap->bs; i++) {
433       PetscCall(MatDestroy(&bmat->diags[i]));
434     }
435   }
436   if (bmat->a) {
437     for (i=0; i<bmat->nz; i++) {
438       PetscCall(MatDestroy(&bmat->a[i]));
439     }
440   }
441   PetscCall(MatSeqXAIJFreeAIJ(mat,(PetscScalar**)&bmat->a,&bmat->j,&bmat->i));
442   PetscCall(PetscFree(mat->data));
443   PetscFunctionReturn(0);
444 }
445 
446 static PetscErrorCode MatMult_BlockMat(Mat A,Vec x,Vec y)
447 {
448   Mat_BlockMat   *bmat = (Mat_BlockMat*)A->data;
449   PetscScalar    *xx,*yy;
450   PetscInt       *aj,i,*ii,jrow,m = A->rmap->n/A->rmap->bs,bs = A->rmap->bs,n,j;
451   Mat            *aa;
452 
453   PetscFunctionBegin;
454   /*
455      Standard CSR multiply except each entry is a Mat
456   */
457   PetscCall(VecGetArray(x,&xx));
458 
459   PetscCall(VecSet(y,0.0));
460   PetscCall(VecGetArray(y,&yy));
461   aj   = bmat->j;
462   aa   = bmat->a;
463   ii   = bmat->i;
464   for (i=0; i<m; i++) {
465     jrow = ii[i];
466     PetscCall(VecPlaceArray(bmat->left,yy + bs*i));
467     n    = ii[i+1] - jrow;
468     for (j=0; j<n; j++) {
469       PetscCall(VecPlaceArray(bmat->right,xx + bs*aj[jrow]));
470       PetscCall(MatMultAdd(aa[jrow],bmat->right,bmat->left,bmat->left));
471       PetscCall(VecResetArray(bmat->right));
472       jrow++;
473     }
474     PetscCall(VecResetArray(bmat->left));
475   }
476   PetscCall(VecRestoreArray(x,&xx));
477   PetscCall(VecRestoreArray(y,&yy));
478   PetscFunctionReturn(0);
479 }
480 
481 PetscErrorCode MatMult_BlockMat_Symmetric(Mat A,Vec x,Vec y)
482 {
483   Mat_BlockMat   *bmat = (Mat_BlockMat*)A->data;
484   PetscScalar    *xx,*yy;
485   PetscInt       *aj,i,*ii,jrow,m = A->rmap->n/A->rmap->bs,bs = A->rmap->bs,n,j;
486   Mat            *aa;
487 
488   PetscFunctionBegin;
489   /*
490      Standard CSR multiply except each entry is a Mat
491   */
492   PetscCall(VecGetArray(x,&xx));
493 
494   PetscCall(VecSet(y,0.0));
495   PetscCall(VecGetArray(y,&yy));
496   aj   = bmat->j;
497   aa   = bmat->a;
498   ii   = bmat->i;
499   for (i=0; i<m; i++) {
500     jrow = ii[i];
501     n    = ii[i+1] - jrow;
502     PetscCall(VecPlaceArray(bmat->left,yy + bs*i));
503     PetscCall(VecPlaceArray(bmat->middle,xx + bs*i));
504     /* if we ALWAYS required a diagonal entry then could remove this if test */
505     if (aj[jrow] == i) {
506       PetscCall(VecPlaceArray(bmat->right,xx + bs*aj[jrow]));
507       PetscCall(MatMultAdd(aa[jrow],bmat->right,bmat->left,bmat->left));
508       PetscCall(VecResetArray(bmat->right));
509       jrow++;
510       n--;
511     }
512     for (j=0; j<n; j++) {
513       PetscCall(VecPlaceArray(bmat->right,xx + bs*aj[jrow]));            /* upper triangular part */
514       PetscCall(MatMultAdd(aa[jrow],bmat->right,bmat->left,bmat->left));
515       PetscCall(VecResetArray(bmat->right));
516 
517       PetscCall(VecPlaceArray(bmat->right,yy + bs*aj[jrow]));            /* lower triangular part */
518       PetscCall(MatMultTransposeAdd(aa[jrow],bmat->middle,bmat->right,bmat->right));
519       PetscCall(VecResetArray(bmat->right));
520       jrow++;
521     }
522     PetscCall(VecResetArray(bmat->left));
523     PetscCall(VecResetArray(bmat->middle));
524   }
525   PetscCall(VecRestoreArray(x,&xx));
526   PetscCall(VecRestoreArray(y,&yy));
527   PetscFunctionReturn(0);
528 }
529 
/* NOTE(review): no-op stub — returns success without computing z = A*x + y, so a
   caller reaching this slot silently gets an unmodified z; consider implementing it
   or returning PETSC_ERR_SUP — confirm intended behavior */
static PetscErrorCode MatMultAdd_BlockMat(Mat A,Vec x,Vec y,Vec z)
{
  PetscFunctionBegin;
  PetscFunctionReturn(0);
}
535 
/* NOTE(review): no-op stub — returns success without computing y = A^T*x; consider
   implementing it or returning PETSC_ERR_SUP — confirm intended behavior */
static PetscErrorCode MatMultTranspose_BlockMat(Mat A,Vec x,Vec y)
{
  PetscFunctionBegin;
  PetscFunctionReturn(0);
}
541 
/* NOTE(review): no-op stub — returns success without computing z = A^T*x + y; consider
   implementing it or returning PETSC_ERR_SUP — confirm intended behavior */
static PetscErrorCode MatMultTransposeAdd_BlockMat(Mat A,Vec x,Vec y,Vec z)
{
  PetscFunctionBegin;
  PetscFunctionReturn(0);
}
547 
548 /*
549      Adds diagonal pointers to sparse matrix structure.
550 */
551 static PetscErrorCode MatMarkDiagonal_BlockMat(Mat A)
552 {
553   Mat_BlockMat   *a = (Mat_BlockMat*)A->data;
554   PetscInt       i,j,mbs = A->rmap->n/A->rmap->bs;
555 
556   PetscFunctionBegin;
557   if (!a->diag) {
558     PetscCall(PetscMalloc1(mbs,&a->diag));
559   }
560   for (i=0; i<mbs; i++) {
561     a->diag[i] = a->i[i+1];
562     for (j=a->i[i]; j<a->i[i+1]; j++) {
563       if (a->j[j] == i) {
564         a->diag[i] = j;
565         break;
566       }
567     }
568   }
569   PetscFunctionReturn(0);
570 }
571 
/*
   MatCreateSubMatrix_BlockMat - Extracts a scalar submatrix by picking entry
   (first,first) out of every stored block, where `first` comes from the stride
   index set. Requires isrow == iscol, a stride IS, and stride step equal to the
   block size (i.e. exactly one entry selected per block).

   The result is a MATSEQSBAIJ (bs=1) when A is symmetric, otherwise MATSEQAIJ.
*/
static PetscErrorCode MatCreateSubMatrix_BlockMat(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
{
  Mat_BlockMat   *a = (Mat_BlockMat*)A->data;
  Mat_SeqAIJ     *c;
  PetscInt       i,k,first,step,lensi,nrows,ncols;
  PetscInt       *j_new,*i_new,*aj = a->j,*ailen = a->ilen;
  PetscScalar    *a_new;
  Mat            C,*aa = a->a;
  PetscBool      stride,equal;

  PetscFunctionBegin;
  PetscCall(ISEqual(isrow,iscol,&equal));
  PetscCheck(equal,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only for identical column and row indices");
  PetscCall(PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&stride));
  PetscCheck(stride,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only for stride indices");
  PetscCall(ISStrideGetInfo(iscol,&first,&step));
  PetscCheck(step == A->rmap->bs,PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only select one entry from each block");

  PetscCall(ISGetLocalSize(isrow,&nrows));
  ncols = nrows;

  /* create submatrix */
  if (scall == MAT_REUSE_MATRIX) {
    PetscInt n_cols,n_rows;
    C    = *B;
    PetscCall(MatGetSize(C,&n_rows,&n_cols));
    PetscCheck(n_rows == nrows && n_cols == ncols,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size");
    PetscCall(MatZeroEntries(C));
  } else {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&C));
    PetscCall(MatSetSizes(C,nrows,ncols,PETSC_DETERMINE,PETSC_DETERMINE));
    if (A->symmetric) {
      PetscCall(MatSetType(C,MATSEQSBAIJ));
    } else {
      PetscCall(MatSetType(C,MATSEQAIJ));
    }
    /* one scalar nonzero per stored block: reuse the block-row lengths as preallocation */
    PetscCall(MatSeqAIJSetPreallocation(C,0,ailen));
    PetscCall(MatSeqSBAIJSetPreallocation(C,1,0,ailen));
  }
  c = (Mat_SeqAIJ*)C->data;

  /* loop over rows inserting into submatrix */
  /* NOTE(review): fills the SeqAIJ internals (a,j,i,ilen) directly rather than via
     MatSetValues(); assumes C's layout matches the preallocation above exactly */
  a_new = c->a;
  j_new = c->j;
  i_new = c->i;

  for (i=0; i<nrows; i++) {
    lensi = ailen[i];
    for (k=0; k<lensi; k++) {
      *j_new++ = *aj++;
      PetscCall(MatGetValue(*aa++,first,first,a_new++));
    }
    i_new[i+1] = i_new[i] + lensi;
    c->ilen[i] = lensi;
  }

  PetscCall(MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY));
  *B   = C;
  PetscFunctionReturn(0);
}
633 
/*
   MatAssemblyEnd_BlockMat - Compresses the block rows in place (removing the
   unused preallocated slots between rows), assembles every block submatrix,
   logs assembly statistics, and records the diagonal block locations.
*/
static PetscErrorCode MatAssemblyEnd_BlockMat(Mat A,MatAssemblyType mode)
{
  Mat_BlockMat   *a = (Mat_BlockMat*)A->data;
  PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
  PetscInt       m      = a->mbs,*ip,N,*ailen = a->ilen,rmax = 0;
  Mat            *aa    = a->a,*ap;

  PetscFunctionBegin;
  if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);

  if (m) rmax = ailen[0]; /* determine row with most nonzeros */
  for (i=1; i<m; i++) {
    /* move each row back by the amount of empty slots (fshift) before it*/
    fshift += imax[i-1] - ailen[i-1];
    rmax    = PetscMax(rmax,ailen[i]);
    if (fshift) {
      ip = aj + ai[i];
      ap = aa + ai[i];
      N  = ailen[i];
      for (j=0; j<N; j++) {
        /* in-place left shift; safe because fshift > 0 keeps source ahead of destination */
        ip[j-fshift] = ip[j];
        ap[j-fshift] = ap[j];
      }
    }
    ai[i] = ai[i-1] + ailen[i-1];   /* row starts become cumulative used lengths */
  }
  if (m) {
    fshift += imax[m-1] - ailen[m-1];
    ai[m]   = ai[m-1] + ailen[m-1];
  }
  /* reset ilen and imax for each row */
  for (i=0; i<m; i++) {
    ailen[i] = imax[i] = ai[i+1] - ai[i];
  }
  a->nz = ai[m];
  for (i=0; i<a->nz; i++) {
    PetscAssert(aa[i],PETSC_COMM_SELF,PETSC_ERR_PLIB,"Null matrix at location %" PetscInt_FMT " column %" PetscInt_FMT " nz %" PetscInt_FMT,i,aj[i],a->nz);
    PetscCall(MatAssemblyBegin(aa[i],MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(aa[i],MAT_FINAL_ASSEMBLY));
  }
  PetscCall(PetscInfo(A,"Matrix size: %" PetscInt_FMT " X %" PetscInt_FMT "; storage space: %" PetscInt_FMT " unneeded,%" PetscInt_FMT " used\n",m,A->cmap->n/A->cmap->bs,fshift,a->nz));
  PetscCall(PetscInfo(A,"Number of mallocs during MatSetValues() is %" PetscInt_FMT "\n",a->reallocs));
  PetscCall(PetscInfo(A,"Maximum nonzeros in any row is %" PetscInt_FMT "\n",rmax));

  A->info.mallocs    += a->reallocs;
  a->reallocs         = 0;
  A->info.nz_unneeded = (double)fshift;
  a->rmax             = rmax;
  PetscCall(MatMarkDiagonal_BlockMat(A));
  PetscFunctionReturn(0);
}
685 
686 static PetscErrorCode MatSetOption_BlockMat(Mat A,MatOption opt,PetscBool flg)
687 {
688   PetscFunctionBegin;
689   if (opt == MAT_SYMMETRIC && flg) {
690     A->ops->sor  = MatSOR_BlockMat_Symmetric;
691     A->ops->mult = MatMult_BlockMat_Symmetric;
692   } else {
693     PetscCall(PetscInfo(A,"Unused matrix option %s\n",MatOptions[opt]));
694   }
695   PetscFunctionReturn(0);
696 }
697 
/* Function table for MATBLOCKMAT; slots left NULL are operations this type does not implement */
static struct _MatOps MatOps_Values = {MatSetValues_BlockMat,
                                       NULL,
                                       NULL,
                                       MatMult_BlockMat,
                               /*  4*/ MatMultAdd_BlockMat,
                                       MatMultTranspose_BlockMat,
                                       MatMultTransposeAdd_BlockMat,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 10*/ NULL,
                                       NULL,
                                       NULL,
                                       MatSOR_BlockMat,
                                       NULL,
                               /* 15*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 20*/ NULL,
                                       MatAssemblyEnd_BlockMat,
                                       MatSetOption_BlockMat,
                                       NULL,
                               /* 24*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 29*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 34*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 39*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 44*/ NULL,
                                       NULL,
                                       MatShift_Basic,
                                       NULL,
                                       NULL,
                               /* 49*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 54*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 59*/ MatCreateSubMatrix_BlockMat,
                                       MatDestroy_BlockMat,
                                       MatView_BlockMat,
                                       NULL,
                                       NULL,
                               /* 64*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 69*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 74*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 79*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatLoad_BlockMat,
                               /* 84*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 89*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 94*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 99*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /*104*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /*109*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /*114*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /*119*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /*124*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /*129*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /*134*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /*139*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /*144*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL
};
847 
848 /*@C
849    MatBlockMatSetPreallocation - For good matrix assembly performance
850    the user should preallocate the matrix storage by setting the parameter nz
851    (or the array nnz).  By setting these parameters accurately, performance
852    during matrix assembly can be increased by more than a factor of 50.
853 
854    Collective
855 
856    Input Parameters:
857 +  B - The matrix
858 .  bs - size of each block in matrix
859 .  nz - number of nonzeros per block row (same for all rows)
860 -  nnz - array containing the number of nonzeros in the various block rows
861          (possibly different for each row) or NULL
862 
863    Notes:
864      If nnz is given then nz is ignored
865 
   Specify the preallocated storage with the nnz array; the current
   implementation requires block-row-by-row preallocation and raises an
   error when nnz is NULL.  For large problems you MUST preallocate memory
   or you will get TERRIBLE performance, see the users' manual chapter on matrices.
870 
871    Level: intermediate
872 
873 .seealso: `MatCreate()`, `MatCreateBlockMat()`, `MatSetValues()`
874 
875 @*/
PetscErrorCode  MatBlockMatSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
{
  PetscFunctionBegin;
  /* Forward to the type-specific implementation composed on B under the name
     "MatBlockMatSetPreallocation_C"; PetscTryMethod() is a silent no-op for
     matrix types that do not provide it, so this is safe to call on any Mat. */
  PetscTryMethod(B,"MatBlockMatSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));
  PetscFunctionReturn(0);
}
882 
883 static PetscErrorCode  MatBlockMatSetPreallocation_BlockMat(Mat A,PetscInt bs,PetscInt nz,PetscInt *nnz)
884 {
885   Mat_BlockMat   *bmat = (Mat_BlockMat*)A->data;
886   PetscInt       i;
887 
888   PetscFunctionBegin;
889   PetscCall(PetscLayoutSetBlockSize(A->rmap,bs));
890   PetscCall(PetscLayoutSetBlockSize(A->cmap,bs));
891   PetscCall(PetscLayoutSetUp(A->rmap));
892   PetscCall(PetscLayoutSetUp(A->cmap));
893   PetscCall(PetscLayoutGetBlockSize(A->rmap,&bs));
894 
895   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
896   PetscCheck(nz >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %" PetscInt_FMT,nz);
897   if (nnz) {
898     for (i=0; i<A->rmap->n/bs; i++) {
899       PetscCheck(nnz[i] >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %" PetscInt_FMT " value %" PetscInt_FMT,i,nnz[i]);
900       PetscCheck(nnz[i] <= A->cmap->n/bs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than row length: local row %" PetscInt_FMT " value %" PetscInt_FMT " rowlength %" PetscInt_FMT,i,nnz[i],A->cmap->n/bs);
901     }
902   }
903   bmat->mbs = A->rmap->n/bs;
904 
905   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs,NULL,&bmat->right));
906   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs,NULL,&bmat->middle));
907   PetscCall(VecCreateSeq(PETSC_COMM_SELF,bs,&bmat->left));
908 
909   if (!bmat->imax) {
910     PetscCall(PetscMalloc2(A->rmap->n,&bmat->imax,A->rmap->n,&bmat->ilen));
911     PetscCall(PetscLogObjectMemory((PetscObject)A,2*A->rmap->n*sizeof(PetscInt)));
912   }
913   if (PetscLikely(nnz)) {
914     nz = 0;
915     for (i=0; i<A->rmap->n/A->rmap->bs; i++) {
916       bmat->imax[i] = nnz[i];
917       nz           += nnz[i];
918     }
919   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Currently requires block row by row preallocation");
920 
921   /* bmat->ilen will count nonzeros in each row so far. */
922   PetscCall(PetscArrayzero(bmat->ilen,bmat->mbs));
923 
924   /* allocate the matrix space */
925   PetscCall(MatSeqXAIJFreeAIJ(A,(PetscScalar**)&bmat->a,&bmat->j,&bmat->i));
926   PetscCall(PetscMalloc3(nz,&bmat->a,nz,&bmat->j,A->rmap->n+1,&bmat->i));
927   PetscCall(PetscLogObjectMemory((PetscObject)A,(A->rmap->n+1)*sizeof(PetscInt)+nz*(sizeof(PetscScalar)+sizeof(PetscInt))));
928   bmat->i[0] = 0;
929   for (i=1; i<bmat->mbs+1; i++) {
930     bmat->i[i] = bmat->i[i-1] + bmat->imax[i-1];
931   }
932   bmat->singlemalloc = PETSC_TRUE;
933   bmat->free_a       = PETSC_TRUE;
934   bmat->free_ij      = PETSC_TRUE;
935 
936   bmat->nz            = 0;
937   bmat->maxnz         = nz;
938   A->info.nz_unneeded = (double)bmat->maxnz;
939   PetscCall(MatSetOption(A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE));
940   PetscFunctionReturn(0);
941 }
942 
943 /*MC
944    MATBLOCKMAT - A matrix that is defined by a set of Mat's that represents a sparse block matrix
945                  consisting of (usually) sparse blocks.
946 
947   Level: advanced
948 
949 .seealso: `MatCreateBlockMat()`
950 
951 M*/
952 
953 PETSC_EXTERN PetscErrorCode MatCreate_BlockMat(Mat A)
954 {
955   Mat_BlockMat   *b;
956 
957   PetscFunctionBegin;
958   PetscCall(PetscNewLog(A,&b));
959   A->data = (void*)b;
960   PetscCall(PetscMemcpy(A->ops,&MatOps_Values,sizeof(struct _MatOps)));
961 
962   A->assembled    = PETSC_TRUE;
963   A->preallocated = PETSC_FALSE;
964   PetscCall(PetscObjectChangeTypeName((PetscObject)A,MATBLOCKMAT));
965 
966   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatBlockMatSetPreallocation_C",MatBlockMatSetPreallocation_BlockMat));
967   PetscFunctionReturn(0);
968 }
969 
970 /*@C
971    MatCreateBlockMat - Creates a new matrix in which each block contains a uniform-size sequential Mat object
972 
973   Collective
974 
975    Input Parameters:
976 +  comm - MPI communicator
977 .  m - number of rows
978 .  n  - number of columns
979 .  bs - size of each submatrix
980 .  nz  - expected maximum number of nonzero blocks in row (use PETSC_DEFAULT if not known)
-  nnz - expected number of nonzeros per block row if known (use NULL otherwise)
982 
983    Output Parameter:
984 .  A - the matrix
985 
986    Level: intermediate
987 
988    Notes:
989     Matrices of this type are nominally-sparse matrices in which each "entry" is a Mat object.  Each Mat must
990    have the same size and be sequential.  The local and global sizes must be compatible with this decomposition.
991 
992    For matrices containing parallel submatrices and variable block sizes, see MATNEST.
993 
994 .seealso: `MATBLOCKMAT`, `MatCreateNest()`
995 @*/
PetscErrorCode  MatCreateBlockMat(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt bs,PetscInt nz,PetscInt *nnz, Mat *A)
{
  PetscFunctionBegin;
  /* Standard PETSc creation sequence: create the shell, fix the local sizes
     (global sizes are derived), set the type, then preallocate */
  PetscCall(MatCreate(comm,A));
  PetscCall(MatSetSizes(*A,m,n,PETSC_DETERMINE,PETSC_DETERMINE));
  PetscCall(MatSetType(*A,MATBLOCKMAT));
  /* NOTE(review): the implementation currently errors if nnz is NULL, so
     callers must supply per-block-row counts despite the manpage wording */
  PetscCall(MatBlockMatSetPreallocation(*A,bs,nz,nnz));
  PetscFunctionReturn(0);
}
1005