xref: /petsc/src/mat/impls/blockmat/seq/blockmat.c (revision ebead697dbf761eb322f829370bbe90b3bd93fa3)
1 
2 /*
3    This provides a matrix that consists of Mats
4 */
5 
6 #include <petsc/private/matimpl.h>              /*I "petscmat.h" I*/
7 #include <../src/mat/impls/baij/seq/baij.h>    /* use the common AIJ data-structure */
8 
/*
   Data structure for MATBLOCKMAT: standard CSR layout (from SEQAIJHEADER)
   in which each stored "nonzero" is itself a bs-by-bs Mat rather than a
   scalar.
*/
typedef struct {
  SEQAIJHEADER(Mat);            /* i, j, a, ilen, imax, nz, ... with Mat-valued entries */
  SEQBAIJHEADER;                /* block-size related fields (mbs, ...) */
  Mat *diags;                   /* factored diagonal blocks, created lazily by MatSOR */

  Vec left,right,middle,workb;                 /* dummy vectors to perform local parts of product */
} Mat_BlockMat;
16 
17 static PetscErrorCode MatSOR_BlockMat_Symmetric(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
18 {
19   Mat_BlockMat      *a = (Mat_BlockMat*)A->data;
20   PetscScalar       *x;
21   const Mat         *v;
22   const PetscScalar *b;
23   PetscInt          n = A->cmap->n,i,mbs = n/A->rmap->bs,j,bs = A->rmap->bs;
24   const PetscInt    *idx;
25   IS                row,col;
26   MatFactorInfo     info;
27   Vec               left = a->left,right = a->right, middle = a->middle;
28   Mat               *diag;
29 
30   PetscFunctionBegin;
31   its = its*lits;
32   PetscCheck(its > 0,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive",its,lits);
33   PetscCheck(!(flag & SOR_EISENSTAT),PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
34   PetscCheck(omega == 1.0,PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for omega not equal to 1.0");
35   PetscCheck(!fshift,PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for fshift");
36   PetscCheck(!((flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) && !(flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP)),PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot do backward sweep without forward sweep");
37 
38   if (!a->diags) {
39     PetscCall(PetscMalloc1(mbs,&a->diags));
40     PetscCall(MatFactorInfoInitialize(&info));
41     for (i=0; i<mbs; i++) {
42       PetscCall(MatGetOrdering(a->a[a->diag[i]], MATORDERINGND,&row,&col));
43       PetscCall(MatCholeskyFactorSymbolic(a->diags[i],a->a[a->diag[i]],row,&info));
44       PetscCall(MatCholeskyFactorNumeric(a->diags[i],a->a[a->diag[i]],&info));
45       PetscCall(ISDestroy(&row));
46       PetscCall(ISDestroy(&col));
47     }
48     PetscCall(VecDuplicate(bb,&a->workb));
49   }
50   diag = a->diags;
51 
52   PetscCall(VecSet(xx,0.0));
53   PetscCall(VecGetArray(xx,&x));
54   /* copy right hand side because it must be modified during iteration */
55   PetscCall(VecCopy(bb,a->workb));
56   PetscCall(VecGetArrayRead(a->workb,&b));
57 
58   /* need to add code for when initial guess is zero, see MatSOR_SeqAIJ */
59   while (its--) {
60     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
61 
62       for (i=0; i<mbs; i++) {
63         n   = a->i[i+1] - a->i[i] - 1;
64         idx = a->j + a->i[i] + 1;
65         v   = a->a + a->i[i] + 1;
66 
67         PetscCall(VecSet(left,0.0));
68         for (j=0; j<n; j++) {
69           PetscCall(VecPlaceArray(right,x + idx[j]*bs));
70           PetscCall(MatMultAdd(v[j],right,left,left));
71           PetscCall(VecResetArray(right));
72         }
73         PetscCall(VecPlaceArray(right,b + i*bs));
74         PetscCall(VecAYPX(left,-1.0,right));
75         PetscCall(VecResetArray(right));
76 
77         PetscCall(VecPlaceArray(right,x + i*bs));
78         PetscCall(MatSolve(diag[i],left,right));
79 
80         /* now adjust right hand side, see MatSOR_SeqSBAIJ */
81         for (j=0; j<n; j++) {
82           PetscCall(MatMultTranspose(v[j],right,left));
83           PetscCall(VecPlaceArray(middle,b + idx[j]*bs));
84           PetscCall(VecAXPY(middle,-1.0,left));
85           PetscCall(VecResetArray(middle));
86         }
87         PetscCall(VecResetArray(right));
88 
89       }
90     }
91     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
92 
93       for (i=mbs-1; i>=0; i--) {
94         n   = a->i[i+1] - a->i[i] - 1;
95         idx = a->j + a->i[i] + 1;
96         v   = a->a + a->i[i] + 1;
97 
98         PetscCall(VecSet(left,0.0));
99         for (j=0; j<n; j++) {
100           PetscCall(VecPlaceArray(right,x + idx[j]*bs));
101           PetscCall(MatMultAdd(v[j],right,left,left));
102           PetscCall(VecResetArray(right));
103         }
104         PetscCall(VecPlaceArray(right,b + i*bs));
105         PetscCall(VecAYPX(left,-1.0,right));
106         PetscCall(VecResetArray(right));
107 
108         PetscCall(VecPlaceArray(right,x + i*bs));
109         PetscCall(MatSolve(diag[i],left,right));
110         PetscCall(VecResetArray(right));
111 
112       }
113     }
114   }
115   PetscCall(VecRestoreArray(xx,&x));
116   PetscCall(VecRestoreArrayRead(a->workb,&b));
117   PetscFunctionReturn(0);
118 }
119 
120 static PetscErrorCode MatSOR_BlockMat(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
121 {
122   Mat_BlockMat      *a = (Mat_BlockMat*)A->data;
123   PetscScalar       *x;
124   const Mat         *v;
125   const PetscScalar *b;
126   PetscInt          n = A->cmap->n,i,mbs = n/A->rmap->bs,j,bs = A->rmap->bs;
127   const PetscInt    *idx;
128   IS                row,col;
129   MatFactorInfo     info;
130   Vec               left = a->left,right = a->right;
131   Mat               *diag;
132 
133   PetscFunctionBegin;
134   its = its*lits;
135   PetscCheck(its > 0,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive",its,lits);
136   PetscCheck(!(flag & SOR_EISENSTAT),PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
137   PetscCheck(omega == 1.0,PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for omega not equal to 1.0");
138   PetscCheck(!fshift,PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for fshift");
139 
140   if (!a->diags) {
141     PetscCall(PetscMalloc1(mbs,&a->diags));
142     PetscCall(MatFactorInfoInitialize(&info));
143     for (i=0; i<mbs; i++) {
144       PetscCall(MatGetOrdering(a->a[a->diag[i]], MATORDERINGND,&row,&col));
145       PetscCall(MatLUFactorSymbolic(a->diags[i],a->a[a->diag[i]],row,col,&info));
146       PetscCall(MatLUFactorNumeric(a->diags[i],a->a[a->diag[i]],&info));
147       PetscCall(ISDestroy(&row));
148       PetscCall(ISDestroy(&col));
149     }
150   }
151   diag = a->diags;
152 
153   PetscCall(VecSet(xx,0.0));
154   PetscCall(VecGetArray(xx,&x));
155   PetscCall(VecGetArrayRead(bb,&b));
156 
157   /* need to add code for when initial guess is zero, see MatSOR_SeqAIJ */
158   while (its--) {
159     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
160 
161       for (i=0; i<mbs; i++) {
162         n   = a->i[i+1] - a->i[i];
163         idx = a->j + a->i[i];
164         v   = a->a + a->i[i];
165 
166         PetscCall(VecSet(left,0.0));
167         for (j=0; j<n; j++) {
168           if (idx[j] != i) {
169             PetscCall(VecPlaceArray(right,x + idx[j]*bs));
170             PetscCall(MatMultAdd(v[j],right,left,left));
171             PetscCall(VecResetArray(right));
172           }
173         }
174         PetscCall(VecPlaceArray(right,b + i*bs));
175         PetscCall(VecAYPX(left,-1.0,right));
176         PetscCall(VecResetArray(right));
177 
178         PetscCall(VecPlaceArray(right,x + i*bs));
179         PetscCall(MatSolve(diag[i],left,right));
180         PetscCall(VecResetArray(right));
181       }
182     }
183     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
184 
185       for (i=mbs-1; i>=0; i--) {
186         n   = a->i[i+1] - a->i[i];
187         idx = a->j + a->i[i];
188         v   = a->a + a->i[i];
189 
190         PetscCall(VecSet(left,0.0));
191         for (j=0; j<n; j++) {
192           if (idx[j] != i) {
193             PetscCall(VecPlaceArray(right,x + idx[j]*bs));
194             PetscCall(MatMultAdd(v[j],right,left,left));
195             PetscCall(VecResetArray(right));
196           }
197         }
198         PetscCall(VecPlaceArray(right,b + i*bs));
199         PetscCall(VecAYPX(left,-1.0,right));
200         PetscCall(VecResetArray(right));
201 
202         PetscCall(VecPlaceArray(right,x + i*bs));
203         PetscCall(MatSolve(diag[i],left,right));
204         PetscCall(VecResetArray(right));
205 
206       }
207     }
208   }
209   PetscCall(VecRestoreArray(xx,&x));
210   PetscCall(VecRestoreArrayRead(bb,&b));
211   PetscFunctionReturn(0);
212 }
213 
/*
   Inserts/adds scalar values into the block matrix: each scalar (im[k],in[l])
   is routed to entry (im[k]%bs, in[l]%bs) of the bs-by-bs block at block
   position (im[k]/bs, in[l]/bs).  Blocks are created on demand as SeqAIJ
   matrices.  For matrices marked symmetric only the upper triangular blocks
   (bcol >= brow) are stored; values targeting the strictly lower part are
   silently dropped.

   The search/insert logic mirrors MatSetValues_SeqAIJ: a binary search over
   the (sorted) block column indices of the row, followed by a shift-up of
   later entries when a new block must be inserted.
*/
static PetscErrorCode MatSetValues_BlockMat(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
{
  Mat_BlockMat   *a = (Mat_BlockMat*)A->data;
  PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
  PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
  PetscInt       *aj  =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
  PetscInt       ridx,cidx;
  PetscBool      roworiented=a->roworiented;
  MatScalar      value;
  Mat            *ap,*aa = a->a;

  PetscFunctionBegin;
  for (k=0; k<m; k++) { /* loop over added rows */
    row  = im[k];
    brow = row/bs;                     /* block row containing this scalar row */
    if (row < 0) continue;             /* negative rows are ignored (standard PETSc convention) */
    PetscCheck(row < A->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,row,A->rmap->N-1);
    rp   = aj + ai[brow];              /* block column indices of block row brow */
    ap   = aa + ai[brow];              /* block (Mat) entries of block row brow */
    rmax = imax[brow];                 /* allocated slots in this block row */
    nrow = ailen[brow];                /* slots currently in use */
    low  = 0;
    high = nrow;
    for (l=0; l<n; l++) { /* loop over added columns */
      if (in[l] < 0) continue;
      PetscCheck(in[l] < A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,in[l],A->cmap->n-1);
      col = in[l]; bcol = col/bs;
      /* symmetric storage keeps only the upper triangular blocks */
      if (A->symmetric == PETSC_BOOL3_TRUE && brow > bcol) continue;
      ridx = row % bs; cidx = col % bs;  /* location inside the bs-by-bs block */
      if (roworiented) value = v[l + k*n];
      else value = v[k + l*m];

      /* reuse the previous search window when columns arrive in increasing order */
      if (col <= lastcol) low = 0;
      else high = nrow;
      lastcol = col;
      /* binary search down to a window of at most 7, then scan linearly */
      while (high-low > 7) {
        t = (low+high)/2;
        if (rp[t] > bcol) high = t;
        else              low  = t;
      }
      for (i=low; i<high; i++) {
        if (rp[i] > bcol) break;
        if (rp[i] == bcol) goto noinsert1;   /* block already exists */
      }
      if (nonew == 1) goto noinsert1;        /* new nonzeros silently ignored */
      PetscCheck(nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col);
      /* grow the row storage if needed (may reallocate aa/aj and reset rp/ap) */
      MatSeqXAIJReallocateAIJ(A,a->mbs,1,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,Mat);
      N = nrow++ - 1; high++;
      /* shift up all the later entries in this row */
      for (ii=N; ii>=i; ii--) {
        rp[ii+1] = rp[ii];
        ap[ii+1] = ap[ii];
      }
      if (N>=i) ap[i] = NULL;                /* slot i is now free; block created below */
      rp[i] = bcol;
      a->nz++;
      A->nonzerostate++;
noinsert1:;
      /* create the block on first touch */
      if (!*(ap+i)) {
        PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF,bs,bs,0,NULL,ap+i));
      }
      PetscCall(MatSetValues(ap[i],1,&ridx,1,&cidx,&value,is));
      low  = i;
    }
    ailen[brow] = nrow;
  }
  PetscFunctionReturn(0);
}
282 
283 static PetscErrorCode MatLoad_BlockMat(Mat newmat, PetscViewer viewer)
284 {
285   Mat               tmpA;
286   PetscInt          i,j,m,n,bs = 1,ncols,*lens,currentcol,mbs,**ii,*ilens,nextcol,*llens,cnt = 0;
287   const PetscInt    *cols;
288   const PetscScalar *values;
289   PetscBool         flg = PETSC_FALSE,notdone;
290   Mat_SeqAIJ        *a;
291   Mat_BlockMat      *amat;
292 
293   PetscFunctionBegin;
294   /* force binary viewer to load .info file if it has not yet done so */
295   PetscCall(PetscViewerSetUp(viewer));
296   PetscCall(MatCreate(PETSC_COMM_SELF,&tmpA));
297   PetscCall(MatSetType(tmpA,MATSEQAIJ));
298   PetscCall(MatLoad_SeqAIJ(tmpA,viewer));
299 
300   PetscCall(MatGetLocalSize(tmpA,&m,&n));
301   PetscOptionsBegin(PETSC_COMM_SELF,NULL,"Options for loading BlockMat matrix 1","Mat");
302   PetscCall(PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL));
303   PetscCall(PetscOptionsBool("-matload_symmetric","Store the matrix as symmetric","MatLoad",flg,&flg,NULL));
304   PetscOptionsEnd();
305 
306   /* Determine number of nonzero blocks for each block row */
307   a    = (Mat_SeqAIJ*) tmpA->data;
308   mbs  = m/bs;
309   PetscCall(PetscMalloc3(mbs,&lens,bs,&ii,bs,&ilens));
310   PetscCall(PetscArrayzero(lens,mbs));
311 
312   for (i=0; i<mbs; i++) {
313     for (j=0; j<bs; j++) {
314       ii[j]    = a->j + a->i[i*bs + j];
315       ilens[j] = a->i[i*bs + j + 1] - a->i[i*bs + j];
316     }
317 
318     currentcol = -1;
319     while (PETSC_TRUE) {
320       notdone = PETSC_FALSE;
321       nextcol = 1000000000;
322       for (j=0; j<bs; j++) {
323         while ((ilens[j] > 0 && ii[j][0]/bs <= currentcol)) {
324           ii[j]++;
325           ilens[j]--;
326         }
327         if (ilens[j] > 0) {
328           notdone = PETSC_TRUE;
329           nextcol = PetscMin(nextcol,ii[j][0]/bs);
330         }
331       }
332       if (!notdone) break;
333       if (!flg || (nextcol >= i)) lens[i]++;
334       currentcol = nextcol;
335     }
336   }
337 
338   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) {
339     PetscCall(MatSetSizes(newmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE));
340   }
341   PetscCall(MatBlockMatSetPreallocation(newmat,bs,0,lens));
342   if (flg) PetscCall(MatSetOption(newmat,MAT_SYMMETRIC,PETSC_TRUE));
343   amat = (Mat_BlockMat*)(newmat)->data;
344 
345   /* preallocate the submatrices */
346   PetscCall(PetscMalloc1(bs,&llens));
347   for (i=0; i<mbs; i++) { /* loops for block rows */
348     for (j=0; j<bs; j++) {
349       ii[j]    = a->j + a->i[i*bs + j];
350       ilens[j] = a->i[i*bs + j + 1] - a->i[i*bs + j];
351     }
352 
353     currentcol = 1000000000;
354     for (j=0; j<bs; j++) { /* loop over rows in block finding first nonzero block */
355       if (ilens[j] > 0) {
356         currentcol = PetscMin(currentcol,ii[j][0]/bs);
357       }
358     }
359 
360     while (PETSC_TRUE) {  /* loops over blocks in block row */
361       notdone = PETSC_FALSE;
362       nextcol = 1000000000;
363       PetscCall(PetscArrayzero(llens,bs));
364       for (j=0; j<bs; j++) { /* loop over rows in block */
365         while ((ilens[j] > 0 && ii[j][0]/bs <= currentcol)) { /* loop over columns in row */
366           ii[j]++;
367           ilens[j]--;
368           llens[j]++;
369         }
370         if (ilens[j] > 0) {
371           notdone = PETSC_TRUE;
372           nextcol = PetscMin(nextcol,ii[j][0]/bs);
373         }
374       }
375       PetscCheck(cnt < amat->maxnz,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Number of blocks found greater than expected %" PetscInt_FMT,cnt);
376       if (!flg || currentcol >= i) {
377         amat->j[cnt] = currentcol;
378         PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF,bs,bs,0,llens,amat->a+cnt++));
379       }
380 
381       if (!notdone) break;
382       currentcol = nextcol;
383     }
384     amat->ilen[i] = lens[i];
385   }
386 
387   PetscCall(PetscFree3(lens,ii,ilens));
388   PetscCall(PetscFree(llens));
389 
390   /* copy over the matrix, one row at a time */
391   for (i=0; i<m; i++) {
392     PetscCall(MatGetRow(tmpA,i,&ncols,&cols,&values));
393     PetscCall(MatSetValues(newmat,1,&i,ncols,cols,values,INSERT_VALUES));
394     PetscCall(MatRestoreRow(tmpA,i,&ncols,&cols,&values));
395   }
396   PetscCall(MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY));
397   PetscCall(MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY));
398   PetscFunctionReturn(0);
399 }
400 
401 static PetscErrorCode MatView_BlockMat(Mat A,PetscViewer viewer)
402 {
403   Mat_BlockMat      *a = (Mat_BlockMat*)A->data;
404   const char        *name;
405   PetscViewerFormat format;
406 
407   PetscFunctionBegin;
408   PetscCall(PetscObjectGetName((PetscObject)A,&name));
409   PetscCall(PetscViewerGetFormat(viewer,&format));
410   if (format == PETSC_VIEWER_ASCII_FACTOR_INFO || format == PETSC_VIEWER_ASCII_INFO) {
411     PetscCall(PetscViewerASCIIPrintf(viewer,"Nonzero block matrices = %" PetscInt_FMT " \n",a->nz));
412     if (A->symmetric == PETSC_BOOL3_TRUE) PetscCall(PetscViewerASCIIPrintf(viewer,"Only upper triangular part of symmetric matrix is stored\n"));
413   }
414   PetscFunctionReturn(0);
415 }
416 
417 static PetscErrorCode MatDestroy_BlockMat(Mat mat)
418 {
419   Mat_BlockMat   *bmat = (Mat_BlockMat*)mat->data;
420   PetscInt       i;
421 
422   PetscFunctionBegin;
423   PetscCall(VecDestroy(&bmat->right));
424   PetscCall(VecDestroy(&bmat->left));
425   PetscCall(VecDestroy(&bmat->middle));
426   PetscCall(VecDestroy(&bmat->workb));
427   if (bmat->diags) {
428     for (i=0; i<mat->rmap->n/mat->rmap->bs; i++) {
429       PetscCall(MatDestroy(&bmat->diags[i]));
430     }
431   }
432   if (bmat->a) {
433     for (i=0; i<bmat->nz; i++) {
434       PetscCall(MatDestroy(&bmat->a[i]));
435     }
436   }
437   PetscCall(MatSeqXAIJFreeAIJ(mat,(PetscScalar**)&bmat->a,&bmat->j,&bmat->i));
438   PetscCall(PetscFree(mat->data));
439   PetscFunctionReturn(0);
440 }
441 
442 static PetscErrorCode MatMult_BlockMat(Mat A,Vec x,Vec y)
443 {
444   Mat_BlockMat   *bmat = (Mat_BlockMat*)A->data;
445   PetscScalar    *xx,*yy;
446   PetscInt       *aj,i,*ii,jrow,m = A->rmap->n/A->rmap->bs,bs = A->rmap->bs,n,j;
447   Mat            *aa;
448 
449   PetscFunctionBegin;
450   /*
451      Standard CSR multiply except each entry is a Mat
452   */
453   PetscCall(VecGetArray(x,&xx));
454 
455   PetscCall(VecSet(y,0.0));
456   PetscCall(VecGetArray(y,&yy));
457   aj   = bmat->j;
458   aa   = bmat->a;
459   ii   = bmat->i;
460   for (i=0; i<m; i++) {
461     jrow = ii[i];
462     PetscCall(VecPlaceArray(bmat->left,yy + bs*i));
463     n    = ii[i+1] - jrow;
464     for (j=0; j<n; j++) {
465       PetscCall(VecPlaceArray(bmat->right,xx + bs*aj[jrow]));
466       PetscCall(MatMultAdd(aa[jrow],bmat->right,bmat->left,bmat->left));
467       PetscCall(VecResetArray(bmat->right));
468       jrow++;
469     }
470     PetscCall(VecResetArray(bmat->left));
471   }
472   PetscCall(VecRestoreArray(x,&xx));
473   PetscCall(VecRestoreArray(y,&yy));
474   PetscFunctionReturn(0);
475 }
476 
/*
   y = A x when only the upper triangular blocks (including the diagonal) are
   stored.  Each stored off-diagonal block A(i,j), j > i, contributes twice:
   A(i,j) x(j) into y(i) (upper part) and A(i,j)^T x(i) into y(j) (lower
   part); the diagonal block contributes once.
*/
PetscErrorCode MatMult_BlockMat_Symmetric(Mat A,Vec x,Vec y)
{
  Mat_BlockMat   *bmat = (Mat_BlockMat*)A->data;
  PetscScalar    *xx,*yy;
  PetscInt       *aj,i,*ii,jrow,m = A->rmap->n/A->rmap->bs,bs = A->rmap->bs,n,j;
  Mat            *aa;

  PetscFunctionBegin;
  /*
     Standard CSR multiply except each entry is a Mat
  */
  PetscCall(VecGetArray(x,&xx));

  PetscCall(VecSet(y,0.0));
  PetscCall(VecGetArray(y,&yy));
  aj   = bmat->j;
  aa   = bmat->a;
  ii   = bmat->i;
  for (i=0; i<m; i++) {
    jrow = ii[i];
    n    = ii[i+1] - jrow;
    PetscCall(VecPlaceArray(bmat->left,yy + bs*i));    /* y(i): destination for upper-part products */
    PetscCall(VecPlaceArray(bmat->middle,xx + bs*i));  /* x(i): source for transposed (lower-part) products */
    /* if we ALWAYS required a diagonal entry then could remove this if test */
    if (aj[jrow] == i) {
      /* diagonal block: applied once, no transpose contribution */
      PetscCall(VecPlaceArray(bmat->right,xx + bs*aj[jrow]));
      PetscCall(MatMultAdd(aa[jrow],bmat->right,bmat->left,bmat->left));
      PetscCall(VecResetArray(bmat->right));
      jrow++;
      n--;
    }
    for (j=0; j<n; j++) {
      PetscCall(VecPlaceArray(bmat->right,xx + bs*aj[jrow]));            /* upper triangular part */
      PetscCall(MatMultAdd(aa[jrow],bmat->right,bmat->left,bmat->left));
      PetscCall(VecResetArray(bmat->right));

      PetscCall(VecPlaceArray(bmat->right,yy + bs*aj[jrow]));            /* lower triangular part */
      PetscCall(MatMultTransposeAdd(aa[jrow],bmat->middle,bmat->right,bmat->right));
      PetscCall(VecResetArray(bmat->right));
      jrow++;
    }
    PetscCall(VecResetArray(bmat->left));
    PetscCall(VecResetArray(bmat->middle));
  }
  PetscCall(VecRestoreArray(x,&xx));
  PetscCall(VecRestoreArray(y,&yy));
  PetscFunctionReturn(0);
}
525 
526 static PetscErrorCode MatMultAdd_BlockMat(Mat A,Vec x,Vec y,Vec z)
527 {
528   PetscFunctionBegin;
529   PetscFunctionReturn(0);
530 }
531 
532 static PetscErrorCode MatMultTranspose_BlockMat(Mat A,Vec x,Vec y)
533 {
534   PetscFunctionBegin;
535   PetscFunctionReturn(0);
536 }
537 
538 static PetscErrorCode MatMultTransposeAdd_BlockMat(Mat A,Vec x,Vec y,Vec z)
539 {
540   PetscFunctionBegin;
541   PetscFunctionReturn(0);
542 }
543 
544 /*
545      Adds diagonal pointers to sparse matrix structure.
546 */
547 static PetscErrorCode MatMarkDiagonal_BlockMat(Mat A)
548 {
549   Mat_BlockMat   *a = (Mat_BlockMat*)A->data;
550   PetscInt       i,j,mbs = A->rmap->n/A->rmap->bs;
551 
552   PetscFunctionBegin;
553   if (!a->diag) {
554     PetscCall(PetscMalloc1(mbs,&a->diag));
555   }
556   for (i=0; i<mbs; i++) {
557     a->diag[i] = a->i[i+1];
558     for (j=a->i[i]; j<a->i[i+1]; j++) {
559       if (a->j[j] == i) {
560         a->diag[i] = j;
561         break;
562       }
563     }
564   }
565   PetscFunctionReturn(0);
566 }
567 
/*
   Extracts the scalar matrix obtained by taking entry (first,first) of each
   nonzero block, where `first` is the offset of the stride index sets.
   Restrictions: isrow must equal iscol, both must be strides whose step is
   the block size (so exactly one scalar is selected from every block).

   The result is a SeqAIJ matrix (SeqSBAIJ when A is marked symmetric) with
   the same block sparsity pattern as A, one scalar per block.
*/
static PetscErrorCode MatCreateSubMatrix_BlockMat(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
{
  Mat_BlockMat   *a = (Mat_BlockMat*)A->data;
  Mat_SeqAIJ     *c;
  PetscInt       i,k,first,step,lensi,nrows,ncols;
  PetscInt       *j_new,*i_new,*aj = a->j,*ailen = a->ilen;
  PetscScalar    *a_new;
  Mat            C,*aa = a->a;
  PetscBool      stride,equal;

  PetscFunctionBegin;
  PetscCall(ISEqual(isrow,iscol,&equal));
  PetscCheck(equal,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only for identical column and row indices");
  PetscCall(PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&stride));
  PetscCheck(stride,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only for stride indices");
  PetscCall(ISStrideGetInfo(iscol,&first,&step));
  PetscCheck(step == A->rmap->bs,PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only select one entry from each block");

  PetscCall(ISGetLocalSize(isrow,&nrows));
  ncols = nrows;

  /* create submatrix */
  if (scall == MAT_REUSE_MATRIX) {
    PetscInt n_cols,n_rows;
    C    = *B;
    PetscCall(MatGetSize(C,&n_rows,&n_cols));
    PetscCheck(n_rows == nrows && n_cols == ncols,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size");
    PetscCall(MatZeroEntries(C));
  } else {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&C));
    PetscCall(MatSetSizes(C,nrows,ncols,PETSC_DETERMINE,PETSC_DETERMINE));
    if (A->symmetric == PETSC_BOOL3_TRUE) PetscCall(MatSetType(C,MATSEQSBAIJ));
    else PetscCall(MatSetType(C,MATSEQAIJ));
    /* preallocate with exactly the per-row block counts of A so the direct
       fill of c->a/c->j/c->i below leaves no gaps */
    PetscCall(MatSeqAIJSetPreallocation(C,0,ailen));
    PetscCall(MatSeqSBAIJSetPreallocation(C,1,0,ailen));
  }
  c = (Mat_SeqAIJ*)C->data;

  /* loop over rows inserting into submatrix; aj and aa advance cumulatively
     over all rows of A because the rows are stored contiguously */
  a_new = c->a;
  j_new = c->j;
  i_new = c->i;

  for (i=0; i<nrows; i++) {
    lensi = ailen[i];
    for (k=0; k<lensi; k++) {
      *j_new++ = *aj++;                                   /* same block-column index */
      PetscCall(MatGetValue(*aa++,first,first,a_new++));  /* entry (first,first) of the block */
    }
    i_new[i+1] = i_new[i] + lensi;
    c->ilen[i] = lensi;
  }

  PetscCall(MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY));
  *B   = C;
  PetscFunctionReturn(0);
}
626 
/*
   Final assembly for MATBLOCKMAT: compresses out the unused preallocated
   slots of each block row (the classic AIJ "squeeze"), assembles every
   nonzero block matrix, records statistics, and marks the diagonal blocks.
   Flush assembly is a no-op.
*/
static PetscErrorCode MatAssemblyEnd_BlockMat(Mat A,MatAssemblyType mode)
{
  Mat_BlockMat   *a = (Mat_BlockMat*)A->data;
  PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
  PetscInt       m      = a->mbs,*ip,N,*ailen = a->ilen,rmax = 0;
  Mat            *aa    = a->a,*ap;

  PetscFunctionBegin;
  if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);

  if (m) rmax = ailen[0]; /* determine row with most nonzeros */
  for (i=1; i<m; i++) {
    /* move each row back by the amount of empty slots (fshift) before it*/
    fshift += imax[i-1] - ailen[i-1];
    rmax    = PetscMax(rmax,ailen[i]);
    if (fshift) {
      ip = aj + ai[i];    /* pointers to the (uncompressed) start of row i */
      ap = aa + ai[i];
      N  = ailen[i];
      for (j=0; j<N; j++) {
        ip[j-fshift] = ip[j];
        ap[j-fshift] = ap[j];
      }
    }
    ai[i] = ai[i-1] + ailen[i-1];   /* row offsets now reflect the compressed layout */
  }
  if (m) {
    fshift += imax[m-1] - ailen[m-1];
    ai[m]   = ai[m-1] + ailen[m-1];
  }
  /* reset ilen and imax for each row */
  for (i=0; i<m; i++) {
    ailen[i] = imax[i] = ai[i+1] - ai[i];
  }
  a->nz = ai[m];
  /* assemble each block; a missing (NULL) block here indicates corrupted insertion state */
  for (i=0; i<a->nz; i++) {
    PetscAssert(aa[i],PETSC_COMM_SELF,PETSC_ERR_PLIB,"Null matrix at location %" PetscInt_FMT " column %" PetscInt_FMT " nz %" PetscInt_FMT,i,aj[i],a->nz);
    PetscCall(MatAssemblyBegin(aa[i],MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(aa[i],MAT_FINAL_ASSEMBLY));
  }
  PetscCall(PetscInfo(A,"Matrix size: %" PetscInt_FMT " X %" PetscInt_FMT "; storage space: %" PetscInt_FMT " unneeded,%" PetscInt_FMT " used\n",m,A->cmap->n/A->cmap->bs,fshift,a->nz));
  PetscCall(PetscInfo(A,"Number of mallocs during MatSetValues() is %" PetscInt_FMT "\n",a->reallocs));
  PetscCall(PetscInfo(A,"Maximum nonzeros in any row is %" PetscInt_FMT "\n",rmax));

  A->info.mallocs    += a->reallocs;
  a->reallocs         = 0;
  A->info.nz_unneeded = (double)fshift;
  a->rmax             = rmax;
  PetscCall(MatMarkDiagonal_BlockMat(A));
  PetscFunctionReturn(0);
}
678 
679 static PetscErrorCode MatSetOption_BlockMat(Mat A,MatOption opt,PetscBool flg)
680 {
681   PetscFunctionBegin;
682   if (opt == MAT_SYMMETRIC && flg) {
683     A->ops->sor  = MatSOR_BlockMat_Symmetric;
684     A->ops->mult = MatMult_BlockMat_Symmetric;
685   } else {
686     PetscCall(PetscInfo(A,"Unused matrix option %s\n",MatOptions[opt]));
687   }
688   PetscFunctionReturn(0);
689 }
690 
/* Virtual function table for MATBLOCKMAT; the /* NN*\/ markers give the slot
   index in struct _MatOps and NULL entries are unimplemented operations */
static struct _MatOps MatOps_Values = {MatSetValues_BlockMat,
                                       NULL,
                                       NULL,
                                       MatMult_BlockMat,
                               /*  4*/ MatMultAdd_BlockMat,
                                       MatMultTranspose_BlockMat,
                                       MatMultTransposeAdd_BlockMat,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 10*/ NULL,
                                       NULL,
                                       NULL,
                                       MatSOR_BlockMat,
                                       NULL,
                               /* 15*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 20*/ NULL,
                                       MatAssemblyEnd_BlockMat,
                                       MatSetOption_BlockMat,
                                       NULL,
                               /* 24*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 29*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 34*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 39*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 44*/ NULL,
                                       NULL,
                                       MatShift_Basic,
                                       NULL,
                                       NULL,
                               /* 49*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 54*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 59*/ MatCreateSubMatrix_BlockMat,
                                       MatDestroy_BlockMat,
                                       MatView_BlockMat,
                                       NULL,
                                       NULL,
                               /* 64*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 69*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 74*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 79*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatLoad_BlockMat,
                               /* 84*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 89*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 94*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 99*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /*104*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /*109*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /*114*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /*119*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /*124*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /*129*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /*134*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /*139*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /*144*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /*150*/ NULL
};
843 
844 /*@C
845    MatBlockMatSetPreallocation - For good matrix assembly performance
846    the user should preallocate the matrix storage by setting the parameter nz
847    (or the array nnz).  By setting these parameters accurately, performance
848    during matrix assembly can be increased by more than a factor of 50.
849 
850    Collective
851 
852    Input Parameters:
853 +  B - The matrix
854 .  bs - size of each block in matrix
855 .  nz - number of nonzeros per block row (same for all rows)
856 -  nnz - array containing the number of nonzeros in the various block rows
857          (possibly different for each row) or NULL
858 
859    Notes:
860      If nnz is given then nz is ignored
861 
   Specify the preallocated storage with either nz or nnz (not both).
   The current implementation requires block-row-by-block-row preallocation,
   so nnz must be provided; passing nnz = NULL generates an error.  For large
   problems you MUST preallocate memory or you will get TERRIBLE performance;
   see the users' manual chapter on matrices.
866 
867    Level: intermediate
868 
869 .seealso: `MatCreate()`, `MatCreateBlockMat()`, `MatSetValues()`
870 
871 @*/
PetscErrorCode  MatBlockMatSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
{
  PetscFunctionBegin;
  /* Dispatch to the type-specific routine composed on the object under the name
     "MatBlockMatSetPreallocation_C" (installed by MatCreate_BlockMat below);
     PetscTryMethod is a no-op when B is not of a type that composes it */
  PetscTryMethod(B,"MatBlockMatSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));
  PetscFunctionReturn(0);
}
878 
879 static PetscErrorCode  MatBlockMatSetPreallocation_BlockMat(Mat A,PetscInt bs,PetscInt nz,PetscInt *nnz)
880 {
881   Mat_BlockMat   *bmat = (Mat_BlockMat*)A->data;
882   PetscInt       i;
883 
884   PetscFunctionBegin;
885   PetscCall(PetscLayoutSetBlockSize(A->rmap,bs));
886   PetscCall(PetscLayoutSetBlockSize(A->cmap,bs));
887   PetscCall(PetscLayoutSetUp(A->rmap));
888   PetscCall(PetscLayoutSetUp(A->cmap));
889   PetscCall(PetscLayoutGetBlockSize(A->rmap,&bs));
890 
891   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
892   PetscCheck(nz >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %" PetscInt_FMT,nz);
893   if (nnz) {
894     for (i=0; i<A->rmap->n/bs; i++) {
895       PetscCheck(nnz[i] >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %" PetscInt_FMT " value %" PetscInt_FMT,i,nnz[i]);
896       PetscCheck(nnz[i] <= A->cmap->n/bs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than row length: local row %" PetscInt_FMT " value %" PetscInt_FMT " rowlength %" PetscInt_FMT,i,nnz[i],A->cmap->n/bs);
897     }
898   }
899   bmat->mbs = A->rmap->n/bs;
900 
901   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs,NULL,&bmat->right));
902   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs,NULL,&bmat->middle));
903   PetscCall(VecCreateSeq(PETSC_COMM_SELF,bs,&bmat->left));
904 
905   if (!bmat->imax) {
906     PetscCall(PetscMalloc2(A->rmap->n,&bmat->imax,A->rmap->n,&bmat->ilen));
907     PetscCall(PetscLogObjectMemory((PetscObject)A,2*A->rmap->n*sizeof(PetscInt)));
908   }
909   if (PetscLikely(nnz)) {
910     nz = 0;
911     for (i=0; i<A->rmap->n/A->rmap->bs; i++) {
912       bmat->imax[i] = nnz[i];
913       nz           += nnz[i];
914     }
915   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Currently requires block row by row preallocation");
916 
917   /* bmat->ilen will count nonzeros in each row so far. */
918   PetscCall(PetscArrayzero(bmat->ilen,bmat->mbs));
919 
920   /* allocate the matrix space */
921   PetscCall(MatSeqXAIJFreeAIJ(A,(PetscScalar**)&bmat->a,&bmat->j,&bmat->i));
922   PetscCall(PetscMalloc3(nz,&bmat->a,nz,&bmat->j,A->rmap->n+1,&bmat->i));
923   PetscCall(PetscLogObjectMemory((PetscObject)A,(A->rmap->n+1)*sizeof(PetscInt)+nz*(sizeof(PetscScalar)+sizeof(PetscInt))));
924   bmat->i[0] = 0;
925   for (i=1; i<bmat->mbs+1; i++) {
926     bmat->i[i] = bmat->i[i-1] + bmat->imax[i-1];
927   }
928   bmat->singlemalloc = PETSC_TRUE;
929   bmat->free_a       = PETSC_TRUE;
930   bmat->free_ij      = PETSC_TRUE;
931 
932   bmat->nz            = 0;
933   bmat->maxnz         = nz;
934   A->info.nz_unneeded = (double)bmat->maxnz;
935   PetscCall(MatSetOption(A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE));
936   PetscFunctionReturn(0);
937 }
938 
939 /*MC
   MATBLOCKMAT - A matrix that is defined by a set of Mat objects that together represent a sparse block matrix
                 consisting of (usually) sparse blocks.
942 
943   Level: advanced
944 
945 .seealso: `MatCreateBlockMat()`
946 
947 M*/
948 
/* Type constructor for MATBLOCKMAT: allocates the zeroed type-specific data,
   installs the function table, and composes the preallocation routine */
PETSC_EXTERN PetscErrorCode MatCreate_BlockMat(Mat A)
{
  Mat_BlockMat   *b;

  PetscFunctionBegin;
  PetscCall(PetscNewLog(A,&b));
  A->data = (void*)b;
  /* install the operation table for this type (MatOps_Values defined above in this file) */
  PetscCall(PetscMemcpy(A->ops,&MatOps_Values,sizeof(struct _MatOps)));

  A->assembled    = PETSC_TRUE;   /* a freshly created, empty matrix starts out marked assembled */
  A->preallocated = PETSC_FALSE;  /* storage is allocated later, in MatBlockMatSetPreallocation_BlockMat() */
  PetscCall(PetscObjectChangeTypeName((PetscObject)A,MATBLOCKMAT));

  /* expose the preallocation routine so MatBlockMatSetPreallocation() can dispatch to it */
  PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatBlockMatSetPreallocation_C",MatBlockMatSetPreallocation_BlockMat));
  PetscFunctionReturn(0);
}
965 
966 /*@C
967    MatCreateBlockMat - Creates a new matrix in which each block contains a uniform-size sequential Mat object
968 
969   Collective
970 
971    Input Parameters:
972 +  comm - MPI communicator
973 .  m - number of rows
974 .  n  - number of columns
975 .  bs - size of each submatrix
.  nz  - expected maximum number of nonzero blocks in a row (use PETSC_DEFAULT if not known)
-  nnz - expected number of nonzero blocks per block row if known (currently must be provided; NULL generates an error)
978 
979    Output Parameter:
980 .  A - the matrix
981 
982    Level: intermediate
983 
984    Notes:
985     Matrices of this type are nominally-sparse matrices in which each "entry" is a Mat object.  Each Mat must
986    have the same size and be sequential.  The local and global sizes must be compatible with this decomposition.
987 
988    For matrices containing parallel submatrices and variable block sizes, see MATNEST.
989 
990 .seealso: `MATBLOCKMAT`, `MatCreateNest()`
991 @*/
PetscErrorCode  MatCreateBlockMat(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt bs,PetscInt nz,PetscInt *nnz, Mat *A)
{
  PetscFunctionBegin;
  /* create, size, type, and preallocate in the standard PETSc order;
     m and n are the local sizes, the global sizes are derived (PETSC_DETERMINE) */
  PetscCall(MatCreate(comm,A));
  PetscCall(MatSetSizes(*A,m,n,PETSC_DETERMINE,PETSC_DETERMINE));
  PetscCall(MatSetType(*A,MATBLOCKMAT));
  /* NOTE(review): the _BlockMat preallocation errors when nnz is NULL, so despite the
     manpage, nnz must currently be provided */
  PetscCall(MatBlockMatSetPreallocation(*A,bs,nz,nnz));
  PetscFunctionReturn(0);
}
1001