xref: /petsc/src/mat/impls/blockmat/seq/blockmat.c (revision 2e3d3ef9bcea2ec3187269b59709989dccbaffee)
1 
2 /*
3    This provides a matrix that consists of Mats
4 */
5 
6 #include <petsc/private/matimpl.h>              /*I "petscmat.h" I*/
7 #include <../src/mat/impls/baij/seq/baij.h>    /* use the common AIJ data-structure */
8 
typedef struct {
  SEQAIJHEADER(Mat);                           /* CSR-style storage (i, j, ilen, imax, ...) whose "values" array a holds Mat blocks */
  SEQBAIJHEADER;                               /* block-size related fields (mbs, ...) shared with SeqBAIJ */
  Mat *diags;                                  /* factored diagonal blocks, created lazily by the SOR routines */

  Vec left,right,middle,workb;                 /* dummy vectors to perform local parts of product */
} Mat_BlockMat;
16 
/*
   SOR/Gauss-Seidel relaxation for a symmetric MATBLOCKMAT where only the upper
   triangular blocks are stored: the lower-triangular contribution is applied
   implicitly via MatMultTranspose of the stored upper blocks, and the right
   hand side is adjusted as each block row is solved (see MatSOR_SeqSBAIJ for
   the scalar analogue).

   Supported only for omega == 1.0, fshift == 0, no Eisenstat trick, and a
   backward sweep must be paired with a forward sweep (all checked below).
*/
static PetscErrorCode MatSOR_BlockMat_Symmetric(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_BlockMat      *a = (Mat_BlockMat*)A->data;
  PetscScalar       *x;
  const Mat         *v;
  const PetscScalar *b;
  PetscInt          n = A->cmap->n,i,mbs = n/A->rmap->bs,j,bs = A->rmap->bs;
  const PetscInt    *idx;
  IS                row,col;
  MatFactorInfo     info;
  Vec               left = a->left,right = a->right, middle = a->middle;
  Mat               *diag;

  PetscFunctionBegin;
  its = its*lits;
  PetscCheckFalse(its <= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive",its,lits);
  PetscCheckFalse(flag & SOR_EISENSTAT,PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
  PetscCheckFalse(omega != 1.0,PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for omega not equal to 1.0");
  PetscCheck(!fshift,PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for fshift");
  if ((flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) && !(flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP)) {
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot do backward sweep without forward sweep");
  }

  /* first call: Cholesky-factor every diagonal block and create the work vector */
  if (!a->diags) {
    PetscCall(PetscMalloc1(mbs,&a->diags));
    PetscCall(MatFactorInfoInitialize(&info));
    for (i=0; i<mbs; i++) {
      PetscCall(MatGetOrdering(a->a[a->diag[i]], MATORDERINGND,&row,&col));
      /* NOTE(review): a->diags[i] comes straight from PetscMalloc1 with no prior
         MatGetFactor() call -- confirm the factor matrix is created elsewhere */
      PetscCall(MatCholeskyFactorSymbolic(a->diags[i],a->a[a->diag[i]],row,&info));
      PetscCall(MatCholeskyFactorNumeric(a->diags[i],a->a[a->diag[i]],&info));
      PetscCall(ISDestroy(&row));
      PetscCall(ISDestroy(&col));
    }
    PetscCall(VecDuplicate(bb,&a->workb));
  }
  diag = a->diags;

  PetscCall(VecSet(xx,0.0));
  PetscCall(VecGetArray(xx,&x));
  /* copy right hand side because it must be modified during iteration */
  PetscCall(VecCopy(bb,a->workb));
  PetscCall(VecGetArrayRead(a->workb,&b));

  /* need to add code for when initial guess is zero, see MatSOR_SeqAIJ */
  while (its--) {
    if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {

      for (i=0; i<mbs; i++) {
        /* off-diagonal entries of block row i; the diagonal is at position a->i[i] (skipped by the +1) */
        n   = a->i[i+1] - a->i[i] - 1;
        idx = a->j + a->i[i] + 1;
        v   = a->a + a->i[i] + 1;

        /* left = sum of U(i,j)*x(j) over the already-updated columns j */
        PetscCall(VecSet(left,0.0));
        for (j=0; j<n; j++) {
          PetscCall(VecPlaceArray(right,x + idx[j]*bs));
          PetscCall(MatMultAdd(v[j],right,left,left));
          PetscCall(VecResetArray(right));
        }
        /* left = b(i) - left */
        PetscCall(VecPlaceArray(right,b + i*bs));
        PetscCall(VecAYPX(left,-1.0,right));
        PetscCall(VecResetArray(right));

        /* x(i) = D(i)^{-1} * left, written in place into the x array */
        PetscCall(VecPlaceArray(right,x + i*bs));
        PetscCall(MatSolve(diag[i],left,right));

        /* now adjust right hand side, see MatSOR_SeqSBAIJ */
        for (j=0; j<n; j++) {
          PetscCall(MatMultTranspose(v[j],right,left));
          PetscCall(VecPlaceArray(middle,b + idx[j]*bs));
          PetscCall(VecAXPY(middle,-1.0,left));
          PetscCall(VecResetArray(middle));
        }
        PetscCall(VecResetArray(right));

      }
    }
    if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {

      /* backward sweep: same block-row update but traversed in reverse order;
         the RHS was already adjusted by the forward sweep so no second adjustment here */
      for (i=mbs-1; i>=0; i--) {
        n   = a->i[i+1] - a->i[i] - 1;
        idx = a->j + a->i[i] + 1;
        v   = a->a + a->i[i] + 1;

        PetscCall(VecSet(left,0.0));
        for (j=0; j<n; j++) {
          PetscCall(VecPlaceArray(right,x + idx[j]*bs));
          PetscCall(MatMultAdd(v[j],right,left,left));
          PetscCall(VecResetArray(right));
        }
        PetscCall(VecPlaceArray(right,b + i*bs));
        PetscCall(VecAYPX(left,-1.0,right));
        PetscCall(VecResetArray(right));

        PetscCall(VecPlaceArray(right,x + i*bs));
        PetscCall(MatSolve(diag[i],left,right));
        PetscCall(VecResetArray(right));

      }
    }
  }
  PetscCall(VecRestoreArray(xx,&x));
  PetscCall(VecRestoreArrayRead(a->workb,&b));
  PetscFunctionReturn(0);
}
121 
/*
   SOR/Gauss-Seidel relaxation for a general (non-symmetric) MATBLOCKMAT.
   Each diagonal block is LU-factored on first use; each block-row update
   accumulates the off-diagonal products and solves with the factored diagonal.

   Supported only for omega == 1.0, fshift == 0, and no Eisenstat trick
   (all checked below).
*/
static PetscErrorCode MatSOR_BlockMat(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_BlockMat      *a = (Mat_BlockMat*)A->data;
  PetscScalar       *x;
  const Mat         *v;
  const PetscScalar *b;
  PetscInt          n = A->cmap->n,i,mbs = n/A->rmap->bs,j,bs = A->rmap->bs;
  const PetscInt    *idx;
  IS                row,col;
  MatFactorInfo     info;
  Vec               left = a->left,right = a->right;
  Mat               *diag;

  PetscFunctionBegin;
  its = its*lits;
  PetscCheckFalse(its <= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive",its,lits);
  PetscCheckFalse(flag & SOR_EISENSTAT,PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
  PetscCheckFalse(omega != 1.0,PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for omega not equal to 1.0");
  PetscCheck(!fshift,PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for fshift");

  /* first call: LU-factor every diagonal block */
  if (!a->diags) {
    PetscCall(PetscMalloc1(mbs,&a->diags));
    PetscCall(MatFactorInfoInitialize(&info));
    for (i=0; i<mbs; i++) {
      PetscCall(MatGetOrdering(a->a[a->diag[i]], MATORDERINGND,&row,&col));
      /* NOTE(review): a->diags[i] comes straight from PetscMalloc1 with no prior
         MatGetFactor() call -- confirm the factor matrix is created elsewhere */
      PetscCall(MatLUFactorSymbolic(a->diags[i],a->a[a->diag[i]],row,col,&info));
      PetscCall(MatLUFactorNumeric(a->diags[i],a->a[a->diag[i]],&info));
      PetscCall(ISDestroy(&row));
      PetscCall(ISDestroy(&col));
    }
  }
  diag = a->diags;

  PetscCall(VecSet(xx,0.0));
  PetscCall(VecGetArray(xx,&x));
  PetscCall(VecGetArrayRead(bb,&b));

  /* need to add code for when initial guess is zero, see MatSOR_SeqAIJ */
  while (its--) {
    if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {

      for (i=0; i<mbs; i++) {
        /* whole block row i, diagonal included (skipped inside the loop via idx[j] != i) */
        n   = a->i[i+1] - a->i[i];
        idx = a->j + a->i[i];
        v   = a->a + a->i[i];

        /* left = sum of A(i,j)*x(j) over off-diagonal blocks */
        PetscCall(VecSet(left,0.0));
        for (j=0; j<n; j++) {
          if (idx[j] != i) {
            PetscCall(VecPlaceArray(right,x + idx[j]*bs));
            PetscCall(MatMultAdd(v[j],right,left,left));
            PetscCall(VecResetArray(right));
          }
        }
        /* left = b(i) - left */
        PetscCall(VecPlaceArray(right,b + i*bs));
        PetscCall(VecAYPX(left,-1.0,right));
        PetscCall(VecResetArray(right));

        /* x(i) = D(i)^{-1} * left, written in place into the x array */
        PetscCall(VecPlaceArray(right,x + i*bs));
        PetscCall(MatSolve(diag[i],left,right));
        PetscCall(VecResetArray(right));
      }
    }
    if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {

      /* backward sweep: identical block-row update in reverse order */
      for (i=mbs-1; i>=0; i--) {
        n   = a->i[i+1] - a->i[i];
        idx = a->j + a->i[i];
        v   = a->a + a->i[i];

        PetscCall(VecSet(left,0.0));
        for (j=0; j<n; j++) {
          if (idx[j] != i) {
            PetscCall(VecPlaceArray(right,x + idx[j]*bs));
            PetscCall(MatMultAdd(v[j],right,left,left));
            PetscCall(VecResetArray(right));
          }
        }
        PetscCall(VecPlaceArray(right,b + i*bs));
        PetscCall(VecAYPX(left,-1.0,right));
        PetscCall(VecResetArray(right));

        PetscCall(VecPlaceArray(right,x + i*bs));
        PetscCall(MatSolve(diag[i],left,right));
        PetscCall(VecResetArray(right));

      }
    }
  }
  PetscCall(VecRestoreArray(xx,&x));
  PetscCall(VecRestoreArrayRead(bb,&b));
  PetscFunctionReturn(0);
}
215 
/*
   Inserts/adds scalar values into a MATBLOCKMAT: each scalar (row,col) is mapped
   to a block (brow,bcol) plus an offset (ridx,cidx) inside that block.  The block
   column list of each block row is kept sorted; a binary search locates the block
   and new blocks (SeqAIJ submatrices of size bs x bs) are created on demand.
*/
static PetscErrorCode MatSetValues_BlockMat(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
{
  Mat_BlockMat   *a = (Mat_BlockMat*)A->data;
  PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
  PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
  PetscInt       *aj  =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
  PetscInt       ridx,cidx;
  PetscBool      roworiented=a->roworiented;
  MatScalar      value;
  Mat            *ap,*aa = a->a;

  PetscFunctionBegin;
  for (k=0; k<m; k++) { /* loop over added rows */
    row  = im[k];
    brow = row/bs;
    if (row < 0) continue;          /* negative row indices are silently ignored */
    PetscCheck(row < A->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,row,A->rmap->N-1);
    rp   = aj + ai[brow];           /* block column indices of block row brow */
    ap   = aa + ai[brow];           /* Mat blocks of block row brow */
    rmax = imax[brow];              /* allocated slots in this block row */
    nrow = ailen[brow];             /* occupied slots in this block row */
    low  = 0;
    high = nrow;
    for (l=0; l<n; l++) { /* loop over added columns */
      if (in[l] < 0) continue;      /* negative column indices are silently ignored */
      PetscCheck(in[l] < A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,in[l],A->cmap->n-1);
      col = in[l]; bcol = col/bs;
      /* symmetric storage keeps only the upper triangle of blocks */
      if (A->symmetric && brow > bcol) continue;
      ridx = row % bs; cidx = col % bs;   /* position inside the bs x bs block */
      if (roworiented) value = v[l + k*n];
      else value = v[k + l*m];

      /* narrow the search window using the previous column's position when columns arrive in order */
      if (col <= lastcol) low = 0;
      else high = nrow;
      lastcol = col;
      /* binary search down to a window of <= 7, then scan linearly */
      while (high-low > 7) {
        t = (low+high)/2;
        if (rp[t] > bcol) high = t;
        else              low  = t;
      }
      for (i=low; i<high; i++) {
        if (rp[i] > bcol) break;
        if (rp[i] == bcol) goto noinsert1;  /* block already present */
      }
      if (nonew == 1) goto noinsert1;       /* new nonzeros disallowed: drop silently */
      PetscCheckFalse(nonew == -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col);
      /* grow storage if this block row is full */
      MatSeqXAIJReallocateAIJ(A,a->mbs,1,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,Mat);
      N = nrow++ - 1; high++;
      /* shift up all the later entries in this row */
      for (ii=N; ii>=i; ii--) {
        rp[ii+1] = rp[ii];
        ap[ii+1] = ap[ii];
      }
      if (N>=i) ap[i] = NULL;   /* freshly opened slot has no Mat yet */
      rp[i] = bcol;
      a->nz++;
      A->nonzerostate++;
noinsert1:;
      /* create the bs x bs submatrix on first touch */
      if (!*(ap+i)) {
        PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF,bs,bs,0,NULL,ap+i));
      }
      PetscCall(MatSetValues(ap[i],1,&ridx,1,&cidx,&value,is));
      low  = i;
    }
    ailen[brow] = nrow;
  }
  PetscFunctionReturn(0);
}
284 
/*
   Loads a MATBLOCKMAT from a binary viewer by first loading the matrix as SeqAIJ,
   then merging runs of bs scalar rows into block rows.

   Pass 1 counts the nonzero blocks per block row (lens[]) so the matrix can be
   preallocated; pass 2 walks the same structure again to preallocate each bs x bs
   submatrix with its exact per-row lengths (llens[]); finally the scalar entries
   are copied over row by row with MatSetValues.

   Options: -matload_block_size sets bs; -matload_symmetric keeps only the upper
   triangle of blocks.
*/
static PetscErrorCode MatLoad_BlockMat(Mat newmat, PetscViewer viewer)
{
  PetscErrorCode    ierr;
  Mat               tmpA;
  PetscInt          i,j,m,n,bs = 1,ncols,*lens,currentcol,mbs,**ii,*ilens,nextcol,*llens,cnt = 0;
  const PetscInt    *cols;
  const PetscScalar *values;
  PetscBool         flg = PETSC_FALSE,notdone;
  Mat_SeqAIJ        *a;
  Mat_BlockMat      *amat;

  PetscFunctionBegin;
  /* force binary viewer to load .info file if it has not yet done so */
  PetscCall(PetscViewerSetUp(viewer));
  PetscCall(MatCreate(PETSC_COMM_SELF,&tmpA));
  PetscCall(MatSetType(tmpA,MATSEQAIJ));
  PetscCall(MatLoad_SeqAIJ(tmpA,viewer));

  PetscCall(MatGetLocalSize(tmpA,&m,&n));
  ierr = PetscOptionsBegin(PETSC_COMM_SELF,NULL,"Options for loading BlockMat matrix 1","Mat");PetscCall(ierr);
  PetscCall(PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL));
  PetscCall(PetscOptionsBool("-matload_symmetric","Store the matrix as symmetric","MatLoad",flg,&flg,NULL));
  ierr = PetscOptionsEnd();PetscCall(ierr);

  /* Determine number of nonzero blocks for each block row */
  a    = (Mat_SeqAIJ*) tmpA->data;
  mbs  = m/bs;
  /* ii[j]/ilens[j] are a cursor (pointer + remaining length) into scalar row j of the current block row */
  PetscCall(PetscMalloc3(mbs,&lens,bs,&ii,bs,&ilens));
  PetscCall(PetscArrayzero(lens,mbs));

  for (i=0; i<mbs; i++) {
    for (j=0; j<bs; j++) {
      ii[j]    = a->j + a->i[i*bs + j];
      ilens[j] = a->i[i*bs + j + 1] - a->i[i*bs + j];
    }

    /* merge the bs sorted scalar rows, counting each distinct block column once */
    currentcol = -1;
    while (PETSC_TRUE) {
      notdone = PETSC_FALSE;
      nextcol = 1000000000;   /* sentinel larger than any block column */
      for (j=0; j<bs; j++) {
        /* advance past every scalar entry that falls in an already-counted block column */
        while ((ilens[j] > 0 && ii[j][0]/bs <= currentcol)) {
          ii[j]++;
          ilens[j]--;
        }
        if (ilens[j] > 0) {
          notdone = PETSC_TRUE;
          nextcol = PetscMin(nextcol,ii[j][0]/bs);
        }
      }
      if (!notdone) break;
      /* symmetric storage only counts blocks on or above the diagonal */
      if (!flg || (nextcol >= i)) lens[i]++;
      currentcol = nextcol;
    }
  }

  if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) {
    PetscCall(MatSetSizes(newmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE));
  }
  PetscCall(MatBlockMatSetPreallocation(newmat,bs,0,lens));
  if (flg) {
    PetscCall(MatSetOption(newmat,MAT_SYMMETRIC,PETSC_TRUE));
  }
  amat = (Mat_BlockMat*)(newmat)->data;

  /* preallocate the submatrices */
  PetscCall(PetscMalloc1(bs,&llens));
  for (i=0; i<mbs; i++) { /* loops for block rows */
    for (j=0; j<bs; j++) {
      ii[j]    = a->j + a->i[i*bs + j];
      ilens[j] = a->i[i*bs + j + 1] - a->i[i*bs + j];
    }

    currentcol = 1000000000;
    for (j=0; j<bs; j++) { /* loop over rows in block finding first nonzero block */
      if (ilens[j] > 0) {
        currentcol = PetscMin(currentcol,ii[j][0]/bs);
      }
    }

    while (PETSC_TRUE) {  /* loops over blocks in block row */
      notdone = PETSC_FALSE;
      nextcol = 1000000000;
      PetscCall(PetscArrayzero(llens,bs));
      for (j=0; j<bs; j++) { /* loop over rows in block */
        while ((ilens[j] > 0 && ii[j][0]/bs <= currentcol)) { /* loop over columns in row */
          ii[j]++;
          ilens[j]--;
          llens[j]++;   /* number of scalar nonzeros of row j inside the current block */
        }
        if (ilens[j] > 0) {
          notdone = PETSC_TRUE;
          nextcol = PetscMin(nextcol,ii[j][0]/bs);
        }
      }
      PetscCheckFalse(cnt >= amat->maxnz,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Number of blocks found greater than expected %" PetscInt_FMT,cnt);
      if (!flg || currentcol >= i) {
        amat->j[cnt] = currentcol;
        PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF,bs,bs,0,llens,amat->a+cnt++));
      }

      if (!notdone) break;
      currentcol = nextcol;
    }
    amat->ilen[i] = lens[i];
  }

  PetscCall(PetscFree3(lens,ii,ilens));
  PetscCall(PetscFree(llens));

  /* copy over the matrix, one row at a time */
  for (i=0; i<m; i++) {
    PetscCall(MatGetRow(tmpA,i,&ncols,&cols,&values));
    PetscCall(MatSetValues(newmat,1,&i,ncols,cols,values,INSERT_VALUES));
    PetscCall(MatRestoreRow(tmpA,i,&ncols,&cols,&values));
  }
  PetscCall(MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(0);
}
405 
406 static PetscErrorCode MatView_BlockMat(Mat A,PetscViewer viewer)
407 {
408   Mat_BlockMat      *a = (Mat_BlockMat*)A->data;
409   const char        *name;
410   PetscViewerFormat format;
411 
412   PetscFunctionBegin;
413   PetscCall(PetscObjectGetName((PetscObject)A,&name));
414   PetscCall(PetscViewerGetFormat(viewer,&format));
415   if (format == PETSC_VIEWER_ASCII_FACTOR_INFO || format == PETSC_VIEWER_ASCII_INFO) {
416     PetscCall(PetscViewerASCIIPrintf(viewer,"Nonzero block matrices = %" PetscInt_FMT " \n",a->nz));
417     if (A->symmetric) {
418       PetscCall(PetscViewerASCIIPrintf(viewer,"Only upper triangular part of symmetric matrix is stored\n"));
419     }
420   }
421   PetscFunctionReturn(0);
422 }
423 
424 static PetscErrorCode MatDestroy_BlockMat(Mat mat)
425 {
426   Mat_BlockMat   *bmat = (Mat_BlockMat*)mat->data;
427   PetscInt       i;
428 
429   PetscFunctionBegin;
430   PetscCall(VecDestroy(&bmat->right));
431   PetscCall(VecDestroy(&bmat->left));
432   PetscCall(VecDestroy(&bmat->middle));
433   PetscCall(VecDestroy(&bmat->workb));
434   if (bmat->diags) {
435     for (i=0; i<mat->rmap->n/mat->rmap->bs; i++) {
436       PetscCall(MatDestroy(&bmat->diags[i]));
437     }
438   }
439   if (bmat->a) {
440     for (i=0; i<bmat->nz; i++) {
441       PetscCall(MatDestroy(&bmat->a[i]));
442     }
443   }
444   PetscCall(MatSeqXAIJFreeAIJ(mat,(PetscScalar**)&bmat->a,&bmat->j,&bmat->i));
445   PetscCall(PetscFree(mat->data));
446   PetscFunctionReturn(0);
447 }
448 
449 static PetscErrorCode MatMult_BlockMat(Mat A,Vec x,Vec y)
450 {
451   Mat_BlockMat   *bmat = (Mat_BlockMat*)A->data;
452   PetscScalar    *xx,*yy;
453   PetscInt       *aj,i,*ii,jrow,m = A->rmap->n/A->rmap->bs,bs = A->rmap->bs,n,j;
454   Mat            *aa;
455 
456   PetscFunctionBegin;
457   /*
458      Standard CSR multiply except each entry is a Mat
459   */
460   PetscCall(VecGetArray(x,&xx));
461 
462   PetscCall(VecSet(y,0.0));
463   PetscCall(VecGetArray(y,&yy));
464   aj   = bmat->j;
465   aa   = bmat->a;
466   ii   = bmat->i;
467   for (i=0; i<m; i++) {
468     jrow = ii[i];
469     PetscCall(VecPlaceArray(bmat->left,yy + bs*i));
470     n    = ii[i+1] - jrow;
471     for (j=0; j<n; j++) {
472       PetscCall(VecPlaceArray(bmat->right,xx + bs*aj[jrow]));
473       PetscCall(MatMultAdd(aa[jrow],bmat->right,bmat->left,bmat->left));
474       PetscCall(VecResetArray(bmat->right));
475       jrow++;
476     }
477     PetscCall(VecResetArray(bmat->left));
478   }
479   PetscCall(VecRestoreArray(x,&xx));
480   PetscCall(VecRestoreArray(y,&yy));
481   PetscFunctionReturn(0);
482 }
483 
/*
   y = A*x when only the upper-triangular blocks of the symmetric A are stored:
   each stored block U(i,j) contributes U(i,j)*x(j) to y(i) (upper part) and
   U(i,j)^T*x(i) to y(j) (implicit lower part, via MatMultTransposeAdd).
*/
PetscErrorCode MatMult_BlockMat_Symmetric(Mat A,Vec x,Vec y)
{
  Mat_BlockMat   *bmat = (Mat_BlockMat*)A->data;
  PetscScalar    *xx,*yy;
  PetscInt       *aj,i,*ii,jrow,m = A->rmap->n/A->rmap->bs,bs = A->rmap->bs,n,j;
  Mat            *aa;

  PetscFunctionBegin;
  /*
     Standard CSR multiply except each entry is a Mat
  */
  PetscCall(VecGetArray(x,&xx));

  PetscCall(VecSet(y,0.0));
  PetscCall(VecGetArray(y,&yy));
  aj   = bmat->j;
  aa   = bmat->a;
  ii   = bmat->i;
  for (i=0; i<m; i++) {
    jrow = ii[i];
    n    = ii[i+1] - jrow;
    PetscCall(VecPlaceArray(bmat->left,yy + bs*i));     /* left aliases y(i) */
    PetscCall(VecPlaceArray(bmat->middle,xx + bs*i));   /* middle aliases x(i), used for the transpose updates */
    /* if we ALWAYS required a diagonal entry then could remove this if test */
    if (aj[jrow] == i) {
      /* diagonal block contributes only once (no transpose counterpart) */
      PetscCall(VecPlaceArray(bmat->right,xx + bs*aj[jrow]));
      PetscCall(MatMultAdd(aa[jrow],bmat->right,bmat->left,bmat->left));
      PetscCall(VecResetArray(bmat->right));
      jrow++;
      n--;
    }
    for (j=0; j<n; j++) {
      PetscCall(VecPlaceArray(bmat->right,xx + bs*aj[jrow]));            /* upper triangular part */
      PetscCall(MatMultAdd(aa[jrow],bmat->right,bmat->left,bmat->left));
      PetscCall(VecResetArray(bmat->right));

      PetscCall(VecPlaceArray(bmat->right,yy + bs*aj[jrow]));            /* lower triangular part */
      PetscCall(MatMultTransposeAdd(aa[jrow],bmat->middle,bmat->right,bmat->right));
      PetscCall(VecResetArray(bmat->right));
      jrow++;
    }
    PetscCall(VecResetArray(bmat->left));
    PetscCall(VecResetArray(bmat->middle));
  }
  PetscCall(VecRestoreArray(x,&xx));
  PetscCall(VecRestoreArray(y,&yy));
  PetscFunctionReturn(0);
}
532 
/* Unimplemented stub: returns success without computing z = A*x + y.
   NOTE(review): silently doing nothing may mask errors at callers -- confirm
   this placeholder is intentional rather than raising PETSC_ERR_SUP. */
static PetscErrorCode MatMultAdd_BlockMat(Mat A,Vec x,Vec y,Vec z)
{
  PetscFunctionBegin;
  PetscFunctionReturn(0);
}
538 
/* Unimplemented stub: returns success without computing y = A^T*x.
   NOTE(review): confirm this placeholder is intentional. */
static PetscErrorCode MatMultTranspose_BlockMat(Mat A,Vec x,Vec y)
{
  PetscFunctionBegin;
  PetscFunctionReturn(0);
}
544 
/* Unimplemented stub: returns success without computing z = A^T*x + y.
   NOTE(review): confirm this placeholder is intentional. */
static PetscErrorCode MatMultTransposeAdd_BlockMat(Mat A,Vec x,Vec y,Vec z)
{
  PetscFunctionBegin;
  PetscFunctionReturn(0);
}
550 
551 /*
552      Adds diagonal pointers to sparse matrix structure.
553 */
554 static PetscErrorCode MatMarkDiagonal_BlockMat(Mat A)
555 {
556   Mat_BlockMat   *a = (Mat_BlockMat*)A->data;
557   PetscInt       i,j,mbs = A->rmap->n/A->rmap->bs;
558 
559   PetscFunctionBegin;
560   if (!a->diag) {
561     PetscCall(PetscMalloc1(mbs,&a->diag));
562   }
563   for (i=0; i<mbs; i++) {
564     a->diag[i] = a->i[i+1];
565     for (j=a->i[i]; j<a->i[i+1]; j++) {
566       if (a->j[j] == i) {
567         a->diag[i] = j;
568         break;
569       }
570     }
571   }
572   PetscFunctionReturn(0);
573 }
574 
/*
   Extracts a scalar submatrix by selecting entry (first,first) of every block:
   supported only when isrow == iscol is a stride IS with step equal to the
   block size, so exactly one scalar row/column is taken per block row/column.
   The result is SeqSBAIJ when A is symmetric, SeqAIJ otherwise, and its CSR
   arrays are filled in directly from A's block structure.
*/
static PetscErrorCode MatCreateSubMatrix_BlockMat(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
{
  Mat_BlockMat   *a = (Mat_BlockMat*)A->data;
  Mat_SeqAIJ     *c;
  PetscInt       i,k,first,step,lensi,nrows,ncols;
  PetscInt       *j_new,*i_new,*aj = a->j,*ailen = a->ilen;
  PetscScalar    *a_new;
  Mat            C,*aa = a->a;
  PetscBool      stride,equal;

  PetscFunctionBegin;
  PetscCall(ISEqual(isrow,iscol,&equal));
  PetscCheck(equal,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only for identical column and row indices");
  PetscCall(PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&stride));
  PetscCheck(stride,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only for stride indices");
  PetscCall(ISStrideGetInfo(iscol,&first,&step));
  PetscCheckFalse(step != A->rmap->bs,PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only select one entry from each block");

  PetscCall(ISGetLocalSize(isrow,&nrows));
  ncols = nrows;

  /* create submatrix */
  if (scall == MAT_REUSE_MATRIX) {
    PetscInt n_cols,n_rows;
    C    = *B;
    PetscCall(MatGetSize(C,&n_rows,&n_cols));
    PetscCheckFalse(n_rows != nrows || n_cols != ncols,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size");
    PetscCall(MatZeroEntries(C));
  } else {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&C));
    PetscCall(MatSetSizes(C,nrows,ncols,PETSC_DETERMINE,PETSC_DETERMINE));
    if (A->symmetric) {
      PetscCall(MatSetType(C,MATSEQSBAIJ));
    } else {
      PetscCall(MatSetType(C,MATSEQAIJ));
    }
    /* only one of the two preallocations takes effect, depending on the type set above */
    PetscCall(MatSeqAIJSetPreallocation(C,0,ailen));
    PetscCall(MatSeqSBAIJSetPreallocation(C,1,0,ailen));
  }
  /* NOTE(review): C's data is accessed as Mat_SeqAIJ even for the SBAIJ branch --
     relies on layout compatibility of the leading CSR fields; confirm */
  c = (Mat_SeqAIJ*)C->data;

  /* loop over rows inserting into submatrix */
  a_new = c->a;
  j_new = c->j;
  i_new = c->i;

  for (i=0; i<nrows; i++) {
    lensi = ailen[i];
    for (k=0; k<lensi; k++) {
      *j_new++ = *aj++;   /* block column index is reused as the scalar column index */
      PetscCall(MatGetValue(*aa++,first,first,a_new++));
    }
    i_new[i+1] = i_new[i] + lensi;
    c->ilen[i] = lensi;
  }

  PetscCall(MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY));
  *B   = C;
  PetscFunctionReturn(0);
}
636 
/*
   Final assembly of a MATBLOCKMAT: compresses out the unused preallocated slots
   in each block row (the classic AIJ "fshift" compaction), assembles every block
   submatrix, updates the nonzero statistics, and marks the diagonal blocks.
   Flush assembly is a no-op.
*/
static PetscErrorCode MatAssemblyEnd_BlockMat(Mat A,MatAssemblyType mode)
{
  Mat_BlockMat   *a = (Mat_BlockMat*)A->data;
  PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
  PetscInt       m      = a->mbs,*ip,N,*ailen = a->ilen,rmax = 0;
  Mat            *aa    = a->a,*ap;

  PetscFunctionBegin;
  if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);

  if (m) rmax = ailen[0]; /* determine row with most nonzeros */
  for (i=1; i<m; i++) {
    /* move each row back by the amount of empty slots (fshift) before it*/
    fshift += imax[i-1] - ailen[i-1];
    rmax    = PetscMax(rmax,ailen[i]);
    if (fshift) {
      ip = aj + ai[i];
      ap = aa + ai[i];
      N  = ailen[i];
      for (j=0; j<N; j++) {
        ip[j-fshift] = ip[j];
        ap[j-fshift] = ap[j];
      }
    }
    ai[i] = ai[i-1] + ailen[i-1];   /* row offsets now reflect the compacted layout */
  }
  if (m) {
    fshift += imax[m-1] - ailen[m-1];
    ai[m]   = ai[m-1] + ailen[m-1];
  }
  /* reset ilen and imax for each row */
  for (i=0; i<m; i++) {
    ailen[i] = imax[i] = ai[i+1] - ai[i];
  }
  a->nz = ai[m];
  /* assemble each block submatrix; a NULL block here indicates an internal inconsistency */
  for (i=0; i<a->nz; i++) {
    PetscAssert(aa[i],PETSC_COMM_SELF,PETSC_ERR_PLIB,"Null matrix at location %" PetscInt_FMT " column %" PetscInt_FMT " nz %" PetscInt_FMT,i,aj[i],a->nz);
    PetscCall(MatAssemblyBegin(aa[i],MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(aa[i],MAT_FINAL_ASSEMBLY));
  }
  PetscCall(PetscInfo(A,"Matrix size: %" PetscInt_FMT " X %" PetscInt_FMT "; storage space: %" PetscInt_FMT " unneeded,%" PetscInt_FMT " used\n",m,A->cmap->n/A->cmap->bs,fshift,a->nz));
  PetscCall(PetscInfo(A,"Number of mallocs during MatSetValues() is %" PetscInt_FMT "\n",a->reallocs));
  PetscCall(PetscInfo(A,"Maximum nonzeros in any row is %" PetscInt_FMT "\n",rmax));

  A->info.mallocs    += a->reallocs;
  a->reallocs         = 0;
  A->info.nz_unneeded = (double)fshift;
  a->rmax             = rmax;
  PetscCall(MatMarkDiagonal_BlockMat(A));
  PetscFunctionReturn(0);
}
688 
689 static PetscErrorCode MatSetOption_BlockMat(Mat A,MatOption opt,PetscBool flg)
690 {
691   PetscFunctionBegin;
692   if (opt == MAT_SYMMETRIC && flg) {
693     A->ops->sor  = MatSOR_BlockMat_Symmetric;
694     A->ops->mult = MatMult_BlockMat_Symmetric;
695   } else {
696     PetscCall(PetscInfo(A,"Unused matrix option %s\n",MatOptions[opt]));
697   }
698   PetscFunctionReturn(0);
699 }
700 
/* Function table for MATBLOCKMAT; slot positions must follow struct _MatOps in
   petsc/private/matimpl.h (the /NN/ comments mark every fifth slot). */
static struct _MatOps MatOps_Values = {MatSetValues_BlockMat,
                                       NULL,
                                       NULL,
                                       MatMult_BlockMat,
                               /*  4*/ MatMultAdd_BlockMat,
                                       MatMultTranspose_BlockMat,
                                       MatMultTransposeAdd_BlockMat,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 10*/ NULL,
                                       NULL,
                                       NULL,
                                       MatSOR_BlockMat,
                                       NULL,
                               /* 15*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 20*/ NULL,
                                       MatAssemblyEnd_BlockMat,
                                       MatSetOption_BlockMat,
                                       NULL,
                               /* 24*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 29*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 34*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 39*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 44*/ NULL,
                                       NULL,
                                       MatShift_Basic,
                                       NULL,
                                       NULL,
                               /* 49*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 54*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 59*/ MatCreateSubMatrix_BlockMat,
                                       MatDestroy_BlockMat,
                                       MatView_BlockMat,
                                       NULL,
                                       NULL,
                               /* 64*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 69*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 74*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 79*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatLoad_BlockMat,
                               /* 84*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 89*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 94*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 99*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /*104*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /*109*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /*114*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /*119*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /*124*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /*129*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /*134*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /*139*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /*144*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL
};
850 
/*@C
   MatBlockMatSetPreallocation - For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameter nz
   (or the array nnz).  By setting these parameters accurately, performance
   during matrix assembly can be increased by more than a factor of 50.

   Collective

   Input Parameters:
+  B - The matrix
.  bs - size of each block in matrix
.  nz - number of nonzeros per block row (same for all rows)
-  nnz - array containing the number of nonzeros in the various block rows
         (possibly different for each row) or NULL

   Notes:
     If nnz is given then nz is ignored

   Specify the preallocated storage with either nz or nnz (not both).
   Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
   allocation.  For large problems you MUST preallocate memory or you
   will get TERRIBLE performance, see the users' manual chapter on matrices.

   The current MATBLOCKMAT implementation requires nnz to be provided;
   passing nnz=NULL generates an error.

   Level: intermediate

.seealso: MatCreate(), MatCreateBlockMat(), MatSetValues()
@*/
PetscErrorCode  MatBlockMatSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
{
  PetscFunctionBegin;
  /* Dispatch to the type-specific implementation if one is composed on B; no-op otherwise */
  PetscTryMethod(B,"MatBlockMatSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));
  PetscFunctionReturn(0);
}
885 
886 static PetscErrorCode  MatBlockMatSetPreallocation_BlockMat(Mat A,PetscInt bs,PetscInt nz,PetscInt *nnz)
887 {
888   Mat_BlockMat   *bmat = (Mat_BlockMat*)A->data;
889   PetscInt       i;
890 
891   PetscFunctionBegin;
892   PetscCall(PetscLayoutSetBlockSize(A->rmap,bs));
893   PetscCall(PetscLayoutSetBlockSize(A->cmap,bs));
894   PetscCall(PetscLayoutSetUp(A->rmap));
895   PetscCall(PetscLayoutSetUp(A->cmap));
896   PetscCall(PetscLayoutGetBlockSize(A->rmap,&bs));
897 
898   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
899   PetscCheckFalse(nz < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %" PetscInt_FMT,nz);
900   if (nnz) {
901     for (i=0; i<A->rmap->n/bs; i++) {
902       PetscCheckFalse(nnz[i] < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %" PetscInt_FMT " value %" PetscInt_FMT,i,nnz[i]);
903       PetscCheckFalse(nnz[i] > A->cmap->n/bs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than row length: local row %" PetscInt_FMT " value %" PetscInt_FMT " rowlength %" PetscInt_FMT,i,nnz[i],A->cmap->n/bs);
904     }
905   }
906   bmat->mbs = A->rmap->n/bs;
907 
908   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs,NULL,&bmat->right));
909   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs,NULL,&bmat->middle));
910   PetscCall(VecCreateSeq(PETSC_COMM_SELF,bs,&bmat->left));
911 
912   if (!bmat->imax) {
913     PetscCall(PetscMalloc2(A->rmap->n,&bmat->imax,A->rmap->n,&bmat->ilen));
914     PetscCall(PetscLogObjectMemory((PetscObject)A,2*A->rmap->n*sizeof(PetscInt)));
915   }
916   if (PetscLikely(nnz)) {
917     nz = 0;
918     for (i=0; i<A->rmap->n/A->rmap->bs; i++) {
919       bmat->imax[i] = nnz[i];
920       nz           += nnz[i];
921     }
922   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Currently requires block row by row preallocation");
923 
924   /* bmat->ilen will count nonzeros in each row so far. */
925   PetscCall(PetscArrayzero(bmat->ilen,bmat->mbs));
926 
927   /* allocate the matrix space */
928   PetscCall(MatSeqXAIJFreeAIJ(A,(PetscScalar**)&bmat->a,&bmat->j,&bmat->i));
929   PetscCall(PetscMalloc3(nz,&bmat->a,nz,&bmat->j,A->rmap->n+1,&bmat->i));
930   PetscCall(PetscLogObjectMemory((PetscObject)A,(A->rmap->n+1)*sizeof(PetscInt)+nz*(sizeof(PetscScalar)+sizeof(PetscInt))));
931   bmat->i[0] = 0;
932   for (i=1; i<bmat->mbs+1; i++) {
933     bmat->i[i] = bmat->i[i-1] + bmat->imax[i-1];
934   }
935   bmat->singlemalloc = PETSC_TRUE;
936   bmat->free_a       = PETSC_TRUE;
937   bmat->free_ij      = PETSC_TRUE;
938 
939   bmat->nz            = 0;
940   bmat->maxnz         = nz;
941   A->info.nz_unneeded = (double)bmat->maxnz;
942   PetscCall(MatSetOption(A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE));
943   PetscFunctionReturn(0);
944 }
945 
/*MC
   MATBLOCKMAT - A matrix that is defined by a set of Mat objects that represent a sparse block matrix
                 consisting of (usually) sparse blocks.

  Level: advanced

.seealso: MatCreateBlockMat()

M*/
955 
956 PETSC_EXTERN PetscErrorCode MatCreate_BlockMat(Mat A)
957 {
958   Mat_BlockMat   *b;
959 
960   PetscFunctionBegin;
961   PetscCall(PetscNewLog(A,&b));
962   A->data = (void*)b;
963   PetscCall(PetscMemcpy(A->ops,&MatOps_Values,sizeof(struct _MatOps)));
964 
965   A->assembled    = PETSC_TRUE;
966   A->preallocated = PETSC_FALSE;
967   PetscCall(PetscObjectChangeTypeName((PetscObject)A,MATBLOCKMAT));
968 
969   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatBlockMatSetPreallocation_C",MatBlockMatSetPreallocation_BlockMat));
970   PetscFunctionReturn(0);
971 }
972 
/*@C
   MatCreateBlockMat - Creates a new matrix in which each block contains a uniform-size sequential Mat object

  Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (the global row count is determined from this)
.  n  - number of local columns (the global column count is determined from this)
.  bs - size of each submatrix
.  nz  - expected maximum number of nonzero blocks in row (use PETSC_DEFAULT if not known)
-  nnz - expected number of nonzeros per block row if known (use NULL otherwise)

   Output Parameter:
.  A - the matrix

   Level: intermediate

   Notes:
    Matrices of this type are nominally-sparse matrices in which each "entry" is a Mat object.  Each Mat must
   have the same size and be sequential.  The local and global sizes must be compatible with this decomposition.

   For matrices containing parallel submatrices and variable block sizes, see MATNEST.

.seealso: MATBLOCKMAT, MatCreateNest()
@*/
PetscErrorCode  MatCreateBlockMat(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt bs,PetscInt nz,PetscInt *nnz, Mat *A)
{
  PetscFunctionBegin;
  PetscCall(MatCreate(comm,A));
  /* m and n are the local sizes; global sizes are derived by PETSc */
  PetscCall(MatSetSizes(*A,m,n,PETSC_DETERMINE,PETSC_DETERMINE));
  PetscCall(MatSetType(*A,MATBLOCKMAT));
  PetscCall(MatBlockMatSetPreallocation(*A,bs,nz,nnz));
  PetscFunctionReturn(0);
}
1008