xref: /petsc/src/mat/impls/scalapack/matscalapack.c (revision 2e16c0ce58b3a4ec287cbc0a0807bfb0a0fa5ac9)
1 #include <petsc/private/petscscalapack.h>  /*I "petscmat.h" I*/
2 
3 const char ScaLAPACKCitation[] = "@BOOK{scalapack-user-guide,\n"
4 "       AUTHOR = {L. S. Blackford and J. Choi and A. Cleary and E. D'Azevedo and\n"
5 "                 J. Demmel and I. Dhillon and J. Dongarra and S. Hammarling and\n"
6 "                 G. Henry and A. Petitet and K. Stanley and D. Walker and R. C. Whaley},\n"
7 "       TITLE = {Sca{LAPACK} Users' Guide},\n"
8 "       PUBLISHER = {SIAM},\n"
9 "       ADDRESS = {Philadelphia, PA},\n"
10 "       YEAR = 1997\n"
11 "}\n";
12 static PetscBool ScaLAPACKCite = PETSC_FALSE;
13 
14 #define DEFAULT_BLOCKSIZE 64
15 
16 /*
17     The variable Petsc_ScaLAPACK_keyval is used to indicate an MPI attribute that
18   is attached to a communicator, in this case the attribute is a Mat_ScaLAPACK_Grid
19 */
20 static PetscMPIInt Petsc_ScaLAPACK_keyval = MPI_KEYVAL_INVALID;
21 
22 static PetscErrorCode Petsc_ScaLAPACK_keyval_free(void)
23 {
24   PetscFunctionBegin;
25   PetscCall(PetscInfo(NULL,"Freeing Petsc_ScaLAPACK_keyval\n"));
26   PetscCallMPI(MPI_Comm_free_keyval(&Petsc_ScaLAPACK_keyval));
27   PetscFunctionReturn(0);
28 }
29 
30 static PetscErrorCode MatView_ScaLAPACK(Mat A,PetscViewer viewer)
31 {
32   Mat_ScaLAPACK     *a = (Mat_ScaLAPACK*)A->data;
33   PetscBool         iascii;
34   PetscViewerFormat format;
35   Mat               Adense;
36 
37   PetscFunctionBegin;
38   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
39   if (iascii) {
40     PetscCall(PetscViewerGetFormat(viewer,&format));
41     if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
42       PetscCall(PetscViewerASCIIPrintf(viewer,"block sizes: %d,%d\n",(int)a->mb,(int)a->nb));
43       PetscCall(PetscViewerASCIIPrintf(viewer,"grid height=%d, grid width=%d\n",(int)a->grid->nprow,(int)a->grid->npcol));
44       PetscCall(PetscViewerASCIIPrintf(viewer,"coordinates of process owning first row and column: (%d,%d)\n",(int)a->rsrc,(int)a->csrc));
45       PetscCall(PetscViewerASCIIPrintf(viewer,"dimension of largest local matrix: %d x %d\n",(int)a->locr,(int)a->locc));
46       PetscFunctionReturn(0);
47     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
48       PetscFunctionReturn(0);
49     }
50   }
51   /* convert to dense format and call MatView() */
52   PetscCall(MatConvert(A,MATDENSE,MAT_INITIAL_MATRIX,&Adense));
53   PetscCall(MatView(Adense,viewer));
54   PetscCall(MatDestroy(&Adense));
55   PetscFunctionReturn(0);
56 }
57 
58 static PetscErrorCode MatGetInfo_ScaLAPACK(Mat A,MatInfoType flag,MatInfo *info)
59 {
60   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
61   PetscLogDouble isend[2],irecv[2];
62 
63   PetscFunctionBegin;
64   info->block_size = 1.0;
65 
66   isend[0] = a->lld*a->locc;     /* locally allocated */
67   isend[1] = a->locr*a->locc;    /* used submatrix */
68   if (flag == MAT_LOCAL || flag == MAT_GLOBAL_MAX) {
69     info->nz_allocated   = isend[0];
70     info->nz_used        = isend[1];
71   } else if (flag == MAT_GLOBAL_MAX) {
72     PetscCall(MPIU_Allreduce(isend,irecv,2,MPIU_PETSCLOGDOUBLE,MPIU_MAX,PetscObjectComm((PetscObject)A)));
73     info->nz_allocated   = irecv[0];
74     info->nz_used        = irecv[1];
75   } else if (flag == MAT_GLOBAL_SUM) {
76     PetscCall(MPIU_Allreduce(isend,irecv,2,MPIU_PETSCLOGDOUBLE,MPIU_SUM,PetscObjectComm((PetscObject)A)));
77     info->nz_allocated   = irecv[0];
78     info->nz_used        = irecv[1];
79   }
80 
81   info->nz_unneeded       = 0;
82   info->assemblies        = A->num_ass;
83   info->mallocs           = 0;
84   info->memory            = ((PetscObject)A)->mem;
85   info->fill_ratio_given  = 0;
86   info->fill_ratio_needed = 0;
87   info->factor_mallocs    = 0;
88   PetscFunctionReturn(0);
89 }
90 
91 PetscErrorCode MatSetOption_ScaLAPACK(Mat A,MatOption op,PetscBool flg)
92 {
93   Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
94 
95   PetscFunctionBegin;
96   switch (op) {
97     case MAT_NEW_NONZERO_LOCATIONS:
98     case MAT_NEW_NONZERO_LOCATION_ERR:
99     case MAT_NEW_NONZERO_ALLOCATION_ERR:
100     case MAT_SYMMETRIC:
101     case MAT_SORTED_FULL:
102     case MAT_HERMITIAN:
103       break;
104     case MAT_ROW_ORIENTED:
105       a->roworiented = flg;
106       break;
107     default:
108       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unsupported option %s",MatOptions[op]);
109   }
110   PetscFunctionReturn(0);
111 }
112 
/* Insert/add a dense logical block of values: locally owned entries are written
   straight into the local buffer via infog2l index translation; off-process
   entries go to the stash for communication at assembly time. */
static PetscErrorCode MatSetValues_ScaLAPACK(Mat A,PetscInt nr,const PetscInt *rows,PetscInt nc,const PetscInt *cols,const PetscScalar *vals,InsertMode imode)
{
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
  PetscInt       i,j;
  PetscBLASInt   gridx,gcidx,lridx,lcidx,rsrc,csrc;
  PetscBool      roworiented = a->roworiented;

  PetscFunctionBegin;
  PetscCheck(imode == INSERT_VALUES || imode == ADD_VALUES,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for InsertMode %d",(int)imode);
  for (i=0;i<nr;i++) {
    if (rows[i] < 0) continue;  /* negative index means skip this entry */
    PetscCall(PetscBLASIntCast(rows[i]+1,&gridx));  /* ScaLAPACK uses 1-based global indices */
    for (j=0;j<nc;j++) {
      if (cols[j] < 0) continue;
      PetscCall(PetscBLASIntCast(cols[j]+1,&gcidx));
      /* map global (gridx,gcidx) to local (lridx,lcidx) and owning process (rsrc,csrc) */
      PetscStackCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc));
      if (rsrc==a->grid->myrow && csrc==a->grid->mycol) {
        /* owned locally: write into the column-major local buffer (leading dimension lld) */
        if (roworiented) {
          switch (imode) {
            case INSERT_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] = vals[i*nc+j]; break;
            default: a->loc[lridx-1+(lcidx-1)*a->lld] += vals[i*nc+j]; break;
          }
        } else {
          switch (imode) {
            case INSERT_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] = vals[i+j*nr]; break;
            default: a->loc[lridx-1+(lcidx-1)*a->lld] += vals[i+j*nr]; break;
          }
        }
      } else {
        /* off-process entry: stash it (disallowed if MAT_NO_OFF_PROC_ENTRIES was set) */
        PetscCheck(!A->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process entry even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set");
        A->assembled = PETSC_FALSE;
        PetscCall(MatStashValuesRow_Private(&A->stash,rows[i],1,cols+j,roworiented ? vals+i*nc+j : vals+i+j*nr,(PetscBool)(imode==ADD_VALUES)));
      }
    }
  }
  PetscFunctionReturn(0);
}
150 
/* Shared kernel for all mat-vec products: computes y = op(A)*x + beta*y with
   op = A or A^T. The PETSc vectors arrive in a 1d block distribution; they are
   redistributed (pxgemr2d) onto the 2d process grid, PBLASgemv is applied, and
   the result is redistributed back. When beta==0 the initial redistribution of
   y is skipped since its contents are irrelevant. */
static PetscErrorCode MatMultXXXYYY_ScaLAPACK(Mat A,PetscBool transpose,PetscScalar beta,const PetscScalar *x,PetscScalar *y)
{
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
  PetscScalar    *x2d,*y2d,alpha=1.0;
  const PetscInt *ranges;
  PetscBLASInt   xdesc[9],ydesc[9],x2desc[9],y2desc[9],mb,nb,lszx,lszy,zero=0,one=1,xlld,ylld,info;

  PetscFunctionBegin;
  if (transpose) {

    /* create ScaLAPACK descriptors for vectors (1d block distribution):
       x is laid out along the rows of A, y along the columns */
    PetscCall(PetscLayoutGetRanges(A->rmap,&ranges));
    PetscCall(PetscBLASIntCast(ranges[1],&mb));  /* x block size */
    xlld = PetscMax(1,A->rmap->n);
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&xlld,&info));
    PetscCheckScaLapackInfo("descinit",info);
    PetscCall(PetscLayoutGetRanges(A->cmap,&ranges));
    PetscCall(PetscBLASIntCast(ranges[1],&nb));  /* y block size */
    ylld = 1;
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ydesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&ylld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* allocate 2d vectors (local sizes from numroc) */
    lszx = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
    lszy = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
    PetscCall(PetscMalloc2(lszx,&x2d,lszy,&y2d));
    xlld = PetscMax(1,lszx);

    /* create ScaLAPACK descriptors for vectors (2d block distribution) */
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&xlld,&info));
    PetscCheckScaLapackInfo("descinit",info);
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(y2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&ylld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* redistribute x as a column of a 2d matrix */
    PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,(PetscScalar*)x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxcol));

    /* redistribute y as a row of a 2d matrix (only needed when beta contributes) */
    if (beta!=0.0) PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,y,&one,&one,ydesc,y2d,&one,&one,y2desc,&a->grid->ictxrow));

    /* call PBLAS subroutine: y2d = A^T * x2d + beta*y2d */
    PetscStackCallBLAS("PBLASgemv",PBLASgemv_("T",&a->M,&a->N,&alpha,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&one,&beta,y2d,&one,&one,y2desc,&one));

    /* redistribute y from a row of a 2d matrix */
    PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,y2d,&one,&one,y2desc,y,&one,&one,ydesc,&a->grid->ictxrow));

  } else {   /* non-transpose */

    /* create ScaLAPACK descriptors for vectors (1d block distribution):
       x is laid out along the columns of A, y along the rows */
    PetscCall(PetscLayoutGetRanges(A->cmap,&ranges));
    PetscCall(PetscBLASIntCast(ranges[1],&nb));  /* x block size */
    xlld = 1;
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&xlld,&info));
    PetscCheckScaLapackInfo("descinit",info);
    PetscCall(PetscLayoutGetRanges(A->rmap,&ranges));
    PetscCall(PetscBLASIntCast(ranges[1],&mb));  /* y block size */
    ylld = PetscMax(1,A->rmap->n);
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ydesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&ylld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* allocate 2d vectors (local sizes from numroc) */
    lszy = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
    lszx = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
    PetscCall(PetscMalloc2(lszx,&x2d,lszy,&y2d));
    ylld = PetscMax(1,lszy);

    /* create ScaLAPACK descriptors for vectors (2d block distribution) */
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&xlld,&info));
    PetscCheckScaLapackInfo("descinit",info);
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(y2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&ylld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* redistribute x as a row of a 2d matrix */
    PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,(PetscScalar*)x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxrow));

    /* redistribute y as a column of a 2d matrix (only needed when beta contributes) */
    if (beta!=0.0) PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,y,&one,&one,ydesc,y2d,&one,&one,y2desc,&a->grid->ictxcol));

    /* call PBLAS subroutine: y2d = A * x2d + beta*y2d */
    PetscStackCallBLAS("PBLASgemv",PBLASgemv_("N",&a->M,&a->N,&alpha,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&one,&beta,y2d,&one,&one,y2desc,&one));

    /* redistribute y from a column of a 2d matrix */
    PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,y2d,&one,&one,y2desc,y,&one,&one,ydesc,&a->grid->ictxcol));

  }
  PetscCall(PetscFree2(x2d,y2d));
  PetscFunctionReturn(0);
}
239 
240 static PetscErrorCode MatMult_ScaLAPACK(Mat A,Vec x,Vec y)
241 {
242   const PetscScalar *xarray;
243   PetscScalar       *yarray;
244 
245   PetscFunctionBegin;
246   PetscCall(VecGetArrayRead(x,&xarray));
247   PetscCall(VecGetArray(y,&yarray));
248   PetscCall(MatMultXXXYYY_ScaLAPACK(A,PETSC_FALSE,0.0,xarray,yarray));
249   PetscCall(VecRestoreArrayRead(x,&xarray));
250   PetscCall(VecRestoreArray(y,&yarray));
251   PetscFunctionReturn(0);
252 }
253 
254 static PetscErrorCode MatMultTranspose_ScaLAPACK(Mat A,Vec x,Vec y)
255 {
256   const PetscScalar *xarray;
257   PetscScalar       *yarray;
258 
259   PetscFunctionBegin;
260   PetscCall(VecGetArrayRead(x,&xarray));
261   PetscCall(VecGetArray(y,&yarray));
262   PetscCall(MatMultXXXYYY_ScaLAPACK(A,PETSC_TRUE,0.0,xarray,yarray));
263   PetscCall(VecRestoreArrayRead(x,&xarray));
264   PetscCall(VecRestoreArray(y,&yarray));
265   PetscFunctionReturn(0);
266 }
267 
268 static PetscErrorCode MatMultAdd_ScaLAPACK(Mat A,Vec x,Vec y,Vec z)
269 {
270   const PetscScalar *xarray;
271   PetscScalar       *zarray;
272 
273   PetscFunctionBegin;
274   if (y != z) PetscCall(VecCopy(y,z));
275   PetscCall(VecGetArrayRead(x,&xarray));
276   PetscCall(VecGetArray(z,&zarray));
277   PetscCall(MatMultXXXYYY_ScaLAPACK(A,PETSC_FALSE,1.0,xarray,zarray));
278   PetscCall(VecRestoreArrayRead(x,&xarray));
279   PetscCall(VecRestoreArray(z,&zarray));
280   PetscFunctionReturn(0);
281 }
282 
283 static PetscErrorCode MatMultTransposeAdd_ScaLAPACK(Mat A,Vec x,Vec y,Vec z)
284 {
285   const PetscScalar *xarray;
286   PetscScalar       *zarray;
287 
288   PetscFunctionBegin;
289   if (y != z) PetscCall(VecCopy(y,z));
290   PetscCall(VecGetArrayRead(x,&xarray));
291   PetscCall(VecGetArray(z,&zarray));
292   PetscCall(MatMultXXXYYY_ScaLAPACK(A,PETSC_TRUE,1.0,xarray,zarray));
293   PetscCall(VecRestoreArrayRead(x,&xarray));
294   PetscCall(VecRestoreArray(z,&zarray));
295   PetscFunctionReturn(0);
296 }
297 
298 PetscErrorCode MatMatMultNumeric_ScaLAPACK(Mat A,Mat B,Mat C)
299 {
300   Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
301   Mat_ScaLAPACK *b = (Mat_ScaLAPACK*)B->data;
302   Mat_ScaLAPACK *c = (Mat_ScaLAPACK*)C->data;
303   PetscScalar   sone=1.0,zero=0.0;
304   PetscBLASInt  one=1;
305 
306   PetscFunctionBegin;
307   PetscStackCallBLAS("PBLASgemm",PBLASgemm_("N","N",&a->M,&b->N,&a->N,&sone,a->loc,&one,&one,a->desc,b->loc,&one,&one,b->desc,&zero,c->loc,&one,&one,c->desc));
308   C->assembled = PETSC_TRUE;
309   PetscFunctionReturn(0);
310 }
311 
312 PetscErrorCode MatMatMultSymbolic_ScaLAPACK(Mat A,Mat B,PetscReal fill,Mat C)
313 {
314   PetscFunctionBegin;
315   PetscCall(MatSetSizes(C,A->rmap->n,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE));
316   PetscCall(MatSetType(C,MATSCALAPACK));
317   PetscCall(MatSetUp(C));
318   C->ops->matmultnumeric = MatMatMultNumeric_ScaLAPACK;
319   PetscFunctionReturn(0);
320 }
321 
322 static PetscErrorCode MatMatTransposeMultNumeric_ScaLAPACK(Mat A,Mat B,Mat C)
323 {
324   Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
325   Mat_ScaLAPACK *b = (Mat_ScaLAPACK*)B->data;
326   Mat_ScaLAPACK *c = (Mat_ScaLAPACK*)C->data;
327   PetscScalar   sone=1.0,zero=0.0;
328   PetscBLASInt  one=1;
329 
330   PetscFunctionBegin;
331   PetscStackCallBLAS("PBLASgemm",PBLASgemm_("N","T",&a->M,&b->M,&a->N,&sone,a->loc,&one,&one,a->desc,b->loc,&one,&one,b->desc,&zero,c->loc,&one,&one,c->desc));
332   C->assembled = PETSC_TRUE;
333   PetscFunctionReturn(0);
334 }
335 
336 static PetscErrorCode MatMatTransposeMultSymbolic_ScaLAPACK(Mat A,Mat B,PetscReal fill,Mat C)
337 {
338   PetscFunctionBegin;
339   PetscCall(MatSetSizes(C,A->rmap->n,B->rmap->n,PETSC_DECIDE,PETSC_DECIDE));
340   PetscCall(MatSetType(C,MATSCALAPACK));
341   PetscCall(MatSetUp(C));
342   PetscFunctionReturn(0);
343 }
344 
345 /* --------------------------------------- */
346 static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_AB(Mat C)
347 {
348   PetscFunctionBegin;
349   C->ops->matmultsymbolic = MatMatMultSymbolic_ScaLAPACK;
350   C->ops->productsymbolic = MatProductSymbolic_AB;
351   PetscFunctionReturn(0);
352 }
353 
354 static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_ABt(Mat C)
355 {
356   PetscFunctionBegin;
357   C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_ScaLAPACK;
358   C->ops->productsymbolic          = MatProductSymbolic_ABt;
359   PetscFunctionReturn(0);
360 }
361 
362 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_ScaLAPACK(Mat C)
363 {
364   Mat_Product    *product = C->product;
365 
366   PetscFunctionBegin;
367   switch (product->type) {
368     case MATPRODUCT_AB:
369       PetscCall(MatProductSetFromOptions_ScaLAPACK_AB(C));
370       break;
371     case MATPRODUCT_ABt:
372       PetscCall(MatProductSetFromOptions_ScaLAPACK_ABt(C));
373       break;
374     default: SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_SUP,"MatProduct type %s is not supported for ScaLAPACK and ScaLAPACK matrices",MatProductTypes[product->type]);
375   }
376   PetscFunctionReturn(0);
377 }
378 /* --------------------------------------- */
379 
/* Extract the diagonal into vector D. Two symmetric code paths: when M<=N the
   diagonal is gathered as a column of a 2d matrix distributed over process rows;
   otherwise as a row distributed over process columns. In both cases the entries
   are collected element by element (elget/elset) and then redistributed
   (pxgemr2d) to D's 1d layout. */
static PetscErrorCode MatGetDiagonal_ScaLAPACK(Mat A,Vec D)
{
  Mat_ScaLAPACK     *a = (Mat_ScaLAPACK*)A->data;
  PetscScalar       *darray,*d2d,v;
  const PetscInt    *ranges;
  PetscBLASInt      j,ddesc[9],d2desc[9],mb,nb,lszd,zero=0,one=1,dlld,info;

  PetscFunctionBegin;
  PetscCall(VecGetArray(D,&darray));

  if (A->rmap->N<=A->cmap->N) {   /* row version */

    /* create ScaLAPACK descriptor for vector (1d block distribution) */
    PetscCall(PetscLayoutGetRanges(A->rmap,&ranges));
    PetscCall(PetscBLASIntCast(ranges[1],&mb));  /* D block size */
    dlld = PetscMax(1,A->rmap->n);
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&dlld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* allocate 2d vector (zeroed: only owned entries get written below) */
    lszd = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
    PetscCall(PetscCalloc1(lszd,&d2d));
    dlld = PetscMax(1,lszd);

    /* create ScaLAPACK descriptor for vector (2d block distribution) */
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&dlld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* collect diagonal: elget broadcasts A(j,j) along the process row, elset
       stores it into the local part of the 2d vector (1-based indices) */
    for (j=1;j<=a->M;j++) {
      PetscStackCallBLAS("SCALAPACKelget",SCALAPACKelget_("R"," ",&v,a->loc,&j,&j,a->desc));
      PetscStackCallBLAS("SCALAPACKelset",SCALAPACKelset_(d2d,&j,&one,d2desc,&v));
    }

    /* redistribute d from a column of a 2d matrix */
    PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,d2d,&one,&one,d2desc,darray,&one,&one,ddesc,&a->grid->ictxcol));
    PetscCall(PetscFree(d2d));

  } else {   /* column version */

    /* create ScaLAPACK descriptor for vector (1d block distribution) */
    PetscCall(PetscLayoutGetRanges(A->cmap,&ranges));
    PetscCall(PetscBLASIntCast(ranges[1],&nb));  /* D block size */
    dlld = 1;
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&dlld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* allocate 2d vector (zeroed: only owned entries get written below) */
    lszd = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
    PetscCall(PetscCalloc1(lszd,&d2d));

    /* create ScaLAPACK descriptor for vector (2d block distribution) */
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&dlld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* collect diagonal: elget broadcasts A(j,j) along the process column */
    for (j=1;j<=a->N;j++) {
      PetscStackCallBLAS("SCALAPACKelget",SCALAPACKelget_("C"," ",&v,a->loc,&j,&j,a->desc));
      PetscStackCallBLAS("SCALAPACKelset",SCALAPACKelset_(d2d,&one,&j,d2desc,&v));
    }

    /* redistribute d from a row of a 2d matrix */
    PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,d2d,&one,&one,d2desc,darray,&one,&one,ddesc,&a->grid->ictxrow));
    PetscCall(PetscFree(d2d));
  }

  PetscCall(VecRestoreArray(D,&darray));
  PetscCall(VecAssemblyBegin(D));
  PetscCall(VecAssemblyEnd(D));
  PetscFunctionReturn(0);
}
451 
/* A = diag(L)*A*diag(R). Each scaling vector is redistributed from its 1d PETSc
   layout onto one row (for R) or one column (for L) of the 2d grid, broadcast
   to the remaining processes with BLACS gebs2d/gebr2d, and then applied locally
   column-wise (R) or row-wise (L). Either L or R may be NULL to skip that side.
   NOTE(review): Cdgebs2d/Cdgebr2d are the double-precision real BLACS broadcast
   routines — confirm this matches the PetscScalar configuration in use. */
static PetscErrorCode MatDiagonalScale_ScaLAPACK(Mat A,Vec L,Vec R)
{
  Mat_ScaLAPACK     *a = (Mat_ScaLAPACK*)A->data;
  const PetscScalar *d;
  const PetscInt    *ranges;
  PetscScalar       *d2d;
  PetscBLASInt      i,j,ddesc[9],d2desc[9],mb,nb,lszd,zero=0,one=1,dlld,info;

  PetscFunctionBegin;
  if (R) {
    PetscCall(VecGetArrayRead(R,(const PetscScalar **)&d));
    /* create ScaLAPACK descriptor for vector (1d block distribution) */
    PetscCall(PetscLayoutGetRanges(A->cmap,&ranges));
    PetscCall(PetscBLASIntCast(ranges[1],&nb));  /* D block size */
    dlld = 1;
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&dlld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* allocate 2d vector */
    lszd = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
    PetscCall(PetscCalloc1(lszd,&d2d));

    /* create ScaLAPACK descriptor for vector (2d block distribution) */
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&dlld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* redistribute d to a row of a 2d matrix */
    PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,(PetscScalar*)d,&one,&one,ddesc,d2d,&one,&one,d2desc,&a->grid->ictxrow));

    /* broadcast along process columns: row 0 sends, all other rows receive */
    if (!a->grid->myrow) Cdgebs2d(a->grid->ictxt,"C"," ",1,lszd,d2d,dlld);
    else Cdgebr2d(a->grid->ictxt,"C"," ",1,lszd,d2d,dlld,0,a->grid->mycol);

    /* local scaling: multiply each local column j by d2d[j] */
    for (j=0;j<a->locc;j++) for (i=0;i<a->locr;i++) a->loc[i+j*a->lld] *= d2d[j];

    PetscCall(PetscFree(d2d));
    PetscCall(VecRestoreArrayRead(R,(const PetscScalar **)&d));
  }
  if (L) {
    PetscCall(VecGetArrayRead(L,(const PetscScalar **)&d));
    /* create ScaLAPACK descriptor for vector (1d block distribution) */
    PetscCall(PetscLayoutGetRanges(A->rmap,&ranges));
    PetscCall(PetscBLASIntCast(ranges[1],&mb));  /* D block size */
    dlld = PetscMax(1,A->rmap->n);
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&dlld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* allocate 2d vector */
    lszd = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
    PetscCall(PetscCalloc1(lszd,&d2d));
    dlld = PetscMax(1,lszd);

    /* create ScaLAPACK descriptor for vector (2d block distribution) */
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&dlld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* redistribute d to a column of a 2d matrix */
    PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,(PetscScalar*)d,&one,&one,ddesc,d2d,&one,&one,d2desc,&a->grid->ictxcol));

    /* broadcast along process rows: column 0 sends, all other columns receive */
    if (!a->grid->mycol) Cdgebs2d(a->grid->ictxt,"R"," ",lszd,1,d2d,dlld);
    else Cdgebr2d(a->grid->ictxt,"R"," ",lszd,1,d2d,dlld,a->grid->myrow,0);

    /* local scaling: multiply each local row i by d2d[i] */
    for (i=0;i<a->locr;i++) for (j=0;j<a->locc;j++) a->loc[i+j*a->lld] *= d2d[i];

    PetscCall(PetscFree(d2d));
    PetscCall(VecRestoreArrayRead(L,(const PetscScalar **)&d));
  }
  PetscFunctionReturn(0);
}
524 
525 static PetscErrorCode MatMissingDiagonal_ScaLAPACK(Mat A,PetscBool *missing,PetscInt *d)
526 {
527   PetscFunctionBegin;
528   *missing = PETSC_FALSE;
529   PetscFunctionReturn(0);
530 }
531 
532 static PetscErrorCode MatScale_ScaLAPACK(Mat X,PetscScalar a)
533 {
534   Mat_ScaLAPACK *x = (Mat_ScaLAPACK*)X->data;
535   PetscBLASInt  n,one=1;
536 
537   PetscFunctionBegin;
538   n = x->lld*x->locc;
539   PetscStackCallBLAS("BLASscal",BLASscal_(&n,&a,x->loc,&one));
540   PetscFunctionReturn(0);
541 }
542 
543 static PetscErrorCode MatShift_ScaLAPACK(Mat X,PetscScalar alpha)
544 {
545   Mat_ScaLAPACK *x = (Mat_ScaLAPACK*)X->data;
546   PetscBLASInt  i,n;
547   PetscScalar   v;
548 
549   PetscFunctionBegin;
550   n = PetscMin(x->M,x->N);
551   for (i=1;i<=n;i++) {
552     PetscStackCallBLAS("SCALAPACKelget",SCALAPACKelget_("-"," ",&v,x->loc,&i,&i,x->desc));
553     v += alpha;
554     PetscStackCallBLAS("SCALAPACKelset",SCALAPACKelset_(x->loc,&i,&i,x->desc,&v));
555   }
556   PetscFunctionReturn(0);
557 }
558 
559 static PetscErrorCode MatAXPY_ScaLAPACK(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
560 {
561   Mat_ScaLAPACK  *x = (Mat_ScaLAPACK*)X->data;
562   Mat_ScaLAPACK  *y = (Mat_ScaLAPACK*)Y->data;
563   PetscBLASInt   one=1;
564   PetscScalar    beta=1.0;
565 
566   PetscFunctionBegin;
567   MatScaLAPACKCheckDistribution(Y,1,X,3);
568   PetscStackCallBLAS("SCALAPACKmatadd",SCALAPACKmatadd_(&x->M,&x->N,&alpha,x->loc,&one,&one,x->desc,&beta,y->loc,&one,&one,y->desc));
569   PetscCall(PetscObjectStateIncrease((PetscObject)Y));
570   PetscFunctionReturn(0);
571 }
572 
573 static PetscErrorCode MatCopy_ScaLAPACK(Mat A,Mat B,MatStructure str)
574 {
575   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
576   Mat_ScaLAPACK  *b = (Mat_ScaLAPACK*)B->data;
577 
578   PetscFunctionBegin;
579   PetscCall(PetscArraycpy(b->loc,a->loc,a->lld*a->locc));
580   PetscCall(PetscObjectStateIncrease((PetscObject)B));
581   PetscFunctionReturn(0);
582 }
583 
584 static PetscErrorCode MatDuplicate_ScaLAPACK(Mat A,MatDuplicateOption op,Mat *B)
585 {
586   Mat            Bs;
587   MPI_Comm       comm;
588   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data,*b;
589 
590   PetscFunctionBegin;
591   PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
592   PetscCall(MatCreate(comm,&Bs));
593   PetscCall(MatSetSizes(Bs,A->rmap->n,A->cmap->n,PETSC_DECIDE,PETSC_DECIDE));
594   PetscCall(MatSetType(Bs,MATSCALAPACK));
595   b = (Mat_ScaLAPACK*)Bs->data;
596   b->M    = a->M;
597   b->N    = a->N;
598   b->mb   = a->mb;
599   b->nb   = a->nb;
600   b->rsrc = a->rsrc;
601   b->csrc = a->csrc;
602   PetscCall(MatSetUp(Bs));
603   *B = Bs;
604   if (op == MAT_COPY_VALUES) {
605     PetscCall(PetscArraycpy(b->loc,a->loc,a->lld*a->locc));
606   }
607   Bs->assembled = PETSC_TRUE;
608   PetscFunctionReturn(0);
609 }
610 
/* B = A^T (only MAT_INITIAL_MATRIX supported): create B with swapped
   dimensions/block sizes, then transpose with PBLAS tran.
   NOTE(review): in complex builds PBLAStran_ appears to conjugate as well
   (hence the undo loop below) — confirm against the PBLAStran_ macro mapping. */
static PetscErrorCode MatTranspose_ScaLAPACK(Mat A,MatReuse reuse,Mat *B)
{
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data, *b;
  Mat            Bs = *B;
  PetscBLASInt   one=1;
  PetscScalar    sone=1.0,zero=0.0;
#if defined(PETSC_USE_COMPLEX)
  PetscInt       i,j;
#endif

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    /* B gets A's block sizes and source coordinates with rows/columns swapped */
    PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A),a->nb,a->mb,a->N,a->M,a->csrc,a->rsrc,&Bs));
    *B = Bs;
  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Only MAT_INITIAL_MATRIX supported");
  b = (Mat_ScaLAPACK*)Bs->data;
  PetscStackCallBLAS("PBLAStran",PBLAStran_(&a->N,&a->M,&sone,a->loc,&one,&one,a->desc,&zero,b->loc,&one,&one,b->desc));
#if defined(PETSC_USE_COMPLEX)
  /* undo conjugation */
  for (i=0;i<b->locr;i++) for (j=0;j<b->locc;j++) b->loc[i+j*b->lld] = PetscConj(b->loc[i+j*b->lld]);
#endif
  Bs->assembled = PETSC_TRUE;
  PetscFunctionReturn(0);
}
635 
636 static PetscErrorCode MatConjugate_ScaLAPACK(Mat A)
637 {
638   Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
639   PetscInt      i,j;
640 
641   PetscFunctionBegin;
642   for (i=0;i<a->locr;i++) for (j=0;j<a->locc;j++) a->loc[i+j*a->lld] = PetscConj(a->loc[i+j*a->lld]);
643   PetscFunctionReturn(0);
644 }
645 
/* B = A^H (only MAT_INITIAL_MATRIX supported). Same structure as
   MatTranspose_ScaLAPACK but without the conjugation-undo loop.
   NOTE(review): this relies on PBLAStran_ conjugating in complex builds —
   confirm against the PBLAStran_ macro mapping. */
static PetscErrorCode MatHermitianTranspose_ScaLAPACK(Mat A,MatReuse reuse,Mat *B)
{
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data, *b;
  Mat            Bs = *B;
  PetscBLASInt   one=1;
  PetscScalar    sone=1.0,zero=0.0;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    /* B gets A's block sizes and source coordinates with rows/columns swapped */
    PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A),a->nb,a->mb,a->N,a->M,a->csrc,a->rsrc,&Bs));
    *B = Bs;
  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Only MAT_INITIAL_MATRIX supported");
  b = (Mat_ScaLAPACK*)Bs->data;
  PetscStackCallBLAS("PBLAStran",PBLAStran_(&a->N,&a->M,&sone,a->loc,&one,&one,a->desc,&zero,b->loc,&one,&one,b->desc));
  Bs->assembled = PETSC_TRUE;
  PetscFunctionReturn(0);
}
663 
/* Solve A*X = B using the factors stored in A (X may alias B; B is copied to X
   first). The right-hand side is redistributed from its 1d PETSc layout to a
   column of a 2d matrix, solved in place with getrs (LU) or potrs (Cholesky),
   and redistributed back. */
static PetscErrorCode MatSolve_ScaLAPACK(Mat A,Vec B,Vec X)
{
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
  PetscScalar    *x,*x2d;
  const PetscInt *ranges;
  PetscBLASInt   xdesc[9],x2desc[9],mb,lszx,zero=0,one=1,xlld,nrhs=1,info;

  PetscFunctionBegin;
  PetscCall(VecCopy(B,X));
  PetscCall(VecGetArray(X,&x));

  /* create ScaLAPACK descriptor for a vector (1d block distribution) */
  PetscCall(PetscLayoutGetRanges(A->rmap,&ranges));
  PetscCall(PetscBLASIntCast(ranges[1],&mb));  /* x block size */
  xlld = PetscMax(1,A->rmap->n);
  PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&xlld,&info));
  PetscCheckScaLapackInfo("descinit",info);

  /* allocate 2d vector (local size from numroc) */
  lszx = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
  PetscCall(PetscMalloc1(lszx,&x2d));
  xlld = PetscMax(1,lszx);

  /* create ScaLAPACK descriptor for a vector (2d block distribution) */
  PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&xlld,&info));
  PetscCheckScaLapackInfo("descinit",info);

  /* redistribute x as a column of a 2d matrix */
  PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxcol));

  /* call ScaLAPACK subroutine matching the stored factorization */
  switch (A->factortype) {
    case MAT_FACTOR_LU:
      PetscStackCallBLAS("SCALAPACKgetrs",SCALAPACKgetrs_("N",&a->M,&nrhs,a->loc,&one,&one,a->desc,a->pivots,x2d,&one,&one,x2desc,&info));
      PetscCheckScaLapackInfo("getrs",info);
      break;
    case MAT_FACTOR_CHOLESKY:
      PetscStackCallBLAS("SCALAPACKpotrs",SCALAPACKpotrs_("L",&a->M,&nrhs,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&info));
      PetscCheckScaLapackInfo("potrs",info);
      break;
    default:
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unfactored Matrix or Unsupported MatFactorType");
  }

  /* redistribute x from a column of a 2d matrix */
  PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,x2d,&one,&one,x2desc,x,&one,&one,xdesc,&a->grid->ictxcol));

  PetscCall(PetscFree(x2d));
  PetscCall(VecRestoreArray(X,&x));
  PetscFunctionReturn(0);
}
715 
716 static PetscErrorCode MatSolveAdd_ScaLAPACK(Mat A,Vec B,Vec Y,Vec X)
717 {
718   PetscFunctionBegin;
719   PetscCall(MatSolve_ScaLAPACK(A,B,X));
720   PetscCall(VecAXPY(X,1,Y));
721   PetscFunctionReturn(0);
722 }
723 
/* Solve A*X = B for multiple right-hand sides. B and X must both be
   MATSCALAPACK with matching distributions; B's local buffer is copied into X
   and the solve happens in place via getrs (LU) or potrs (Cholesky). */
static PetscErrorCode MatMatSolve_ScaLAPACK(Mat A,Mat B,Mat X)
{
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data,*b,*x;
  PetscBool      flg1,flg2;
  PetscBLASInt   one=1,info;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)B,MATSCALAPACK,&flg1));
  PetscCall(PetscObjectTypeCompare((PetscObject)X,MATSCALAPACK,&flg2));
  PetscCheck((flg1 && flg2),PETSC_COMM_SELF,PETSC_ERR_SUP,"Both B and X must be of type MATSCALAPACK");
  MatScaLAPACKCheckDistribution(B,1,X,2);
  b = (Mat_ScaLAPACK*)B->data;
  x = (Mat_ScaLAPACK*)X->data;
  PetscCall(PetscArraycpy(x->loc,b->loc,b->lld*b->locc));  /* X starts as a copy of B */

  switch (A->factortype) {
    case MAT_FACTOR_LU:
      PetscStackCallBLAS("SCALAPACKgetrs",SCALAPACKgetrs_("N",&a->M,&x->N,a->loc,&one,&one,a->desc,a->pivots,x->loc,&one,&one,x->desc,&info));
      PetscCheckScaLapackInfo("getrs",info);
      break;
    case MAT_FACTOR_CHOLESKY:
      PetscStackCallBLAS("SCALAPACKpotrs",SCALAPACKpotrs_("L",&a->M,&x->N,a->loc,&one,&one,a->desc,x->loc,&one,&one,x->desc,&info));
      PetscCheckScaLapackInfo("potrs",info);
      break;
    default:
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unfactored Matrix or Unsupported MatFactorType");
  }
  PetscFunctionReturn(0);
}
753 
754 static PetscErrorCode MatLUFactor_ScaLAPACK(Mat A,IS row,IS col,const MatFactorInfo *factorinfo)
755 {
756   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
757   PetscBLASInt   one=1,info;
758 
759   PetscFunctionBegin;
760   if (!a->pivots) {
761     PetscCall(PetscMalloc1(a->locr+a->mb,&a->pivots));
762     PetscCall(PetscLogObjectMemory((PetscObject)A,a->locr*sizeof(PetscBLASInt)));
763   }
764   PetscStackCallBLAS("SCALAPACKgetrf",SCALAPACKgetrf_(&a->M,&a->N,a->loc,&one,&one,a->desc,a->pivots,&info));
765   PetscCheckScaLapackInfo("getrf",info);
766   A->factortype = MAT_FACTOR_LU;
767   A->assembled  = PETSC_TRUE;
768 
769   PetscCall(PetscFree(A->solvertype));
770   PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK,&A->solvertype));
771   PetscFunctionReturn(0);
772 }
773 
/* Numeric LU factorization: copy A into the factor matrix F and factor F in place */
static PetscErrorCode MatLUFactorNumeric_ScaLAPACK(Mat F,Mat A,const MatFactorInfo *info)
{
  PetscFunctionBegin;
  PetscCall(MatCopy(A,F,SAME_NONZERO_PATTERN));
  PetscCall(MatLUFactor_ScaLAPACK(F,0,0,info));
  PetscFunctionReturn(0);
}
781 
/* Symbolic LU factorization: nothing to do for a dense ScaLAPACK matrix */
static PetscErrorCode MatLUFactorSymbolic_ScaLAPACK(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
{
  PetscFunctionBegin;
  /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */
  PetscFunctionReturn(0);
}
788 
/* In-place Cholesky factorization via ScaLAPACK's potrf, using the lower triangular
   part ("L"). The permutation perm and factorinfo are ignored. */
static PetscErrorCode MatCholeskyFactor_ScaLAPACK(Mat A,IS perm,const MatFactorInfo *factorinfo)
{
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
  PetscBLASInt   one=1,info;

  PetscFunctionBegin;
  PetscStackCallBLAS("SCALAPACKpotrf",SCALAPACKpotrf_("L",&a->M,a->loc,&one,&one,a->desc,&info));
  PetscCheckScaLapackInfo("potrf",info);
  A->factortype = MAT_FACTOR_CHOLESKY;
  A->assembled  = PETSC_TRUE;

  PetscCall(PetscFree(A->solvertype));
  PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK,&A->solvertype));
  PetscFunctionReturn(0);
}
804 
/* Numeric Cholesky factorization: copy A into the factor matrix F and factor F in place */
static PetscErrorCode MatCholeskyFactorNumeric_ScaLAPACK(Mat F,Mat A,const MatFactorInfo *info)
{
  PetscFunctionBegin;
  PetscCall(MatCopy(A,F,SAME_NONZERO_PATTERN));
  PetscCall(MatCholeskyFactor_ScaLAPACK(F,0,info));
  PetscFunctionReturn(0);
}
812 
/* Symbolic Cholesky factorization: nothing to do for a dense ScaLAPACK matrix */
static PetscErrorCode MatCholeskyFactorSymbolic_ScaLAPACK(Mat F,Mat A,IS perm,const MatFactorInfo *info)
{
  PetscFunctionBegin;
  /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */
  PetscFunctionReturn(0);
}
819 
/* Reports the solver package used for factorizations of this matrix type */
PetscErrorCode MatFactorGetSolverType_scalapack_scalapack(Mat A,MatSolverType *type)
{
  PetscFunctionBegin;
  *type = MATSOLVERSCALAPACK;
  PetscFunctionReturn(0);
}
826 
/* Creates the factor matrix F for a ScaLAPACK matrix A: same communicator, block
   sizes, global sizes, and source process coordinates as A */
static PetscErrorCode MatGetFactor_scalapack_scalapack(Mat A,MatFactorType ftype,Mat *F)
{
  Mat            B;
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;

  PetscFunctionBegin;
  /* Create the factorization matrix */
  PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A),a->mb,a->nb,a->M,a->N,a->rsrc,a->csrc,&B));
  /* symbolic factorization is a no-op for this format */
  B->trivialsymbolic = PETSC_TRUE;
  B->factortype = ftype;
  PetscCall(PetscFree(B->solvertype));
  PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK,&B->solvertype));

  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_scalapack_scalapack));
  *F = B;
  PetscFunctionReturn(0);
}
844 
/* Registers the ScaLAPACK solver package for LU and Cholesky factorizations of MATSCALAPACK */
PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_ScaLAPACK(void)
{
  PetscFunctionBegin;
  PetscCall(MatSolverTypeRegister(MATSOLVERSCALAPACK,MATSCALAPACK,MAT_FACTOR_LU,MatGetFactor_scalapack_scalapack));
  PetscCall(MatSolverTypeRegister(MATSOLVERSCALAPACK,MATSCALAPACK,MAT_FACTOR_CHOLESKY,MatGetFactor_scalapack_scalapack));
  PetscFunctionReturn(0);
}
852 
853 static PetscErrorCode MatNorm_ScaLAPACK(Mat A,NormType type,PetscReal *nrm)
854 {
855   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
856   PetscBLASInt   one=1,lwork=0;
857   const char     *ntype;
858   PetscScalar    *work=NULL,dummy;
859 
860   PetscFunctionBegin;
861   switch (type) {
862     case NORM_1:
863       ntype = "1";
864       lwork = PetscMax(a->locr,a->locc);
865       break;
866     case NORM_FROBENIUS:
867       ntype = "F";
868       work  = &dummy;
869       break;
870     case NORM_INFINITY:
871       ntype = "I";
872       lwork = PetscMax(a->locr,a->locc);
873       break;
874     default:
875       SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Unsupported norm type");
876   }
877   if (lwork) PetscCall(PetscMalloc1(lwork,&work));
878   *nrm = SCALAPACKlange_(ntype,&a->M,&a->N,a->loc,&one,&one,a->desc,work);
879   if (lwork) PetscCall(PetscFree(work));
880   PetscFunctionReturn(0);
881 }
882 
/* Sets all entries of A to zero by clearing the whole local array (lld*locc entries) */
static PetscErrorCode MatZeroEntries_ScaLAPACK(Mat A)
{
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;

  PetscFunctionBegin;
  PetscCall(PetscArrayzero(a->loc,a->lld*a->locc));
  PetscFunctionReturn(0);
}
891 
892 static PetscErrorCode MatGetOwnershipIS_ScaLAPACK(Mat A,IS *rows,IS *cols)
893 {
894   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
895   PetscInt       i,n,nb,isrc,nproc,iproc,*idx;
896 
897   PetscFunctionBegin;
898   if (rows) {
899     n     = a->locr;
900     nb    = a->mb;
901     isrc  = a->rsrc;
902     nproc = a->grid->nprow;
903     iproc = a->grid->myrow;
904     PetscCall(PetscMalloc1(n,&idx));
905     for (i=0;i<n;i++) idx[i] = nproc*nb*(i/nb) + i%nb + ((nproc+iproc-isrc)%nproc)*nb;
906     PetscCall(ISCreateGeneral(PETSC_COMM_SELF,n,idx,PETSC_OWN_POINTER,rows));
907   }
908   if (cols) {
909     n     = a->locc;
910     nb    = a->nb;
911     isrc  = a->csrc;
912     nproc = a->grid->npcol;
913     iproc = a->grid->mycol;
914     PetscCall(PetscMalloc1(n,&idx));
915     for (i=0;i<n;i++) idx[i] = nproc*nb*(i/nb) + i%nb + ((nproc+iproc-isrc)%nproc)*nb;
916     PetscCall(ISCreateGeneral(PETSC_COMM_SELF,n,idx,PETSC_OWN_POINTER,cols));
917   }
918   PetscFunctionReturn(0);
919 }
920 
/* Converts a ScaLAPACK matrix to MATDENSE. Uses ScaLAPACK's gemr2d to redistribute
   the 2d block-cyclic layout to a 1d block-row layout. If the destination matrix is
   reused and has a different row layout, an auxiliary dense matrix is used and rows
   are transferred with MatSetValues(). */
static PetscErrorCode MatConvert_ScaLAPACK_Dense(Mat A,MatType newtype,MatReuse reuse,Mat *B)
{
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
  Mat            Bmpi;
  MPI_Comm       comm;
  PetscInt       i,M=A->rmap->N,N=A->cmap->N,m,n,rstart,rend,nz;
  const PetscInt *ranges,*branges,*cwork;
  const PetscScalar *vwork;
  PetscBLASInt   bdesc[9],bmb,zero=0,one=1,lld,info;
  PetscScalar    *barray;
  PetscBool      differ=PETSC_FALSE;
  PetscMPIInt    size;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
  PetscCall(PetscLayoutGetRanges(A->rmap,&ranges));

  if (reuse == MAT_REUSE_MATRIX) { /* check if local sizes differ in A and B */
    PetscCallMPI(MPI_Comm_size(comm,&size));
    PetscCall(PetscLayoutGetRanges((*B)->rmap,&branges));
    for (i=0;i<size;i++) if (ranges[i+1]!=branges[i+1]) { differ=PETSC_TRUE; break; }
  }

  if (reuse == MAT_REUSE_MATRIX && differ) { /* special case, use auxiliary dense matrix */
    PetscCall(MatCreate(comm,&Bmpi));
    m = PETSC_DECIDE;
    PetscCall(PetscSplitOwnershipEqual(comm,&m,&M));
    n = PETSC_DECIDE;
    PetscCall(PetscSplitOwnershipEqual(comm,&n,&N));
    PetscCall(MatSetSizes(Bmpi,m,n,M,N));
    PetscCall(MatSetType(Bmpi,MATDENSE));
    PetscCall(MatSetUp(Bmpi));

    /* create ScaLAPACK descriptor for B (1d block distribution) */
    PetscCall(PetscBLASIntCast(ranges[1],&bmb));  /* row block size */
    lld = PetscMax(A->rmap->n,1);  /* local leading dimension */
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(bdesc,&a->M,&a->N,&bmb,&a->N,&zero,&zero,&a->grid->ictxcol,&lld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* redistribute matrix */
    PetscCall(MatDenseGetArray(Bmpi,&barray));
    PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&a->N,a->loc,&one,&one,a->desc,barray,&one,&one,bdesc,&a->grid->ictxcol));
    PetscCall(MatDenseRestoreArray(Bmpi,&barray));
    PetscCall(MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY));

    /* transfer rows of auxiliary matrix to the final matrix B */
    PetscCall(MatGetOwnershipRange(Bmpi,&rstart,&rend));
    for (i=rstart;i<rend;i++) {
      PetscCall(MatGetRow(Bmpi,i,&nz,&cwork,&vwork));
      PetscCall(MatSetValues(*B,1,&i,nz,cwork,vwork,INSERT_VALUES));
      PetscCall(MatRestoreRow(Bmpi,i,&nz,&cwork,&vwork));
    }
    PetscCall(MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY));
    PetscCall(MatDestroy(&Bmpi));

  } else {  /* normal cases */

    if (reuse == MAT_REUSE_MATRIX) Bmpi = *B;
    else {
      PetscCall(MatCreate(comm,&Bmpi));
      m = PETSC_DECIDE;
      PetscCall(PetscSplitOwnershipEqual(comm,&m,&M));
      n = PETSC_DECIDE;
      PetscCall(PetscSplitOwnershipEqual(comm,&n,&N));
      PetscCall(MatSetSizes(Bmpi,m,n,M,N));
      PetscCall(MatSetType(Bmpi,MATDENSE));
      PetscCall(MatSetUp(Bmpi));
    }

    /* create ScaLAPACK descriptor for B (1d block distribution) */
    PetscCall(PetscBLASIntCast(ranges[1],&bmb));  /* row block size */
    lld = PetscMax(A->rmap->n,1);  /* local leading dimension */
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(bdesc,&a->M,&a->N,&bmb,&a->N,&zero,&zero,&a->grid->ictxcol,&lld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* redistribute matrix */
    PetscCall(MatDenseGetArray(Bmpi,&barray));
    PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&a->N,a->loc,&one,&one,a->desc,barray,&one,&one,bdesc,&a->grid->ictxcol));
    PetscCall(MatDenseRestoreArray(Bmpi,&barray));

    PetscCall(MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY));
    if (reuse == MAT_INPLACE_MATRIX) {
      PetscCall(MatHeaderReplace(A,&Bmpi));
    } else *B = Bmpi;
  }
  PetscFunctionReturn(0);
}
1011 
1012 static inline PetscErrorCode MatScaLAPACKCheckLayout(PetscLayout map,PetscBool *correct)
1013 {
1014   const PetscInt *ranges;
1015   PetscMPIInt    size;
1016   PetscInt       i,n;
1017 
1018   PetscFunctionBegin;
1019   *correct = PETSC_TRUE;
1020   PetscCallMPI(MPI_Comm_size(map->comm,&size));
1021   if (size>1) {
1022     PetscCall(PetscLayoutGetRanges(map,&ranges));
1023     n = ranges[1]-ranges[0];
1024     for (i=1;i<size;i++) if (ranges[i+1]-ranges[i]!=n) break;
1025     *correct = (PetscBool)(i == size || (i == size - 1 && ranges[i+1]-ranges[i] <= n));
1026   }
1027   PetscFunctionReturn(0);
1028 }
1029 
/* Converts a MATDENSE matrix to MATSCALAPACK. If the source layout is compatible
   with ScaLAPACK (equal local sizes except possibly the last process), gemr2d does
   the redistribution; otherwise entries are transferred with MatSetValues(). */
PETSC_INTERN PetscErrorCode MatConvert_Dense_ScaLAPACK(Mat A,MatType newtype,MatReuse reuse,Mat *B)
{
  Mat_ScaLAPACK  *b;
  Mat            Bmpi;
  MPI_Comm       comm;
  PetscInt       M=A->rmap->N,N=A->cmap->N,m,n;
  const PetscInt *ranges,*rows,*cols;
  PetscBLASInt   adesc[9],amb,zero=0,one=1,lld,info;
  PetscScalar    *aarray;
  IS             ir,ic;
  PetscInt       lda;
  PetscBool      flg;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A,&comm));

  if (reuse == MAT_REUSE_MATRIX) Bmpi = *B;
  else {
    PetscCall(MatCreate(comm,&Bmpi));
    m = PETSC_DECIDE;
    PetscCall(PetscSplitOwnershipEqual(comm,&m,&M));
    n = PETSC_DECIDE;
    PetscCall(PetscSplitOwnershipEqual(comm,&n,&N));
    PetscCall(MatSetSizes(Bmpi,m,n,M,N));
    PetscCall(MatSetType(Bmpi,MATSCALAPACK));
    PetscCall(MatSetUp(Bmpi));
  }
  b = (Mat_ScaLAPACK*)Bmpi->data;

  PetscCall(MatDenseGetLDA(A,&lda));
  PetscCall(MatDenseGetArray(A,&aarray));
  /* both row and column layouts must be compatible to use the fast path */
  PetscCall(MatScaLAPACKCheckLayout(A->rmap,&flg));
  if (flg) PetscCall(MatScaLAPACKCheckLayout(A->cmap,&flg));
  if (flg) { /* if the input Mat has a ScaLAPACK-compatible layout, use ScaLAPACK for the redistribution */
    /* create ScaLAPACK descriptor for A (1d block distribution) */
    PetscCall(PetscLayoutGetRanges(A->rmap,&ranges));
    PetscCall(PetscBLASIntCast(ranges[1],&amb));  /* row block size */
    lld = PetscMax(lda,1);  /* local leading dimension */
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(adesc,&b->M,&b->N,&amb,&b->N,&zero,&zero,&b->grid->ictxcol,&lld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* redistribute matrix */
    PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&b->M,&b->N,aarray,&one,&one,adesc,b->loc,&one,&one,b->desc,&b->grid->ictxcol));
    Bmpi->nooffprocentries = PETSC_TRUE;
  } else { /* if the input Mat has a ScaLAPACK-incompatible layout, redistribute via MatSetValues() */
    PetscCheck(lda == A->rmap->n,PETSC_COMM_SELF,PETSC_ERR_SUP,"Leading dimension (%" PetscInt_FMT ") different than local number of rows (%" PetscInt_FMT ")",lda,A->rmap->n);
    /* the dense array is column-major, so insert columns rather than rows */
    b->roworiented = PETSC_FALSE;
    PetscCall(MatGetOwnershipIS(A,&ir,&ic));
    PetscCall(ISGetIndices(ir,&rows));
    PetscCall(ISGetIndices(ic,&cols));
    PetscCall(MatSetValues(Bmpi,A->rmap->n,rows,A->cmap->N,cols,aarray,INSERT_VALUES));
    PetscCall(ISRestoreIndices(ir,&rows));
    PetscCall(ISRestoreIndices(ic,&cols));
    PetscCall(ISDestroy(&ic));
    PetscCall(ISDestroy(&ir));
  }
  PetscCall(MatDenseRestoreArray(A,&aarray));
  PetscCall(MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY));
  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A,&Bmpi));
  } else *B = Bmpi;
  PetscFunctionReturn(0);
}
1094 
1095 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
1096 {
1097   Mat               mat_scal;
1098   PetscInt          M=A->rmap->N,N=A->cmap->N,rstart=A->rmap->rstart,rend=A->rmap->rend,m,n,row,ncols;
1099   const PetscInt    *cols;
1100   const PetscScalar *vals;
1101 
1102   PetscFunctionBegin;
1103   if (reuse == MAT_REUSE_MATRIX) {
1104     mat_scal = *newmat;
1105     PetscCall(MatZeroEntries(mat_scal));
1106   } else {
1107     PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&mat_scal));
1108     m = PETSC_DECIDE;
1109     PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&m,&M));
1110     n = PETSC_DECIDE;
1111     PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&n,&N));
1112     PetscCall(MatSetSizes(mat_scal,m,n,M,N));
1113     PetscCall(MatSetType(mat_scal,MATSCALAPACK));
1114     PetscCall(MatSetUp(mat_scal));
1115   }
1116   for (row=rstart;row<rend;row++) {
1117     PetscCall(MatGetRow(A,row,&ncols,&cols,&vals));
1118     PetscCall(MatSetValues(mat_scal,1,&row,ncols,cols,vals,INSERT_VALUES));
1119     PetscCall(MatRestoreRow(A,row,&ncols,&cols,&vals));
1120   }
1121   PetscCall(MatAssemblyBegin(mat_scal,MAT_FINAL_ASSEMBLY));
1122   PetscCall(MatAssemblyEnd(mat_scal,MAT_FINAL_ASSEMBLY));
1123 
1124   if (reuse == MAT_INPLACE_MATRIX) PetscCall(MatHeaderReplace(A,&mat_scal));
1125   else *newmat = mat_scal;
1126   PetscFunctionReturn(0);
1127 }
1128 
/* Converts a symmetric SBAIJ matrix (which stores only the upper triangle) to
   MATSCALAPACK. Each stored entry is inserted at both (i,j) and (j,i); the mirrored
   entry is conjugated when the matrix is marked Hermitian. */
PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
{
  Mat               mat_scal;
  PetscInt          M=A->rmap->N,N=A->cmap->N,m,n,row,ncols,j,rstart=A->rmap->rstart,rend=A->rmap->rend;
  const PetscInt    *cols;
  const PetscScalar *vals;
  PetscScalar       v;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) {
    mat_scal = *newmat;
    PetscCall(MatZeroEntries(mat_scal));
  } else {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&mat_scal));
    m = PETSC_DECIDE;
    PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&m,&M));
    n = PETSC_DECIDE;
    PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&n,&N));
    PetscCall(MatSetSizes(mat_scal,m,n,M,N));
    PetscCall(MatSetType(mat_scal,MATSCALAPACK));
    PetscCall(MatSetUp(mat_scal));
  }
  PetscCall(MatGetRowUpperTriangular(A));
  for (row=rstart;row<rend;row++) {
    PetscCall(MatGetRow(A,row,&ncols,&cols,&vals));
    /* ADD_VALUES is used throughout so upper and mirrored entries accumulate consistently */
    PetscCall(MatSetValues(mat_scal,1,&row,ncols,cols,vals,ADD_VALUES));
    for (j=0;j<ncols;j++) { /* lower triangular part */
      if (cols[j] == row) continue;  /* diagonal entries must not be inserted twice */
      v    = A->hermitian == PETSC_BOOL3_TRUE ? PetscConj(vals[j]) : vals[j];
      PetscCall(MatSetValues(mat_scal,1,&cols[j],1,&row,&v,ADD_VALUES));
    }
    PetscCall(MatRestoreRow(A,row,&ncols,&cols,&vals));
  }
  PetscCall(MatRestoreRowUpperTriangular(A));
  PetscCall(MatAssemblyBegin(mat_scal,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat_scal,MAT_FINAL_ASSEMBLY));

  if (reuse == MAT_INPLACE_MATRIX) PetscCall(MatHeaderReplace(A,&mat_scal));
  else *newmat = mat_scal;
  PetscFunctionReturn(0);
}
1170 
/* Allocates (zero-initialized) the local storage of lld*locc entries for the
   2d block-cyclic layout; any previous storage is freed first */
static PetscErrorCode MatScaLAPACKSetPreallocation(Mat A)
{
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
  PetscInt       sz=0;

  PetscFunctionBegin;
  PetscCall(PetscLayoutSetUp(A->rmap));
  PetscCall(PetscLayoutSetUp(A->cmap));
  /* default the leading dimension to the number of local rows */
  if (!a->lld) a->lld = a->locr;

  PetscCall(PetscFree(a->loc));
  /* PetscIntMultError guards against overflow of lld*locc */
  PetscCall(PetscIntMultError(a->lld,a->locc,&sz));
  PetscCall(PetscCalloc1(sz,&a->loc));
  PetscCall(PetscLogObjectMemory((PetscObject)A,sz*sizeof(PetscScalar)));

  A->preallocated = PETSC_TRUE;
  PetscFunctionReturn(0);
}
1189 
/* Destroys a ScaLAPACK matrix: frees local storage and pivots, releases this
   matrix's reference to the shared BLACS grid (stored as an MPI attribute on the
   inner communicator), and tears the grid down when the last reference goes away */
static PetscErrorCode MatDestroy_ScaLAPACK(Mat A)
{
  Mat_ScaLAPACK      *a = (Mat_ScaLAPACK*)A->data;
  Mat_ScaLAPACK_Grid *grid;
  PetscBool          flg;
  MPI_Comm           icomm;

  PetscFunctionBegin;
  PetscCall(MatStashDestroy_Private(&A->stash));
  PetscCall(PetscFree(a->loc));
  PetscCall(PetscFree(a->pivots));
  /* retrieve the grid attached to the inner (PETSc-duplicated) communicator */
  PetscCall(PetscCommDuplicate(PetscObjectComm((PetscObject)A),&icomm,NULL));
  PetscCallMPI(MPI_Comm_get_attr(icomm,Petsc_ScaLAPACK_keyval,(void**)&grid,(int*)&flg));
  if (--grid->grid_refct == 0) {
    /* last user of the grid: release all three BLACS contexts and the attribute */
    Cblacs_gridexit(grid->ictxt);
    Cblacs_gridexit(grid->ictxrow);
    Cblacs_gridexit(grid->ictxcol);
    PetscCall(PetscFree(grid));
    PetscCallMPI(MPI_Comm_delete_attr(icomm,Petsc_ScaLAPACK_keyval));
  }
  PetscCall(PetscCommDestroy(&icomm));
  PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatGetOwnershipIS_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKSetBlockSizes_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKGetBlockSizes_C",NULL));
  PetscCall(PetscFree(A->data));
  PetscFunctionReturn(0);
}
1218 
/* Sets up a ScaLAPACK matrix: validates the PETSc layouts, computes the local sizes
   of the 2d block-cyclic distribution via numroc, allocates local storage, and
   initializes the ScaLAPACK array descriptor */
PetscErrorCode MatSetUp_ScaLAPACK(Mat A)
{
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
  PetscBLASInt   info=0;
  PetscBool      flg;

  PetscFunctionBegin;
  PetscCall(PetscLayoutSetUp(A->rmap));
  PetscCall(PetscLayoutSetUp(A->cmap));

  /* check that the layout is as enforced by MatCreateScaLAPACK() */
  PetscCall(MatScaLAPACKCheckLayout(A->rmap,&flg));
  PetscCheck(flg,A->rmap->comm,PETSC_ERR_SUP,"MATSCALAPACK must have equal local row sizes in all processes (except possibly the last one), consider using MatCreateScaLAPACK");
  PetscCall(MatScaLAPACKCheckLayout(A->cmap,&flg));
  PetscCheck(flg,A->cmap->comm,PETSC_ERR_SUP,"MATSCALAPACK must have equal local column sizes in all processes (except possibly the last one), consider using MatCreateScaLAPACK");

  /* compute local sizes */
  PetscCall(PetscBLASIntCast(A->rmap->N,&a->M));
  PetscCall(PetscBLASIntCast(A->cmap->N,&a->N));
  a->locr = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
  a->locc = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
  /* descinit requires lld >= 1 even when this process owns no rows */
  a->lld  = PetscMax(1,a->locr);

  /* allocate local array */
  PetscCall(MatScaLAPACKSetPreallocation(A));

  /* set up ScaLAPACK descriptor */
  PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(a->desc,&a->M,&a->N,&a->mb,&a->nb,&a->rsrc,&a->csrc,&a->grid->ictxt,&a->lld,&info));
  PetscCheckScaLapackInfo("descinit",info);
  PetscFunctionReturn(0);
}
1250 
/* Starts communicating stashed off-process entries; a no-op when the caller
   guaranteed there are none (A->nooffprocentries) */
PetscErrorCode MatAssemblyBegin_ScaLAPACK(Mat A,MatAssemblyType type)
{
  PetscInt       nstash,reallocs;

  PetscFunctionBegin;
  if (A->nooffprocentries) PetscFunctionReturn(0);
  PetscCall(MatStashScatterBegin_Private(A,&A->stash,NULL));
  PetscCall(MatStashGetInfo_Private(&A->stash,&nstash,&reallocs));
  PetscCall(PetscInfo(A,"Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs));
  PetscFunctionReturn(0);
}
1262 
/* Receives stashed off-process entries and stores each into the local array,
   mapping global (row,col) to local coordinates with ScaLAPACK's infog2l */
PetscErrorCode MatAssemblyEnd_ScaLAPACK(Mat A,MatAssemblyType type)
{
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
  PetscMPIInt    n;
  PetscInt       i,flg,*row,*col;
  PetscScalar    *val;
  PetscBLASInt   gridx,gcidx,lridx,lcidx,rsrc,csrc;

  PetscFunctionBegin;
  if (A->nooffprocentries) PetscFunctionReturn(0);
  /* drain incoming messages until the stash reports none are left */
  while (1) {
    PetscCall(MatStashScatterGetMesg_Private(&A->stash,&n,&row,&col,&val,&flg));
    if (!flg) break;
    for (i=0;i<n;i++) {
      /* infog2l uses 1-based global indices */
      PetscCall(PetscBLASIntCast(row[i]+1,&gridx));
      PetscCall(PetscBLASIntCast(col[i]+1,&gcidx));
      PetscStackCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc));
      PetscCheck(rsrc == a->grid->myrow && csrc == a->grid->mycol,PetscObjectComm((PetscObject)A),PETSC_ERR_LIB,"Something went wrong, received value does not belong to this process");
      switch (A->insertmode) {
        case INSERT_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] = val[i]; break;
        case ADD_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] += val[i]; break;
        default: SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for InsertMode %d",(int)A->insertmode);
      }
    }
  }
  PetscCall(MatStashScatterEnd_Private(&A->stash));
  PetscFunctionReturn(0);
}
1291 
1292 PetscErrorCode MatLoad_ScaLAPACK(Mat newMat,PetscViewer viewer)
1293 {
1294   Mat            Adense,As;
1295   MPI_Comm       comm;
1296 
1297   PetscFunctionBegin;
1298   PetscCall(PetscObjectGetComm((PetscObject)newMat,&comm));
1299   PetscCall(MatCreate(comm,&Adense));
1300   PetscCall(MatSetType(Adense,MATDENSE));
1301   PetscCall(MatLoad(Adense,viewer));
1302   PetscCall(MatConvert(Adense,MATSCALAPACK,MAT_INITIAL_MATRIX,&As));
1303   PetscCall(MatDestroy(&Adense));
1304   PetscCall(MatHeaderReplace(newMat,&As));
1305   PetscFunctionReturn(0);
1306 }
1307 
1308 /* -------------------------------------------------------------------*/
/* Function table for MATSCALAPACK. Entries are positional (the /*NN*/ markers give
   the slot index in struct _MatOps); 0 marks an operation not implemented here. */
static struct _MatOps MatOps_Values = {
       MatSetValues_ScaLAPACK,
       0,
       0,
       MatMult_ScaLAPACK,
/* 4*/ MatMultAdd_ScaLAPACK,
       MatMultTranspose_ScaLAPACK,
       MatMultTransposeAdd_ScaLAPACK,
       MatSolve_ScaLAPACK,
       MatSolveAdd_ScaLAPACK,
       0,
/*10*/ 0,
       MatLUFactor_ScaLAPACK,
       MatCholeskyFactor_ScaLAPACK,
       0,
       MatTranspose_ScaLAPACK,
/*15*/ MatGetInfo_ScaLAPACK,
       0,
       MatGetDiagonal_ScaLAPACK,
       MatDiagonalScale_ScaLAPACK,
       MatNorm_ScaLAPACK,
/*20*/ MatAssemblyBegin_ScaLAPACK,
       MatAssemblyEnd_ScaLAPACK,
       MatSetOption_ScaLAPACK,
       MatZeroEntries_ScaLAPACK,
/*24*/ 0,
       MatLUFactorSymbolic_ScaLAPACK,
       MatLUFactorNumeric_ScaLAPACK,
       MatCholeskyFactorSymbolic_ScaLAPACK,
       MatCholeskyFactorNumeric_ScaLAPACK,
/*29*/ MatSetUp_ScaLAPACK,
       0,
       0,
       0,
       0,
/*34*/ MatDuplicate_ScaLAPACK,
       0,
       0,
       0,
       0,
/*39*/ MatAXPY_ScaLAPACK,
       0,
       0,
       0,
       MatCopy_ScaLAPACK,
/*44*/ 0,
       MatScale_ScaLAPACK,
       MatShift_ScaLAPACK,
       0,
       0,
/*49*/ 0,
       0,
       0,
       0,
       0,
/*54*/ 0,
       0,
       0,
       0,
       0,
/*59*/ 0,
       MatDestroy_ScaLAPACK,
       MatView_ScaLAPACK,
       0,
       0,
/*64*/ 0,
       0,
       0,
       0,
       0,
/*69*/ 0,
       0,
       MatConvert_ScaLAPACK_Dense,
       0,
       0,
/*74*/ 0,
       0,
       0,
       0,
       0,
/*79*/ 0,
       0,
       0,
       0,
       MatLoad_ScaLAPACK,
/*84*/ 0,
       0,
       0,
       0,
       0,
/*89*/ 0,
       0,
       MatMatMultNumeric_ScaLAPACK,
       0,
       0,
/*94*/ 0,
       0,
       0,
       MatMatTransposeMultNumeric_ScaLAPACK,
       0,
/*99*/ MatProductSetFromOptions_ScaLAPACK,
       0,
       0,
       MatConjugate_ScaLAPACK,
       0,
/*104*/0,
       0,
       0,
       0,
       0,
/*109*/MatMatSolve_ScaLAPACK,
       0,
       0,
       0,
       MatMissingDiagonal_ScaLAPACK,
/*114*/0,
       0,
       0,
       0,
       0,
/*119*/0,
       MatHermitianTranspose_ScaLAPACK,
       0,
       0,
       0,
/*124*/0,
       0,
       0,
       0,
       0,
/*129*/0,
       0,
       0,
       0,
       0,
/*134*/0,
       0,
       0,
       0,
       0,
       0,
/*140*/0,
       0,
       0,
       0,
       0,
/*145*/0,
       0,
       0,
       0,
       0
};
1461 
/* Custom stash scatter for MATSCALAPACK: routes each stashed (row,col,value) to the
   process that owns it in the 2d block-cyclic distribution (computed with infog2l and
   Cblacs_pnum), then posts the sends/receives used later by MatAssemblyEnd_ScaLAPACK.
   Two messages per destination: one with indices (row,col pairs), one with values. */
static PetscErrorCode MatStashScatterBegin_ScaLAPACK(Mat mat,MatStash *stash,PetscInt *owners)
{
  PetscInt           *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2;
  PetscInt           size=stash->size,nsends;
  PetscInt           count,*sindices,**rindices,i,j,l;
  PetscScalar        **rvalues,*svalues;
  MPI_Comm           comm = stash->comm;
  MPI_Request        *send_waits,*recv_waits,*recv_waits1,*recv_waits2;
  PetscMPIInt        *sizes,*nlengths,nreceives;
  PetscInt           *sp_idx,*sp_idy;
  PetscScalar        *sp_val;
  PetscMatStashSpace space,space_next;
  PetscBLASInt       gridx,gcidx,lridx,lcidx,rsrc,csrc;
  Mat_ScaLAPACK      *a = (Mat_ScaLAPACK*)mat->data;

  PetscFunctionBegin;
  {                             /* make sure all processors are either in INSERTMODE or ADDMODE */
    InsertMode addv;
    PetscCall(MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat)));
    PetscCheck(addv != (ADD_VALUES|INSERT_VALUES),PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
    mat->insertmode = addv; /* in case this processor had no cache */
  }

  bs2 = stash->bs*stash->bs;

  /*  first count number of contributors to each processor */
  PetscCall(PetscCalloc1(size,&nlengths));
  PetscCall(PetscMalloc1(stash->n+1,&owner));

  i     = j    = 0;
  space = stash->space_head;
  while (space) {
    space_next = space->next;
    for (l=0; l<space->local_used; l++) {
      /* map the 1-based global (row,col) to the owning process in the process grid */
      PetscCall(PetscBLASIntCast(space->idx[l]+1,&gridx));
      PetscCall(PetscBLASIntCast(space->idy[l]+1,&gcidx));
      PetscStackCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc));
      j = Cblacs_pnum(a->grid->ictxt,rsrc,csrc);
      nlengths[j]++; owner[i] = j;
      i++;
    }
    space = space_next;
  }

  /* Now check what procs get messages - and compute nsends. */
  PetscCall(PetscCalloc1(size,&sizes));
  for (i=0, nsends=0; i<size; i++) {
    if (nlengths[i]) {
      sizes[i] = 1; nsends++;
    }
  }

  {PetscMPIInt *onodes,*olengths;
   /* Determine the number of messages to expect, their lengths, from from-ids */
   PetscCall(PetscGatherNumberOfMessages(comm,sizes,nlengths,&nreceives));
   PetscCall(PetscGatherMessageLengths(comm,nsends,nreceives,nlengths,&onodes,&olengths));
   /* since clubbing row,col - lengths are multiplied by 2 */
   for (i=0; i<nreceives; i++) olengths[i] *=2;
   PetscCall(PetscPostIrecvInt(comm,tag1,nreceives,onodes,olengths,&rindices,&recv_waits1));
   /* values are size 'bs2' lengths (and remove earlier factor 2 */
   for (i=0; i<nreceives; i++) olengths[i] = olengths[i]*bs2/2;
   PetscCall(PetscPostIrecvScalar(comm,tag2,nreceives,onodes,olengths,&rvalues,&recv_waits2));
   PetscCall(PetscFree(onodes));
   PetscCall(PetscFree(olengths));}

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  PetscCall(PetscMalloc2(bs2*stash->n,&svalues,2*(stash->n+1),&sindices));
  PetscCall(PetscMalloc1(2*nsends,&send_waits));
  PetscCall(PetscMalloc2(size,&startv,size,&starti));
  /* use 2 sends the first with all_a, the next with all_i and all_j */
  startv[0] = 0; starti[0] = 0;
  for (i=1; i<size; i++) {
    startv[i] = startv[i-1] + nlengths[i-1];
    starti[i] = starti[i-1] + 2*nlengths[i-1];
  }

  /* pack the stashed entries into per-destination contiguous send buffers */
  i     = 0;
  space = stash->space_head;
  while (space) {
    space_next = space->next;
    sp_idx     = space->idx;
    sp_idy     = space->idy;
    sp_val     = space->val;
    for (l=0; l<space->local_used; l++) {
      j = owner[i];
      if (bs2 == 1) {
        svalues[startv[j]] = sp_val[l];
      } else {
        PetscInt    k;
        PetscScalar *buf1,*buf2;
        buf1 = svalues+bs2*startv[j];
        buf2 = space->val + bs2*l;
        for (k=0; k<bs2; k++) buf1[k] = buf2[k];
      }
      sindices[starti[j]]             = sp_idx[l];
      sindices[starti[j]+nlengths[j]] = sp_idy[l];
      startv[j]++;
      starti[j]++;
      i++;
    }
    space = space_next;
  }
  /* startv was consumed during packing; rebuild the per-destination offsets */
  startv[0] = 0;
  for (i=1; i<size; i++) startv[i] = startv[i-1] + nlengths[i-1];

  for (i=0,count=0; i<size; i++) {
    if (sizes[i]) {
      PetscCallMPI(MPI_Isend(sindices+2*startv[i],2*nlengths[i],MPIU_INT,i,tag1,comm,send_waits+count++));
      PetscCallMPI(MPI_Isend(svalues+bs2*startv[i],bs2*nlengths[i],MPIU_SCALAR,i,tag2,comm,send_waits+count++));
    }
  }
#if defined(PETSC_USE_INFO)
  PetscCall(PetscInfo(NULL,"No of messages: %" PetscInt_FMT "\n",nsends));
  for (i=0; i<size; i++) {
    if (sizes[i]) {
      PetscCall(PetscInfo(NULL,"Mesg_to: %" PetscInt_FMT ": size: %zu bytes\n",i,(size_t)(nlengths[i]*(bs2*sizeof(PetscScalar)+2*sizeof(PetscInt)))));
    }
  }
#endif
  PetscCall(PetscFree(nlengths));
  PetscCall(PetscFree(owner));
  PetscCall(PetscFree2(startv,starti));
  PetscCall(PetscFree(sizes));

  /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */
  PetscCall(PetscMalloc1(2*nreceives,&recv_waits));

  for (i=0; i<nreceives; i++) {
    recv_waits[2*i]   = recv_waits1[i];
    recv_waits[2*i+1] = recv_waits2[i];
  }
  stash->recv_waits = recv_waits;

  PetscCall(PetscFree(recv_waits1));
  PetscCall(PetscFree(recv_waits2));

  stash->svalues         = svalues;
  stash->sindices        = sindices;
  stash->rvalues         = rvalues;
  stash->rindices        = rindices;
  stash->send_waits      = send_waits;
  stash->nsends          = nsends;
  stash->nrecvs          = nreceives;
  stash->reproduce_count = 0;
  PetscFunctionReturn(0);
}
1611 
1612 static PetscErrorCode MatScaLAPACKSetBlockSizes_ScaLAPACK(Mat A,PetscInt mb,PetscInt nb)
1613 {
1614   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
1615 
1616   PetscFunctionBegin;
1617   PetscCheck(!A->preallocated,PETSC_COMM_SELF,PETSC_ERR_ORDER,"Cannot change block sizes after MatSetUp");
1618   PetscCheck(mb >= 1 || mb == PETSC_DECIDE,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"mb %" PetscInt_FMT " must be at least 1",mb);
1619   PetscCheck(nb >= 1 || nb == PETSC_DECIDE,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"nb %" PetscInt_FMT " must be at least 1",nb);
1620   PetscCall(PetscBLASIntCast((mb==PETSC_DECIDE)?DEFAULT_BLOCKSIZE:mb,&a->mb));
1621   PetscCall(PetscBLASIntCast((nb==PETSC_DECIDE)?a->mb:nb,&a->nb));
1622   PetscFunctionReturn(0);
1623 }
1624 
1625 /*@
1626    MatScaLAPACKSetBlockSizes - Sets the block sizes to be used for the distribution of
1627    the ScaLAPACK matrix
1628 
1629    Logically Collective on A
1630 
1631    Input Parameters:
1632 +  A  - a MATSCALAPACK matrix
1633 .  mb - the row block size
1634 -  nb - the column block size
1635 
1636    Level: intermediate
1637 
1638 .seealso: `MatCreateScaLAPACK()`, `MatScaLAPACKGetBlockSizes()`
1639 @*/
PetscErrorCode MatScaLAPACKSetBlockSizes(Mat A,PetscInt mb,PetscInt nb)
{
  PetscFunctionBegin;
  /* validate the header first, then enforce that mb/nb are collective-consistent across ranks */
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  PetscValidLogicalCollectiveInt(A,mb,2);
  PetscValidLogicalCollectiveInt(A,nb,3);
  /* PetscTryMethod: silently does nothing if A is not of a type that composes this method */
  PetscTryMethod(A,"MatScaLAPACKSetBlockSizes_C",(Mat,PetscInt,PetscInt),(A,mb,nb));
  PetscFunctionReturn(0);
}
1649 
1650 static PetscErrorCode MatScaLAPACKGetBlockSizes_ScaLAPACK(Mat A,PetscInt *mb,PetscInt *nb)
1651 {
1652   Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
1653 
1654   PetscFunctionBegin;
1655   if (mb) *mb = a->mb;
1656   if (nb) *nb = a->nb;
1657   PetscFunctionReturn(0);
1658 }
1659 
1660 /*@
1661    MatScaLAPACKGetBlockSizes - Gets the block sizes used in the distribution of
1662    the ScaLAPACK matrix
1663 
1664    Not collective
1665 
1666    Input Parameter:
1667 .  A  - a MATSCALAPACK matrix
1668 
1669    Output Parameters:
1670 +  mb - the row block size
1671 -  nb - the column block size
1672 
1673    Level: intermediate
1674 
1675 .seealso: `MatCreateScaLAPACK()`, `MatScaLAPACKSetBlockSizes()`
1676 @*/
PetscErrorCode MatScaLAPACKGetBlockSizes(Mat A,PetscInt *mb,PetscInt *nb)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  /* PetscUseMethod (unlike PetscTryMethod) errors if A's type did not compose this method */
  PetscUseMethod(A,"MatScaLAPACKGetBlockSizes_C",(Mat,PetscInt*,PetscInt*),(A,mb,nb));
  PetscFunctionReturn(0);
}
1684 
1685 PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Ref(MatStash*,PetscMPIInt*,PetscInt**,PetscInt**,PetscScalar**,PetscInt*);
1686 PETSC_INTERN PetscErrorCode MatStashScatterEnd_Ref(MatStash*);
1687 
1688 /*MC
1689    MATSCALAPACK = "scalapack" - A matrix type for dense matrices using the ScaLAPACK package
1690 
1691    Use ./configure --download-scalapack to install PETSc to use ScaLAPACK
1692 
1693    Options Database Keys:
1694 +  -mat_type scalapack - sets the matrix type to "scalapack" during a call to MatSetFromOptions()
1695 .  -pc_factor_mat_solver_type scalapack - to use this direct solver with the option -pc_type lu
1696 .  -mat_scalapack_grid_height - sets Grid Height for 2D cyclic ordering of internal matrix
1697 -  -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma)
1698 
  Note:
   Unlike most matrix formats, this format does not store all the matrix entries for a contiguous
1701    range of rows on an MPI rank. Use `MatGetOwnershipIS()` to determine what values are stored on
1702    the given rank.
1703 
1704    Level: beginner
1705 
1706 .seealso: `MATDENSE`, `MATELEMENTAL`, `MatGetOwnershipIS()`
1707 M*/
1708 
/*
   Constructor for MATSCALAPACK: installs the function table, sets up the matrix stash
   for off-process MatSetValues(), and obtains (or creates) the BLACS process grid
   shared by all ScaLAPACK matrices living on the same communicator.
*/
PETSC_EXTERN PetscErrorCode MatCreate_ScaLAPACK(Mat A)
{
  Mat_ScaLAPACK      *a;
  PetscBool          flg,flg1;
  Mat_ScaLAPACK_Grid *grid;
  MPI_Comm           icomm;
  PetscBLASInt       nprow,npcol,myrow,mycol;
  PetscInt           optv1,k=2,array[2]={0,0};
  PetscMPIInt        size;

  PetscFunctionBegin;
  PetscCall(PetscMemcpy(A->ops,&MatOps_Values,sizeof(struct _MatOps)));
  A->insertmode = NOT_SET_VALUES;

  /* stash with block size 1; scatter callbacks route stashed (row,col,value) triples at assembly */
  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)A),1,&A->stash));
  A->stash.ScatterBegin   = MatStashScatterBegin_ScaLAPACK;
  A->stash.ScatterGetMesg = MatStashScatterGetMesg_Ref;
  A->stash.ScatterEnd     = MatStashScatterEnd_Ref;
  A->stash.ScatterDestroy = NULL;

  PetscCall(PetscNewLog(A,&a));
  A->data = (void*)a;

  /* Grid needs to be shared between multiple Mats on the same communicator, implement by attribute caching on the MPI_Comm */
  if (Petsc_ScaLAPACK_keyval == MPI_KEYVAL_INVALID) {
    /* first ScaLAPACK matrix ever created: register the keyval and its finalizer once */
    PetscCallMPI(MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN,MPI_COMM_NULL_DELETE_FN,&Petsc_ScaLAPACK_keyval,(void*)0));
    PetscCall(PetscRegisterFinalize(Petsc_ScaLAPACK_keyval_free));
    PetscCall(PetscCitationsRegister(ScaLAPACKCitation,&ScaLAPACKCite));
  }
  /* use PETSc's duplicated inner communicator so the attribute survives user comm manipulation */
  PetscCall(PetscCommDuplicate(PetscObjectComm((PetscObject)A),&icomm,NULL));
  PetscCallMPI(MPI_Comm_get_attr(icomm,Petsc_ScaLAPACK_keyval,(void**)&grid,(int*)&flg));
  if (!flg) {
    /* no grid cached on this communicator yet: build one and attach it */
    PetscCall(PetscNewLog(A,&grid));

    PetscCallMPI(MPI_Comm_size(icomm,&size));
    /* start from floor(sqrt(size)); the +0.001 guards against FP truncation of exact squares */
    grid->nprow = (PetscInt) (PetscSqrtReal((PetscReal)size) + 0.001);

    PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"ScaLAPACK Grid Options","Mat");
    PetscCall(PetscOptionsInt("-mat_scalapack_grid_height","Grid Height","None",grid->nprow,&optv1,&flg1));
    if (flg1) {
      /* NOTE(review): optv1 <= 0 would make size % optv1 undefined behavior — consider validating the option value */
      PetscCheck(size % optv1 == 0,PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_INCOMP,"Grid Height %" PetscInt_FMT " must evenly divide CommSize %d",optv1,size);
      grid->nprow = optv1;
    }
    PetscOptionsEnd();

    if (size % grid->nprow) grid->nprow = 1;  /* cannot use a squarish grid, use a 1d grid */
    grid->npcol = size/grid->nprow;
    PetscCall(PetscBLASIntCast(grid->nprow,&nprow));
    PetscCall(PetscBLASIntCast(grid->npcol,&npcol));
    /* create the main nprow x npcol BLACS context over icomm, row-major process ordering */
    grid->ictxt = Csys2blacs_handle(icomm);
    Cblacs_gridinit(&grid->ictxt,"R",nprow,npcol);
    /* query back the actual grid shape and this process's coordinates in it */
    Cblacs_gridinfo(grid->ictxt,&nprow,&npcol,&myrow,&mycol);
    grid->grid_refct = 1;
    grid->nprow      = nprow;
    grid->npcol      = npcol;
    grid->myrow      = myrow;
    grid->mycol      = mycol;
    /* auxiliary 1d BLACS contexts for 1xsize and sizex1 grids */
    grid->ictxrow = Csys2blacs_handle(icomm);
    Cblacs_gridinit(&grid->ictxrow,"R",1,size);
    grid->ictxcol = Csys2blacs_handle(icomm);
    Cblacs_gridinit(&grid->ictxcol,"R",size,1);
    /* cache the grid on the communicator so later matrices reuse it */
    PetscCallMPI(MPI_Comm_set_attr(icomm,Petsc_ScaLAPACK_keyval,(void*)grid));

  } else grid->grid_refct++;  /* grid already cached: just take a reference */
  PetscCall(PetscCommDestroy(&icomm));
  a->grid = grid;
  a->mb   = DEFAULT_BLOCKSIZE;
  a->nb   = DEFAULT_BLOCKSIZE;

  PetscOptionsBegin(PetscObjectComm((PetscObject)A),NULL,"ScaLAPACK Options","Mat");
  PetscCall(PetscOptionsIntArray("-mat_scalapack_block_sizes","Size of the blocks to use (one or two comma-separated integers)","MatCreateScaLAPACK",array,&k,&flg));
  if (flg) {
    /* NOTE(review): option values are not range-checked here, unlike MatScaLAPACKSetBlockSizes() — confirm intended */
    a->mb = array[0];
    a->nb = (k>1)? array[1]: a->mb;
  }
  PetscOptionsEnd();

  a->roworiented = PETSC_TRUE;
  PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatGetOwnershipIS_C",MatGetOwnershipIS_ScaLAPACK));
  PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKSetBlockSizes_C",MatScaLAPACKSetBlockSizes_ScaLAPACK));
  PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKGetBlockSizes_C",MatScaLAPACKGetBlockSizes_ScaLAPACK));
  PetscCall(PetscObjectChangeTypeName((PetscObject)A,MATSCALAPACK));
  PetscFunctionReturn(0);
}
1794 
1795 /*@C
1796    MatCreateScaLAPACK - Creates a dense parallel matrix in ScaLAPACK format
1797    (2D block cyclic distribution).
1798 
1799    Collective
1800 
1801    Input Parameters:
1802 +  comm - MPI communicator
1803 .  mb   - row block size (or PETSC_DECIDE to have it set)
1804 .  nb   - column block size (or PETSC_DECIDE to have it set)
1805 .  M    - number of global rows
1806 .  N    - number of global columns
1807 .  rsrc - coordinate of process that owns the first row of the distributed matrix
1808 -  csrc - coordinate of process that owns the first column of the distributed matrix
1809 
1810    Output Parameter:
1811 .  A - the matrix
1812 
1813    Options Database Keys:
1814 .  -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma)
1815 
1816    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
1817    MatXXXXSetPreallocation() paradigm instead of this routine directly.
1818    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
1819 
1820    Notes:
1821    If PETSC_DECIDE is used for the block sizes, then an appropriate value
1822    is chosen.
1823 
1824    Storage Information:
   Storage is completely managed by ScaLAPACK, so this requires PETSc to be
1826    configured with ScaLAPACK. In particular, PETSc's local sizes lose
1827    significance and are thus ignored. The block sizes refer to the values
1828    used for the distributed matrix, not the same meaning as in BAIJ.
1829 
1830    Level: intermediate
1831 
1832 .seealso: `MatCreate()`, `MatCreateDense()`, `MatSetValues()`
1833 @*/
1834 PetscErrorCode MatCreateScaLAPACK(MPI_Comm comm,PetscInt mb,PetscInt nb,PetscInt M,PetscInt N,PetscInt rsrc,PetscInt csrc,Mat *A)
1835 {
1836   Mat_ScaLAPACK  *a;
1837   PetscInt       m,n;
1838 
1839   PetscFunctionBegin;
1840   PetscCall(MatCreate(comm,A));
1841   PetscCall(MatSetType(*A,MATSCALAPACK));
1842   PetscCheck(M != PETSC_DECIDE && N != PETSC_DECIDE,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot use PETSC_DECIDE for matrix dimensions");
1843   /* rows and columns are NOT distributed according to PetscSplitOwnership */
1844   m = PETSC_DECIDE;
1845   PetscCall(PetscSplitOwnershipEqual(comm,&m,&M));
1846   n = PETSC_DECIDE;
1847   PetscCall(PetscSplitOwnershipEqual(comm,&n,&N));
1848   PetscCall(MatSetSizes(*A,m,n,M,N));
1849   a = (Mat_ScaLAPACK*)(*A)->data;
1850   PetscCall(PetscBLASIntCast(M,&a->M));
1851   PetscCall(PetscBLASIntCast(N,&a->N));
1852   PetscCall(PetscBLASIntCast((mb==PETSC_DECIDE)?DEFAULT_BLOCKSIZE:mb,&a->mb));
1853   PetscCall(PetscBLASIntCast((nb==PETSC_DECIDE)?a->mb:nb,&a->nb));
1854   PetscCall(PetscBLASIntCast(rsrc,&a->rsrc));
1855   PetscCall(PetscBLASIntCast(csrc,&a->csrc));
1856   PetscCall(MatSetUp(*A));
1857   PetscFunctionReturn(0);
1858 }
1859