xref: /petsc/src/mat/impls/scalapack/matscalapack.c (revision 76be6f4ff3bd4e251c19fc00ebbebfd58b6e7589)
1 #include <petsc/private/petscscalapack.h>  /*I "petscmat.h" I*/
2 
/* BibTeX entry for the ScaLAPACK Users' Guide; presumably passed to
   PetscCitationsRegister() together with the ScaLAPACKCite flag below -- the
   registration site is not visible in this chunk. */
const char ScaLAPACKCitation[] = "@BOOK{scalapack-user-guide,\n"
"       AUTHOR = {L. S. Blackford and J. Choi and A. Cleary and E. D'Azevedo and\n"
"                 J. Demmel and I. Dhillon and J. Dongarra and S. Hammarling and\n"
"                 G. Henry and A. Petitet and K. Stanley and D. Walker and R. C. Whaley},\n"
"       TITLE = {Sca{LAPACK} Users' Guide},\n"
"       PUBLISHER = {SIAM},\n"
"       ADDRESS = {Philadelphia, PA},\n"
"       YEAR = 1997\n"
"}\n";
/* one-shot flag so the citation is registered only once */
static PetscBool ScaLAPACKCite = PETSC_FALSE;

/* default ScaLAPACK block-cyclic distribution block size (rows and columns) */
#define DEFAULT_BLOCKSIZE 64

/*
    The variable Petsc_ScaLAPACK_keyval is used to indicate an MPI attribute that
  is attached to a communicator, in this case the attribute is a Mat_ScaLAPACK_Grid
*/
static PetscMPIInt Petsc_ScaLAPACK_keyval = MPI_KEYVAL_INVALID;
21 
/* Releases the MPI keyval used to attach the ScaLAPACK grid to communicators.
   Presumably registered (via PetscRegisterFinalize or similar) where the keyval
   is created, so it runs at finalization time -- the registration is not
   visible in this chunk. */
static PetscErrorCode Petsc_ScaLAPACK_keyval_free(void)
{
  PetscFunctionBegin;
  PetscCall(PetscInfo(NULL,"Freeing Petsc_ScaLAPACK_keyval\n"));
  PetscCallMPI(MPI_Comm_free_keyval(&Petsc_ScaLAPACK_keyval));
  PetscFunctionReturn(0);
}
29 
/* Viewer for MATSCALAPACK matrices.
   For ASCII INFO/INFO_DETAIL formats only the distribution parameters are
   printed; for ASCII FACTOR_INFO nothing is printed. Every other case
   (including non-ASCII viewers) views the matrix through a temporary
   conversion to MATDENSE. */
static PetscErrorCode MatView_ScaLAPACK(Mat A,PetscViewer viewer)
{
  Mat_ScaLAPACK     *a = (Mat_ScaLAPACK*)A->data;
  PetscBool         iascii;
  PetscViewerFormat format;
  Mat               Adense;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
  if (iascii) {
    PetscCall(PetscViewerGetFormat(viewer,&format));
    if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* values are cast to int for portable printf formatting */
      PetscCall(PetscViewerASCIIPrintf(viewer,"block sizes: %d,%d\n",(int)a->mb,(int)a->nb));
      PetscCall(PetscViewerASCIIPrintf(viewer,"grid height=%d, grid width=%d\n",(int)a->grid->nprow,(int)a->grid->npcol));
      PetscCall(PetscViewerASCIIPrintf(viewer,"coordinates of process owning first row and column: (%d,%d)\n",(int)a->rsrc,(int)a->csrc));
      PetscCall(PetscViewerASCIIPrintf(viewer,"dimension of largest local matrix: %d x %d\n",(int)a->locr,(int)a->locc));
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      /* nothing extra to report for factored matrices */
      PetscFunctionReturn(0);
    }
  }
  /* convert to dense format and call MatView() */
  PetscCall(MatConvert(A,MATDENSE,MAT_INITIAL_MATRIX,&Adense));
  PetscCall(MatView(Adense,viewer));
  PetscCall(MatDestroy(&Adense));
  PetscFunctionReturn(0);
}
57 
58 static PetscErrorCode MatGetInfo_ScaLAPACK(Mat A,MatInfoType flag,MatInfo *info)
59 {
60   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
61   PetscLogDouble isend[2],irecv[2];
62 
63   PetscFunctionBegin;
64   info->block_size = 1.0;
65 
66   isend[0] = a->lld*a->locc;     /* locally allocated */
67   isend[1] = a->locr*a->locc;    /* used submatrix */
68   if (flag == MAT_LOCAL || flag == MAT_GLOBAL_MAX) {
69     info->nz_allocated   = isend[0];
70     info->nz_used        = isend[1];
71   } else if (flag == MAT_GLOBAL_MAX) {
72     PetscCall(MPIU_Allreduce(isend,irecv,2,MPIU_PETSCLOGDOUBLE,MPIU_MAX,PetscObjectComm((PetscObject)A)));
73     info->nz_allocated   = irecv[0];
74     info->nz_used        = irecv[1];
75   } else if (flag == MAT_GLOBAL_SUM) {
76     PetscCall(MPIU_Allreduce(isend,irecv,2,MPIU_PETSCLOGDOUBLE,MPIU_SUM,PetscObjectComm((PetscObject)A)));
77     info->nz_allocated   = irecv[0];
78     info->nz_used        = irecv[1];
79   }
80 
81   info->nz_unneeded       = 0;
82   info->assemblies        = A->num_ass;
83   info->mallocs           = 0;
84   info->memory            = ((PetscObject)A)->mem;
85   info->fill_ratio_given  = 0;
86   info->fill_ratio_needed = 0;
87   info->factor_mallocs    = 0;
88   PetscFunctionReturn(0);
89 }
90 
91 PetscErrorCode MatSetOption_ScaLAPACK(Mat A,MatOption op,PetscBool flg)
92 {
93   PetscFunctionBegin;
94   switch (op) {
95     case MAT_NEW_NONZERO_LOCATIONS:
96     case MAT_NEW_NONZERO_LOCATION_ERR:
97     case MAT_NEW_NONZERO_ALLOCATION_ERR:
98     case MAT_SYMMETRIC:
99     case MAT_SORTED_FULL:
100     case MAT_HERMITIAN:
101       break;
102     default:
103       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unsupported option %s",MatOptions[op]);
104   }
105   PetscFunctionReturn(0);
106 }
107 
/* Inserts/adds the nr x nc dense block vals (row-oriented) at global indices
   rows[] x cols[]. Each global index pair is mapped with p?infog2l to local
   indices plus the owning grid coordinates; entries owned by this process are
   written directly into the column-major local array, while off-process
   entries are stashed for communication during MatAssemblyBegin/End.
   Negative indices are skipped. */
static PetscErrorCode MatSetValues_ScaLAPACK(Mat A,PetscInt nr,const PetscInt *rows,PetscInt nc,const PetscInt *cols,const PetscScalar *vals,InsertMode imode)
{
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
  PetscInt       i,j;
  PetscBLASInt   gridx,gcidx,lridx,lcidx,rsrc,csrc;

  PetscFunctionBegin;
  for (i=0;i<nr;i++) {
    if (rows[i] < 0) continue;
    PetscCall(PetscBLASIntCast(rows[i]+1,&gridx));  /* ScaLAPACK uses 1-based indices */
    for (j=0;j<nc;j++) {
      if (cols[j] < 0) continue;
      PetscCall(PetscBLASIntCast(cols[j]+1,&gcidx));
      /* global (gridx,gcidx) -> local (lridx,lcidx) and owner (rsrc,csrc) */
      PetscStackCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc));
      if (rsrc==a->grid->myrow && csrc==a->grid->mycol) {
        /* locally owned entry: update the local array in place (1-based -> 0-based) */
        switch (imode) {
          case INSERT_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] = vals[i*nc+j]; break;
          case ADD_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] += vals[i*nc+j]; break;
          default: SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for InsertMode %d",(int)imode);
        }
      } else {
        /* off-process entry: stash it unless the user promised there are none */
        PetscCheck(!A->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process entry even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set");
        A->assembled = PETSC_FALSE;
        PetscCall(MatStashValuesRow_Private(&A->stash,rows[i],1,cols+j,vals+i*nc+j,(PetscBool)(imode==ADD_VALUES)));
      }
    }
  }
  PetscFunctionReturn(0);
}
137 
/* Shared kernel for MatMult/MatMultTranspose(/Add): computes
       y = op(A)*x + beta*y,   op(A) = A or A^T (alpha fixed at 1.0)
   where x and y are the raw arrays of 1d block-distributed PETSc vectors.
   The vectors are redistributed with p?gemr2d into 2d layouts aligned with
   A's block-cyclic distribution (a column/row of the process grid as
   appropriate), PBLASgemv_ performs the product, and y is redistributed back.
   When beta==0 the incoming contents of y are irrelevant, so their
   redistribution is skipped. The 1d block size is taken from ranges[1], the
   size of the first ownership range -- assumes no later range is larger
   (PETSC_DECIDE-style layout); TODO confirm against callers. */
static PetscErrorCode MatMultXXXYYY_ScaLAPACK(Mat A,PetscBool transpose,PetscScalar beta,const PetscScalar *x,PetscScalar *y)
{
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
  PetscScalar    *x2d,*y2d,alpha=1.0;
  const PetscInt *ranges;
  PetscBLASInt   xdesc[9],ydesc[9],x2desc[9],y2desc[9],mb,nb,lszx,lszy,zero=0,one=1,xlld,ylld,info;

  PetscFunctionBegin;
  if (transpose) {

    /* create ScaLAPACK descriptors for vectors (1d block distribution) */
    PetscCall(PetscLayoutGetRanges(A->rmap,&ranges));
    PetscCall(PetscBLASIntCast(ranges[1],&mb));  /* x block size */
    xlld = PetscMax(1,A->rmap->n);
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&xlld,&info));
    PetscCheckScaLapackInfo("descinit",info);
    PetscCall(PetscLayoutGetRanges(A->cmap,&ranges));
    PetscCall(PetscBLASIntCast(ranges[1],&nb));  /* y block size */
    ylld = 1;
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ydesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&ylld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* allocate 2d vectors */
    lszx = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
    lszy = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
    PetscCall(PetscMalloc2(lszx,&x2d,lszy,&y2d));
    xlld = PetscMax(1,lszx);

    /* create ScaLAPACK descriptors for vectors (2d block distribution) */
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&xlld,&info));
    PetscCheckScaLapackInfo("descinit",info);
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(y2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&ylld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* redistribute x as a column of a 2d matrix */
    PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,(PetscScalar*)x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxcol));

    /* redistribute y as a row of a 2d matrix */
    if (beta!=0.0) PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,y,&one,&one,ydesc,y2d,&one,&one,y2desc,&a->grid->ictxrow));

    /* call PBLAS subroutine */
    PetscStackCallBLAS("PBLASgemv",PBLASgemv_("T",&a->M,&a->N,&alpha,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&one,&beta,y2d,&one,&one,y2desc,&one));

    /* redistribute y from a row of a 2d matrix */
    PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,y2d,&one,&one,y2desc,y,&one,&one,ydesc,&a->grid->ictxrow));

  } else {   /* non-transpose */

    /* create ScaLAPACK descriptors for vectors (1d block distribution) */
    PetscCall(PetscLayoutGetRanges(A->cmap,&ranges));
    PetscCall(PetscBLASIntCast(ranges[1],&nb));  /* x block size */
    xlld = 1;
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&xlld,&info));
    PetscCheckScaLapackInfo("descinit",info);
    PetscCall(PetscLayoutGetRanges(A->rmap,&ranges));
    PetscCall(PetscBLASIntCast(ranges[1],&mb));  /* y block size */
    ylld = PetscMax(1,A->rmap->n);
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ydesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&ylld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* allocate 2d vectors */
    lszy = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
    lszx = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
    PetscCall(PetscMalloc2(lszx,&x2d,lszy,&y2d));
    ylld = PetscMax(1,lszy);

    /* create ScaLAPACK descriptors for vectors (2d block distribution) */
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&xlld,&info));
    PetscCheckScaLapackInfo("descinit",info);
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(y2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&ylld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* redistribute x as a row of a 2d matrix */
    PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,(PetscScalar*)x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxrow));

    /* redistribute y as a column of a 2d matrix */
    if (beta!=0.0) PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,y,&one,&one,ydesc,y2d,&one,&one,y2desc,&a->grid->ictxcol));

    /* call PBLAS subroutine */
    PetscStackCallBLAS("PBLASgemv",PBLASgemv_("N",&a->M,&a->N,&alpha,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&one,&beta,y2d,&one,&one,y2desc,&one));

    /* redistribute y from a column of a 2d matrix */
    PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,y2d,&one,&one,y2desc,y,&one,&one,ydesc,&a->grid->ictxcol));

  }
  PetscCall(PetscFree2(x2d,y2d));
  PetscFunctionReturn(0);
}
226 
227 static PetscErrorCode MatMult_ScaLAPACK(Mat A,Vec x,Vec y)
228 {
229   const PetscScalar *xarray;
230   PetscScalar       *yarray;
231 
232   PetscFunctionBegin;
233   PetscCall(VecGetArrayRead(x,&xarray));
234   PetscCall(VecGetArray(y,&yarray));
235   PetscCall(MatMultXXXYYY_ScaLAPACK(A,PETSC_FALSE,0.0,xarray,yarray));
236   PetscCall(VecRestoreArrayRead(x,&xarray));
237   PetscCall(VecRestoreArray(y,&yarray));
238   PetscFunctionReturn(0);
239 }
240 
241 static PetscErrorCode MatMultTranspose_ScaLAPACK(Mat A,Vec x,Vec y)
242 {
243   const PetscScalar *xarray;
244   PetscScalar       *yarray;
245 
246   PetscFunctionBegin;
247   PetscCall(VecGetArrayRead(x,&xarray));
248   PetscCall(VecGetArray(y,&yarray));
249   PetscCall(MatMultXXXYYY_ScaLAPACK(A,PETSC_TRUE,0.0,xarray,yarray));
250   PetscCall(VecRestoreArrayRead(x,&xarray));
251   PetscCall(VecRestoreArray(y,&yarray));
252   PetscFunctionReturn(0);
253 }
254 
255 static PetscErrorCode MatMultAdd_ScaLAPACK(Mat A,Vec x,Vec y,Vec z)
256 {
257   const PetscScalar *xarray;
258   PetscScalar       *zarray;
259 
260   PetscFunctionBegin;
261   if (y != z) PetscCall(VecCopy(y,z));
262   PetscCall(VecGetArrayRead(x,&xarray));
263   PetscCall(VecGetArray(z,&zarray));
264   PetscCall(MatMultXXXYYY_ScaLAPACK(A,PETSC_FALSE,1.0,xarray,zarray));
265   PetscCall(VecRestoreArrayRead(x,&xarray));
266   PetscCall(VecRestoreArray(z,&zarray));
267   PetscFunctionReturn(0);
268 }
269 
270 static PetscErrorCode MatMultTransposeAdd_ScaLAPACK(Mat A,Vec x,Vec y,Vec z)
271 {
272   const PetscScalar *xarray;
273   PetscScalar       *zarray;
274 
275   PetscFunctionBegin;
276   if (y != z) PetscCall(VecCopy(y,z));
277   PetscCall(VecGetArrayRead(x,&xarray));
278   PetscCall(VecGetArray(z,&zarray));
279   PetscCall(MatMultXXXYYY_ScaLAPACK(A,PETSC_TRUE,1.0,xarray,zarray));
280   PetscCall(VecRestoreArrayRead(x,&xarray));
281   PetscCall(VecRestoreArray(z,&zarray));
282   PetscFunctionReturn(0);
283 }
284 
/* Numeric phase of C = A*B for three MATSCALAPACK matrices: a single PBLAS
   p?gemm call with alpha=1, beta=0 overwriting C's local array. */
PetscErrorCode MatMatMultNumeric_ScaLAPACK(Mat A,Mat B,Mat C)
{
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
  Mat_ScaLAPACK *b = (Mat_ScaLAPACK*)B->data;
  Mat_ScaLAPACK *c = (Mat_ScaLAPACK*)C->data;
  PetscScalar   sone=1.0,zero=0.0;
  PetscBLASInt  one=1;

  PetscFunctionBegin;
  PetscStackCallBLAS("PBLASgemm",PBLASgemm_("N","N",&a->M,&b->N,&a->N,&sone,a->loc,&one,&one,a->desc,b->loc,&one,&one,b->desc,&zero,c->loc,&one,&one,c->desc));
  C->assembled = PETSC_TRUE;
  PetscFunctionReturn(0);
}
298 
/* Symbolic phase of C = A*B: sizes and sets up C as MATSCALAPACK and installs
   the numeric routine. The fill argument is irrelevant for dense storage. */
PetscErrorCode MatMatMultSymbolic_ScaLAPACK(Mat A,Mat B,PetscReal fill,Mat C)
{
  PetscFunctionBegin;
  PetscCall(MatSetSizes(C,A->rmap->n,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE));
  PetscCall(MatSetType(C,MATSCALAPACK));
  PetscCall(MatSetUp(C));
  C->ops->matmultnumeric = MatMatMultNumeric_ScaLAPACK;
  PetscFunctionReturn(0);
}
308 
/* Numeric phase of C = A*B^T: a single PBLAS p?gemm call with transb="T"
   (plain transpose, not conjugate transpose), alpha=1 and beta=0. */
static PetscErrorCode MatMatTransposeMultNumeric_ScaLAPACK(Mat A,Mat B,Mat C)
{
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
  Mat_ScaLAPACK *b = (Mat_ScaLAPACK*)B->data;
  Mat_ScaLAPACK *c = (Mat_ScaLAPACK*)C->data;
  PetscScalar   sone=1.0,zero=0.0;
  PetscBLASInt  one=1;

  PetscFunctionBegin;
  PetscStackCallBLAS("PBLASgemm",PBLASgemm_("N","T",&a->M,&b->M,&a->N,&sone,a->loc,&one,&one,a->desc,b->loc,&one,&one,b->desc,&zero,c->loc,&one,&one,c->desc));
  C->assembled = PETSC_TRUE;
  PetscFunctionReturn(0);
}
322 
323 static PetscErrorCode MatMatTransposeMultSymbolic_ScaLAPACK(Mat A,Mat B,PetscReal fill,Mat C)
324 {
325   PetscFunctionBegin;
326   PetscCall(MatSetSizes(C,A->rmap->n,B->rmap->n,PETSC_DECIDE,PETSC_DECIDE));
327   PetscCall(MatSetType(C,MATSCALAPACK));
328   PetscCall(MatSetUp(C));
329   PetscFunctionReturn(0);
330 }
331 
332 /* --------------------------------------- */
/* Installs the symbolic routines for the MATPRODUCT_AB case (C = A*B). */
static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_AB(Mat C)
{
  PetscFunctionBegin;
  C->ops->matmultsymbolic = MatMatMultSymbolic_ScaLAPACK;
  C->ops->productsymbolic = MatProductSymbolic_AB;
  PetscFunctionReturn(0);
}
340 
/* Installs the symbolic routines for the MATPRODUCT_ABt case (C = A*B^T). */
static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_ABt(Mat C)
{
  PetscFunctionBegin;
  C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_ScaLAPACK;
  C->ops->productsymbolic          = MatProductSymbolic_ABt;
  PetscFunctionReturn(0);
}
348 
349 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_ScaLAPACK(Mat C)
350 {
351   Mat_Product    *product = C->product;
352 
353   PetscFunctionBegin;
354   switch (product->type) {
355     case MATPRODUCT_AB:
356       PetscCall(MatProductSetFromOptions_ScaLAPACK_AB(C));
357       break;
358     case MATPRODUCT_ABt:
359       PetscCall(MatProductSetFromOptions_ScaLAPACK_ABt(C));
360       break;
361     default: SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_SUP,"MatProduct type %s is not supported for ScaLAPACK and ScaLAPACK matrices",MatProductTypes[product->type]);
362   }
363   PetscFunctionReturn(0);
364 }
365 /* --------------------------------------- */
366 
/* Extracts the diagonal of A into the 1d-distributed vector D.
   When M<=N the diagonal is handled as a column vector distributed like A's
   rows ("row version"); otherwise as a row vector distributed like A's
   columns ("column version"). Diagonal entries are gathered one by one with
   p?elget (scope "R"/"C": broadcast along a process row/column -- confirm
   against the ScaLAPACK TOOLS docs), collected into a 2d-distributed work
   vector, then redistributed into D's layout with p?gemr2d. */
static PetscErrorCode MatGetDiagonal_ScaLAPACK(Mat A,Vec D)
{
  Mat_ScaLAPACK     *a = (Mat_ScaLAPACK*)A->data;
  PetscScalar       *darray,*d2d,v;
  const PetscInt    *ranges;
  PetscBLASInt      j,ddesc[9],d2desc[9],mb,nb,lszd,zero=0,one=1,dlld,info;

  PetscFunctionBegin;
  PetscCall(VecGetArray(D,&darray));

  if (A->rmap->N<=A->cmap->N) {   /* row version */

    /* create ScaLAPACK descriptor for vector (1d block distribution) */
    PetscCall(PetscLayoutGetRanges(A->rmap,&ranges));
    PetscCall(PetscBLASIntCast(ranges[1],&mb));  /* D block size */
    dlld = PetscMax(1,A->rmap->n);
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&dlld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* allocate 2d vector */
    lszd = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
    PetscCall(PetscCalloc1(lszd,&d2d));
    dlld = PetscMax(1,lszd);

    /* create ScaLAPACK descriptor for vector (2d block distribution) */
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&dlld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* collect diagonal */
    for (j=1;j<=a->M;j++) {
      PetscStackCallBLAS("SCALAPACKelget",SCALAPACKelget_("R"," ",&v,a->loc,&j,&j,a->desc));
      PetscStackCallBLAS("SCALAPACKelset",SCALAPACKelset_(d2d,&j,&one,d2desc,&v));
    }

    /* redistribute d from a column of a 2d matrix */
    PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,d2d,&one,&one,d2desc,darray,&one,&one,ddesc,&a->grid->ictxcol));
    PetscCall(PetscFree(d2d));

  } else {   /* column version */

    /* create ScaLAPACK descriptor for vector (1d block distribution) */
    PetscCall(PetscLayoutGetRanges(A->cmap,&ranges));
    PetscCall(PetscBLASIntCast(ranges[1],&nb));  /* D block size */
    dlld = 1;
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&dlld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* allocate 2d vector */
    lszd = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
    PetscCall(PetscCalloc1(lszd,&d2d));

    /* create ScaLAPACK descriptor for vector (2d block distribution) */
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&dlld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* collect diagonal */
    for (j=1;j<=a->N;j++) {
      PetscStackCallBLAS("SCALAPACKelget",SCALAPACKelget_("C"," ",&v,a->loc,&j,&j,a->desc));
      PetscStackCallBLAS("SCALAPACKelset",SCALAPACKelset_(d2d,&one,&j,d2desc,&v));
    }

    /* redistribute d from a row of a 2d matrix */
    PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,d2d,&one,&one,d2desc,darray,&one,&one,ddesc,&a->grid->ictxrow));
    PetscCall(PetscFree(d2d));
  }

  PetscCall(VecRestoreArray(D,&darray));
  PetscCall(VecAssemblyBegin(D));
  PetscCall(VecAssemblyEnd(D));
  PetscFunctionReturn(0);
}
438 
/* Computes A = diag(L) * A * diag(R); either vector may be NULL to skip that
   side. Each scaling vector is redistributed from its 1d PETSc layout to the
   2d grid (R along a process row, L along a process column) with p?gemr2d,
   broadcast along the orthogonal grid dimension with BLACS gebs2d/gebr2d so
   every process holds the factors for its local columns/rows, and applied to
   the local array in place.
   NOTE(review): Cdgebs2d/Cdgebr2d are the double-precision BLACS broadcast
   routines -- confirm this remains correct when PetscScalar is complex. */
static PetscErrorCode MatDiagonalScale_ScaLAPACK(Mat A,Vec L,Vec R)
{
  Mat_ScaLAPACK     *a = (Mat_ScaLAPACK*)A->data;
  const PetscScalar *d;
  const PetscInt    *ranges;
  PetscScalar       *d2d;
  PetscBLASInt      i,j,ddesc[9],d2desc[9],mb,nb,lszd,zero=0,one=1,dlld,info;

  PetscFunctionBegin;
  if (R) {
    PetscCall(VecGetArrayRead(R,(const PetscScalar **)&d));
    /* create ScaLAPACK descriptor for vector (1d block distribution) */
    PetscCall(PetscLayoutGetRanges(A->cmap,&ranges));
    PetscCall(PetscBLASIntCast(ranges[1],&nb));  /* D block size */
    dlld = 1;
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&dlld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* allocate 2d vector */
    lszd = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
    PetscCall(PetscCalloc1(lszd,&d2d));

    /* create ScaLAPACK descriptor for vector (2d block distribution) */
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&dlld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* redistribute d to a row of a 2d matrix */
    PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,(PetscScalar*)d,&one,&one,ddesc,d2d,&one,&one,d2desc,&a->grid->ictxrow));

    /* broadcast along process columns */
    if (!a->grid->myrow) Cdgebs2d(a->grid->ictxt,"C"," ",1,lszd,d2d,dlld);
    else Cdgebr2d(a->grid->ictxt,"C"," ",1,lszd,d2d,dlld,0,a->grid->mycol);

    /* local scaling: column j of the local submatrix is multiplied by d2d[j] */
    for (j=0;j<a->locc;j++) for (i=0;i<a->locr;i++) a->loc[i+j*a->lld] *= d2d[j];

    PetscCall(PetscFree(d2d));
    PetscCall(VecRestoreArrayRead(R,(const PetscScalar **)&d));
  }
  if (L) {
    PetscCall(VecGetArrayRead(L,(const PetscScalar **)&d));
    /* create ScaLAPACK descriptor for vector (1d block distribution) */
    PetscCall(PetscLayoutGetRanges(A->rmap,&ranges));
    PetscCall(PetscBLASIntCast(ranges[1],&mb));  /* D block size */
    dlld = PetscMax(1,A->rmap->n);
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&dlld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* allocate 2d vector */
    lszd = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
    PetscCall(PetscCalloc1(lszd,&d2d));
    dlld = PetscMax(1,lszd);

    /* create ScaLAPACK descriptor for vector (2d block distribution) */
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&dlld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* redistribute d to a column of a 2d matrix */
    PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,(PetscScalar*)d,&one,&one,ddesc,d2d,&one,&one,d2desc,&a->grid->ictxcol));

    /* broadcast along process rows */
    if (!a->grid->mycol) Cdgebs2d(a->grid->ictxt,"R"," ",lszd,1,d2d,dlld);
    else Cdgebr2d(a->grid->ictxt,"R"," ",lszd,1,d2d,dlld,a->grid->myrow,0);

    /* local scaling: row i of the local submatrix is multiplied by d2d[i] */
    for (i=0;i<a->locr;i++) for (j=0;j<a->locc;j++) a->loc[i+j*a->lld] *= d2d[i];

    PetscCall(PetscFree(d2d));
    PetscCall(VecRestoreArrayRead(L,(const PetscScalar **)&d));
  }
  PetscFunctionReturn(0);
}
511 
/* Dense storage: every diagonal entry has an explicit slot, so the diagonal
   is never missing. The output index d is left untouched. */
static PetscErrorCode MatMissingDiagonal_ScaLAPACK(Mat A,PetscBool *missing,PetscInt *d)
{
  PetscFunctionBegin;
  *missing = PETSC_FALSE;
  PetscFunctionReturn(0);
}
518 
/* X = a*X: scales the entire locally allocated array (lld*locc entries, i.e.
   including any padding rows between locr and lld) with a single BLAS scal. */
static PetscErrorCode MatScale_ScaLAPACK(Mat X,PetscScalar a)
{
  Mat_ScaLAPACK *x = (Mat_ScaLAPACK*)X->data;
  PetscBLASInt  n,one=1;

  PetscFunctionBegin;
  n = x->lld*x->locc;
  PetscStackCallBLAS("BLASscal",BLASscal_(&n,&a,x->loc,&one));
  PetscFunctionReturn(0);
}
529 
/* X = X + alpha*I: adds alpha to each of the min(M,N) diagonal entries using
   p?elget/p?elset (scope "-": presumably no broadcast, only the owning
   process reads/writes the entry -- confirm against ScaLAPACK TOOLS docs). */
static PetscErrorCode MatShift_ScaLAPACK(Mat X,PetscScalar alpha)
{
  Mat_ScaLAPACK *x = (Mat_ScaLAPACK*)X->data;
  PetscBLASInt  i,n;
  PetscScalar   v;

  PetscFunctionBegin;
  n = PetscMin(x->M,x->N);
  for (i=1;i<=n;i++) {   /* 1-based ScaLAPACK indexing */
    PetscStackCallBLAS("SCALAPACKelget",SCALAPACKelget_("-"," ",&v,x->loc,&i,&i,x->desc));
    v += alpha;
    PetscStackCallBLAS("SCALAPACKelset",SCALAPACKelset_(x->loc,&i,&i,x->desc,&v));
  }
  PetscFunctionReturn(0);
}
545 
/* Y = alpha*X + Y via ScaLAPACK p?geadd (beta=1 on Y). Both matrices must
   share the same 2d block-cyclic distribution, which is verified first;
   the MatStructure argument is irrelevant for dense storage. */
static PetscErrorCode MatAXPY_ScaLAPACK(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
{
  Mat_ScaLAPACK  *x = (Mat_ScaLAPACK*)X->data;
  Mat_ScaLAPACK  *y = (Mat_ScaLAPACK*)Y->data;
  PetscBLASInt   one=1;
  PetscScalar    beta=1.0;

  PetscFunctionBegin;
  MatScaLAPACKCheckDistribution(Y,1,X,3);
  PetscStackCallBLAS("SCALAPACKmatadd",SCALAPACKmatadd_(&x->M,&x->N,&alpha,x->loc,&one,&one,x->desc,&beta,y->loc,&one,&one,y->desc));
  PetscCall(PetscObjectStateIncrease((PetscObject)Y));
  PetscFunctionReturn(0);
}
559 
/* B = A by raw copy of the local arrays (lld*locc entries). Presumably the
   caller/dispatch guarantees A and B share the same distribution -- no check
   is performed here. The MatStructure argument is irrelevant for dense. */
static PetscErrorCode MatCopy_ScaLAPACK(Mat A,Mat B,MatStructure str)
{
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
  Mat_ScaLAPACK  *b = (Mat_ScaLAPACK*)B->data;

  PetscFunctionBegin;
  PetscCall(PetscArraycpy(b->loc,a->loc,a->lld*a->locc));
  PetscCall(PetscObjectStateIncrease((PetscObject)B));
  PetscFunctionReturn(0);
}
570 
/* Creates a new MATSCALAPACK matrix with the same sizes and distribution
   parameters as A; the distribution fields are copied into the new context
   before MatSetUp() so the layouts match exactly. Values are copied only for
   MAT_COPY_VALUES. The duplicate is marked assembled unconditionally. */
static PetscErrorCode MatDuplicate_ScaLAPACK(Mat A,MatDuplicateOption op,Mat *B)
{
  Mat            Bs;
  MPI_Comm       comm;
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data,*b;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
  PetscCall(MatCreate(comm,&Bs));
  PetscCall(MatSetSizes(Bs,A->rmap->n,A->cmap->n,PETSC_DECIDE,PETSC_DECIDE));
  PetscCall(MatSetType(Bs,MATSCALAPACK));
  b = (Mat_ScaLAPACK*)Bs->data;
  /* replicate A's block-cyclic distribution before setup */
  b->M    = a->M;
  b->N    = a->N;
  b->mb   = a->mb;
  b->nb   = a->nb;
  b->rsrc = a->rsrc;
  b->csrc = a->csrc;
  PetscCall(MatSetUp(Bs));
  *B = Bs;
  if (op == MAT_COPY_VALUES) {
    PetscCall(PetscArraycpy(b->loc,a->loc,a->lld*a->locc));
  }
  Bs->assembled = PETSC_TRUE;
  PetscFunctionReturn(0);
}
597 
/* B = A^T via PBLAS p?tran. Only MAT_INITIAL_MATRIX is supported; the result
   is created with swapped block sizes/source coordinates. In complex builds
   PBLAStran_ conjugates as well (the loop below undoes the conjugation to
   produce a plain transpose). */
static PetscErrorCode MatTranspose_ScaLAPACK(Mat A,MatReuse reuse,Mat *B)
{
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data, *b;
  Mat            Bs = *B;
  PetscBLASInt   one=1;
  PetscScalar    sone=1.0,zero=0.0;
#if defined(PETSC_USE_COMPLEX)
  PetscInt       i,j;
#endif

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A),a->nb,a->mb,a->N,a->M,a->csrc,a->rsrc,&Bs));
    *B = Bs;
  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Only MAT_INITIAL_MATRIX supported");
  b = (Mat_ScaLAPACK*)Bs->data;
  PetscStackCallBLAS("PBLAStran",PBLAStran_(&a->N,&a->M,&sone,a->loc,&one,&one,a->desc,&zero,b->loc,&one,&one,b->desc));
#if defined(PETSC_USE_COMPLEX)
  /* undo conjugation */
  for (i=0;i<b->locr;i++) for (j=0;j<b->locc;j++) b->loc[i+j*b->lld] = PetscConj(b->loc[i+j*b->lld]);
#endif
  Bs->assembled = PETSC_TRUE;
  PetscFunctionReturn(0);
}
622 
623 static PetscErrorCode MatConjugate_ScaLAPACK(Mat A)
624 {
625   Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
626   PetscInt      i,j;
627 
628   PetscFunctionBegin;
629   for (i=0;i<a->locr;i++) for (j=0;j<a->locc;j++) a->loc[i+j*a->lld] = PetscConj(a->loc[i+j*a->lld]);
630   PetscFunctionReturn(0);
631 }
632 
/* B = A^H via PBLAS p?tran. Only MAT_INITIAL_MATRIX is supported. Unlike
   MatTranspose_ScaLAPACK, no conjugation is undone afterwards, so in complex
   builds the conjugation performed by PBLAStran_ yields the Hermitian
   transpose directly; in real builds this is an ordinary transpose. */
static PetscErrorCode MatHermitianTranspose_ScaLAPACK(Mat A,MatReuse reuse,Mat *B)
{
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data, *b;
  Mat            Bs = *B;
  PetscBLASInt   one=1;
  PetscScalar    sone=1.0,zero=0.0;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A),a->nb,a->mb,a->N,a->M,a->csrc,a->rsrc,&Bs));
    *B = Bs;
  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Only MAT_INITIAL_MATRIX supported");
  b = (Mat_ScaLAPACK*)Bs->data;
  PetscStackCallBLAS("PBLAStran",PBLAStran_(&a->N,&a->M,&sone,a->loc,&one,&one,a->desc,&zero,b->loc,&one,&one,b->desc));
  Bs->assembled = PETSC_TRUE;
  PetscFunctionReturn(0);
}
650 
/* Solves A*X = B with a previously factored A (LU or Cholesky).
   B is copied into X, X's array is redistributed from its 1d layout to a
   column of A's 2d grid with p?gemr2d, the triangular solves are done with
   p?getrs/p?potrs, and the solution is redistributed back into X. */
static PetscErrorCode MatSolve_ScaLAPACK(Mat A,Vec B,Vec X)
{
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
  PetscScalar    *x,*x2d;
  const PetscInt *ranges;
  PetscBLASInt   xdesc[9],x2desc[9],mb,lszx,zero=0,one=1,xlld,nrhs=1,info;

  PetscFunctionBegin;
  PetscCall(VecCopy(B,X));
  PetscCall(VecGetArray(X,&x));

  /* create ScaLAPACK descriptor for a vector (1d block distribution) */
  PetscCall(PetscLayoutGetRanges(A->rmap,&ranges));
  PetscCall(PetscBLASIntCast(ranges[1],&mb));  /* x block size */
  xlld = PetscMax(1,A->rmap->n);
  PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&xlld,&info));
  PetscCheckScaLapackInfo("descinit",info);

  /* allocate 2d vector */
  lszx = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
  PetscCall(PetscMalloc1(lszx,&x2d));
  xlld = PetscMax(1,lszx);

  /* create ScaLAPACK descriptor for a vector (2d block distribution) */
  PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&xlld,&info));
  PetscCheckScaLapackInfo("descinit",info);

  /* redistribute x as a column of a 2d matrix */
  PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxcol));

  /* call ScaLAPACK subroutine */
  switch (A->factortype) {
    case MAT_FACTOR_LU:
      PetscStackCallBLAS("SCALAPACKgetrs",SCALAPACKgetrs_("N",&a->M,&nrhs,a->loc,&one,&one,a->desc,a->pivots,x2d,&one,&one,x2desc,&info));
      PetscCheckScaLapackInfo("getrs",info);
      break;
    case MAT_FACTOR_CHOLESKY:
      PetscStackCallBLAS("SCALAPACKpotrs",SCALAPACKpotrs_("L",&a->M,&nrhs,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&info));
      PetscCheckScaLapackInfo("potrs",info);
      break;
    default:
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unfactored Matrix or Unsupported MatFactorType");
  }

  /* redistribute x from a column of a 2d matrix */
  PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,x2d,&one,&one,x2desc,x,&one,&one,xdesc,&a->grid->ictxcol));

  PetscCall(PetscFree(x2d));
  PetscCall(VecRestoreArray(X,&x));
  PetscFunctionReturn(0);
}
702 
/* X = A^{-1}*B + Y: solve first, then accumulate Y into the solution. */
static PetscErrorCode MatSolveAdd_ScaLAPACK(Mat A,Vec B,Vec Y,Vec X)
{
  PetscFunctionBegin;
  PetscCall(MatSolve_ScaLAPACK(A,B,X));
  PetscCall(VecAXPY(X,1,Y));
  PetscFunctionReturn(0);
}
710 
/* Solves A*X = B for multiple right-hand sides, with B and X both
   MATSCALAPACK and sharing the same distribution (verified). B's local array
   is copied into X and the solve is done in place with p?getrs/p?potrs,
   according to A's factor type. */
static PetscErrorCode MatMatSolve_ScaLAPACK(Mat A,Mat B,Mat X)
{
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data,*b,*x;
  PetscBool      flg1,flg2;
  PetscBLASInt   one=1,info;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)B,MATSCALAPACK,&flg1));
  PetscCall(PetscObjectTypeCompare((PetscObject)X,MATSCALAPACK,&flg2));
  PetscCheck((flg1 && flg2),PETSC_COMM_SELF,PETSC_ERR_SUP,"Both B and X must be of type MATSCALAPACK");
  MatScaLAPACKCheckDistribution(B,1,X,2);
  b = (Mat_ScaLAPACK*)B->data;
  x = (Mat_ScaLAPACK*)X->data;
  /* seed X with B so the ScaLAPACK solve can overwrite it in place */
  PetscCall(PetscArraycpy(x->loc,b->loc,b->lld*b->locc));

  switch (A->factortype) {
    case MAT_FACTOR_LU:
      PetscStackCallBLAS("SCALAPACKgetrs",SCALAPACKgetrs_("N",&a->M,&x->N,a->loc,&one,&one,a->desc,a->pivots,x->loc,&one,&one,x->desc,&info));
      PetscCheckScaLapackInfo("getrs",info);
      break;
    case MAT_FACTOR_CHOLESKY:
      PetscStackCallBLAS("SCALAPACKpotrs",SCALAPACKpotrs_("L",&a->M,&x->N,a->loc,&one,&one,a->desc,x->loc,&one,&one,x->desc,&info));
      PetscCheckScaLapackInfo("potrs",info);
      break;
    default:
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unfactored Matrix or Unsupported MatFactorType");
  }
  PetscFunctionReturn(0);
}
740 
741 static PetscErrorCode MatLUFactor_ScaLAPACK(Mat A,IS row,IS col,const MatFactorInfo *factorinfo)
742 {
743   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
744   PetscBLASInt   one=1,info;
745 
746   PetscFunctionBegin;
747   if (!a->pivots) {
748     PetscCall(PetscMalloc1(a->locr+a->mb,&a->pivots));
749     PetscCall(PetscLogObjectMemory((PetscObject)A,a->locr*sizeof(PetscBLASInt)));
750   }
751   PetscStackCallBLAS("SCALAPACKgetrf",SCALAPACKgetrf_(&a->M,&a->N,a->loc,&one,&one,a->desc,a->pivots,&info));
752   PetscCheckScaLapackInfo("getrf",info);
753   A->factortype = MAT_FACTOR_LU;
754   A->assembled  = PETSC_TRUE;
755 
756   PetscCall(PetscFree(A->solvertype));
757   PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK,&A->solvertype));
758   PetscFunctionReturn(0);
759 }
760 
/* Numeric phase of LU factorization: copy A into the factor matrix F
   (preallocated with the same distribution by MatGetFactor) and factor in place */
static PetscErrorCode MatLUFactorNumeric_ScaLAPACK(Mat F,Mat A,const MatFactorInfo *info)
{
  PetscFunctionBegin;
  PetscCall(MatCopy(A,F,SAME_NONZERO_PATTERN));
  PetscCall(MatLUFactor_ScaLAPACK(F,0,0,info));
  PetscFunctionReturn(0);
}
768 
/* Symbolic phase of LU factorization: a no-op for dense ScaLAPACK matrices */
static PetscErrorCode MatLUFactorSymbolic_ScaLAPACK(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
{
  PetscFunctionBegin;
  /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */
  PetscFunctionReturn(0);
}
775 
/* In-place Cholesky factorization of the lower triangle (PxPOTRF).
   The permutation argument is ignored (no pivoting). On success the
   matrix is marked as Cholesky-factored. */
static PetscErrorCode MatCholeskyFactor_ScaLAPACK(Mat A,IS perm,const MatFactorInfo *factorinfo)
{
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
  PetscBLASInt   one=1,info;

  PetscFunctionBegin;
  PetscStackCallBLAS("SCALAPACKpotrf",SCALAPACKpotrf_("L",&a->M,a->loc,&one,&one,a->desc,&info));
  PetscCheckScaLapackInfo("potrf",info);
  A->factortype = MAT_FACTOR_CHOLESKY;
  A->assembled  = PETSC_TRUE;

  /* record which solver package produced the factorization */
  PetscCall(PetscFree(A->solvertype));
  PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK,&A->solvertype));
  PetscFunctionReturn(0);
}
791 
/* Numeric phase of Cholesky factorization: copy A into the factor matrix F
   and factor in place */
static PetscErrorCode MatCholeskyFactorNumeric_ScaLAPACK(Mat F,Mat A,const MatFactorInfo *info)
{
  PetscFunctionBegin;
  PetscCall(MatCopy(A,F,SAME_NONZERO_PATTERN));
  PetscCall(MatCholeskyFactor_ScaLAPACK(F,0,info));
  PetscFunctionReturn(0);
}
799 
/* Symbolic phase of Cholesky factorization: a no-op for dense ScaLAPACK matrices */
static PetscErrorCode MatCholeskyFactorSymbolic_ScaLAPACK(Mat F,Mat A,IS perm,const MatFactorInfo *info)
{
  PetscFunctionBegin;
  /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */
  PetscFunctionReturn(0);
}
806 
/* Query routine composed on factor matrices: reports MATSOLVERSCALAPACK */
PetscErrorCode MatFactorGetSolverType_scalapack_scalapack(Mat A,MatSolverType *type)
{
  PetscFunctionBegin;
  *type = MATSOLVERSCALAPACK;
  PetscFunctionReturn(0);
}
813 
/* Create an (unfactored) factor matrix F for A, used by both LU and Cholesky.
   F mirrors A's block sizes, global sizes and grid origin; the actual
   factorization happens later in the numeric-factor routines. */
static PetscErrorCode MatGetFactor_scalapack_scalapack(Mat A,MatFactorType ftype,Mat *F)
{
  Mat            B;
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;

  PetscFunctionBegin;
  /* Create the factorization matrix */
  PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A),a->mb,a->nb,a->M,a->N,a->rsrc,a->csrc,&B));
  /* no symbolic phase needed, mark it trivial so MatXXFactorSymbolic is skipped */
  B->trivialsymbolic = PETSC_TRUE;
  B->factortype = ftype;
  PetscCall(PetscFree(B->solvertype));
  PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK,&B->solvertype));

  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_scalapack_scalapack));
  *F = B;
  PetscFunctionReturn(0);
}
831 
/* Register the ScaLAPACK solver package for LU and Cholesky on MATSCALAPACK */
PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_ScaLAPACK(void)
{
  PetscFunctionBegin;
  PetscCall(MatSolverTypeRegister(MATSOLVERSCALAPACK,MATSCALAPACK,MAT_FACTOR_LU,MatGetFactor_scalapack_scalapack));
  PetscCall(MatSolverTypeRegister(MATSOLVERSCALAPACK,MATSCALAPACK,MAT_FACTOR_CHOLESKY,MatGetFactor_scalapack_scalapack));
  PetscFunctionReturn(0);
}
839 
840 static PetscErrorCode MatNorm_ScaLAPACK(Mat A,NormType type,PetscReal *nrm)
841 {
842   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
843   PetscBLASInt   one=1,lwork=0;
844   const char     *ntype;
845   PetscScalar    *work=NULL,dummy;
846 
847   PetscFunctionBegin;
848   switch (type) {
849     case NORM_1:
850       ntype = "1";
851       lwork = PetscMax(a->locr,a->locc);
852       break;
853     case NORM_FROBENIUS:
854       ntype = "F";
855       work  = &dummy;
856       break;
857     case NORM_INFINITY:
858       ntype = "I";
859       lwork = PetscMax(a->locr,a->locc);
860       break;
861     default:
862       SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Unsupported norm type");
863   }
864   if (lwork) PetscCall(PetscMalloc1(lwork,&work));
865   *nrm = SCALAPACKlange_(ntype,&a->M,&a->N,a->loc,&one,&one,a->desc,work);
866   if (lwork) PetscCall(PetscFree(work));
867   PetscFunctionReturn(0);
868 }
869 
/* Set all matrix entries to zero by clearing the whole local array
   (lld*locc entries, i.e. including leading-dimension padding) */
static PetscErrorCode MatZeroEntries_ScaLAPACK(Mat A)
{
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;

  PetscFunctionBegin;
  PetscCall(PetscArrayzero(a->loc,a->lld*a->locc));
  PetscFunctionReturn(0);
}
878 
879 static PetscErrorCode MatGetOwnershipIS_ScaLAPACK(Mat A,IS *rows,IS *cols)
880 {
881   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
882   PetscInt       i,n,nb,isrc,nproc,iproc,*idx;
883 
884   PetscFunctionBegin;
885   if (rows) {
886     n     = a->locr;
887     nb    = a->mb;
888     isrc  = a->rsrc;
889     nproc = a->grid->nprow;
890     iproc = a->grid->myrow;
891     PetscCall(PetscMalloc1(n,&idx));
892     for (i=0;i<n;i++) idx[i] = nproc*nb*(i/nb) + i%nb + ((nproc+iproc-isrc)%nproc)*nb;
893     PetscCall(ISCreateGeneral(PETSC_COMM_SELF,n,idx,PETSC_OWN_POINTER,rows));
894   }
895   if (cols) {
896     n     = a->locc;
897     nb    = a->nb;
898     isrc  = a->csrc;
899     nproc = a->grid->npcol;
900     iproc = a->grid->mycol;
901     PetscCall(PetscMalloc1(n,&idx));
902     for (i=0;i<n;i++) idx[i] = nproc*nb*(i/nb) + i%nb + ((nproc+iproc-isrc)%nproc)*nb;
903     PetscCall(ISCreateGeneral(PETSC_COMM_SELF,n,idx,PETSC_OWN_POINTER,cols));
904   }
905   PetscFunctionReturn(0);
906 }
907 
/* Convert a MATSCALAPACK matrix (2d block-cyclic) to MATDENSE (1d block-row
   distribution). The redistribution is done with PxGEMR2D, viewing the dense
   matrix as a ScaLAPACK matrix on the single-column process grid (ictxcol).
   When reusing a B whose row layout differs from the equal split that
   ScaLAPACK matrices enforce, an auxiliary dense matrix is used and its rows
   are transferred to B with MatSetValues. */
static PetscErrorCode MatConvert_ScaLAPACK_Dense(Mat A,MatType newtype,MatReuse reuse,Mat *B)
{
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
  Mat            Bmpi;
  MPI_Comm       comm;
  PetscInt       i,M=A->rmap->N,N=A->cmap->N,m,n,rstart,rend,nz;
  const PetscInt *ranges,*branges,*cwork;
  const PetscScalar *vwork;
  PetscBLASInt   bdesc[9],bmb,zero=0,one=1,lld,info;
  PetscScalar    *barray;
  PetscBool      differ=PETSC_FALSE;
  PetscMPIInt    size;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
  PetscCall(PetscLayoutGetRanges(A->rmap,&ranges));

  if (reuse == MAT_REUSE_MATRIX) { /* check if local sizes differ in A and B */
    PetscCallMPI(MPI_Comm_size(comm,&size));
    PetscCall(PetscLayoutGetRanges((*B)->rmap,&branges));
    for (i=0;i<size;i++) if (ranges[i+1]!=branges[i+1]) { differ=PETSC_TRUE; break; }
  }

  if (reuse == MAT_REUSE_MATRIX && differ) { /* special case, use auxiliary dense matrix */
    /* the auxiliary matrix gets the equal split, matching A's layout */
    PetscCall(MatCreate(comm,&Bmpi));
    m = PETSC_DECIDE;
    PetscCall(PetscSplitOwnershipEqual(comm,&m,&M));
    n = PETSC_DECIDE;
    PetscCall(PetscSplitOwnershipEqual(comm,&n,&N));
    PetscCall(MatSetSizes(Bmpi,m,n,M,N));
    PetscCall(MatSetType(Bmpi,MATDENSE));
    PetscCall(MatSetUp(Bmpi));

    /* create ScaLAPACK descriptor for B (1d block distribution) */
    /* A's layout is the equal split (enforced at MatSetUp), so ranges[1] is the
       uniform row block size and A->rmap->n equals Bmpi's local row count */
    PetscCall(PetscBLASIntCast(ranges[1],&bmb));  /* row block size */
    lld = PetscMax(A->rmap->n,1);  /* local leading dimension */
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(bdesc,&a->M,&a->N,&bmb,&a->N,&zero,&zero,&a->grid->ictxcol,&lld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* redistribute matrix */
    PetscCall(MatDenseGetArray(Bmpi,&barray));
    PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&a->N,a->loc,&one,&one,a->desc,barray,&one,&one,bdesc,&a->grid->ictxcol));
    PetscCall(MatDenseRestoreArray(Bmpi,&barray));
    PetscCall(MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY));

    /* transfer rows of auxiliary matrix to the final matrix B */
    PetscCall(MatGetOwnershipRange(Bmpi,&rstart,&rend));
    for (i=rstart;i<rend;i++) {
      PetscCall(MatGetRow(Bmpi,i,&nz,&cwork,&vwork));
      PetscCall(MatSetValues(*B,1,&i,nz,cwork,vwork,INSERT_VALUES));
      PetscCall(MatRestoreRow(Bmpi,i,&nz,&cwork,&vwork));
    }
    PetscCall(MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY));
    PetscCall(MatDestroy(&Bmpi));

  } else {  /* normal cases */

    if (reuse == MAT_REUSE_MATRIX) Bmpi = *B;
    else {
      PetscCall(MatCreate(comm,&Bmpi));
      m = PETSC_DECIDE;
      PetscCall(PetscSplitOwnershipEqual(comm,&m,&M));
      n = PETSC_DECIDE;
      PetscCall(PetscSplitOwnershipEqual(comm,&n,&N));
      PetscCall(MatSetSizes(Bmpi,m,n,M,N));
      PetscCall(MatSetType(Bmpi,MATDENSE));
      PetscCall(MatSetUp(Bmpi));
    }

    /* create ScaLAPACK descriptor for B (1d block distribution) */
    PetscCall(PetscBLASIntCast(ranges[1],&bmb));  /* row block size */
    lld = PetscMax(A->rmap->n,1);  /* local leading dimension */
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(bdesc,&a->M,&a->N,&bmb,&a->N,&zero,&zero,&a->grid->ictxcol,&lld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* redistribute matrix */
    PetscCall(MatDenseGetArray(Bmpi,&barray));
    PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&a->N,a->loc,&one,&one,a->desc,barray,&one,&one,bdesc,&a->grid->ictxcol));
    PetscCall(MatDenseRestoreArray(Bmpi,&barray));

    PetscCall(MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY));
    if (reuse == MAT_INPLACE_MATRIX) {
      PetscCall(MatHeaderReplace(A,&Bmpi));
    } else *B = Bmpi;
  }
  PetscFunctionReturn(0);
}
998 
/* Convert a MATDENSE matrix (1d block-row distribution) to MATSCALAPACK
   (2d block-cyclic). The inverse of MatConvert_ScaLAPACK_Dense: a ScaLAPACK
   descriptor is built for A's 1d layout on the single-column grid and
   PxGEMR2D performs the redistribution into B's local storage. */
PETSC_INTERN PetscErrorCode MatConvert_Dense_ScaLAPACK(Mat A,MatType newtype,MatReuse reuse,Mat *B)
{
  Mat_ScaLAPACK  *b;
  Mat            Bmpi;
  MPI_Comm       comm;
  PetscInt       M=A->rmap->N,N=A->cmap->N,m,n;
  const PetscInt *ranges;
  PetscBLASInt   adesc[9],amb,zero=0,one=1,lld,info;
  PetscScalar    *aarray;
  PetscInt       lda;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A,&comm));

  if (reuse == MAT_REUSE_MATRIX) Bmpi = *B;
  else {
    /* create the target ScaLAPACK matrix with equal local splits */
    PetscCall(MatCreate(comm,&Bmpi));
    m = PETSC_DECIDE;
    PetscCall(PetscSplitOwnershipEqual(comm,&m,&M));
    n = PETSC_DECIDE;
    PetscCall(PetscSplitOwnershipEqual(comm,&n,&N));
    PetscCall(MatSetSizes(Bmpi,m,n,M,N));
    PetscCall(MatSetType(Bmpi,MATSCALAPACK));
    PetscCall(MatSetUp(Bmpi));
  }
  b = (Mat_ScaLAPACK*)Bmpi->data;

  /* create ScaLAPACK descriptor for A (1d block distribution) */
  PetscCall(PetscLayoutGetRanges(A->rmap,&ranges));
  PetscCall(PetscBLASIntCast(ranges[1],&amb));  /* row block size */
  PetscCall(MatDenseGetLDA(A,&lda));
  lld = PetscMax(lda,1);  /* local leading dimension */
  PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(adesc,&b->M,&b->N,&amb,&b->N,&zero,&zero,&b->grid->ictxcol,&lld,&info));
  PetscCheckScaLapackInfo("descinit",info);

  /* redistribute matrix */
  PetscCall(MatDenseGetArray(A,&aarray));
  PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&b->M,&b->N,aarray,&one,&one,adesc,b->loc,&one,&one,b->desc,&b->grid->ictxcol));
  PetscCall(MatDenseRestoreArray(A,&aarray));

  PetscCall(MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY));
  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A,&Bmpi));
  } else *B = Bmpi;
  PetscFunctionReturn(0);
}
1046 
1047 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
1048 {
1049   Mat               mat_scal;
1050   PetscInt          M=A->rmap->N,N=A->cmap->N,rstart=A->rmap->rstart,rend=A->rmap->rend,m,n,row,ncols;
1051   const PetscInt    *cols;
1052   const PetscScalar *vals;
1053 
1054   PetscFunctionBegin;
1055   if (reuse == MAT_REUSE_MATRIX) {
1056     mat_scal = *newmat;
1057     PetscCall(MatZeroEntries(mat_scal));
1058   } else {
1059     PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&mat_scal));
1060     m = PETSC_DECIDE;
1061     PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&m,&M));
1062     n = PETSC_DECIDE;
1063     PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&n,&N));
1064     PetscCall(MatSetSizes(mat_scal,m,n,M,N));
1065     PetscCall(MatSetType(mat_scal,MATSCALAPACK));
1066     PetscCall(MatSetUp(mat_scal));
1067   }
1068   for (row=rstart;row<rend;row++) {
1069     PetscCall(MatGetRow(A,row,&ncols,&cols,&vals));
1070     PetscCall(MatSetValues(mat_scal,1,&row,ncols,cols,vals,INSERT_VALUES));
1071     PetscCall(MatRestoreRow(A,row,&ncols,&cols,&vals));
1072   }
1073   PetscCall(MatAssemblyBegin(mat_scal,MAT_FINAL_ASSEMBLY));
1074   PetscCall(MatAssemblyEnd(mat_scal,MAT_FINAL_ASSEMBLY));
1075 
1076   if (reuse == MAT_INPLACE_MATRIX) PetscCall(MatHeaderReplace(A,&mat_scal));
1077   else *newmat = mat_scal;
1078   PetscFunctionReturn(0);
1079 }
1080 
/* Convert a symmetric SBAIJ matrix (upper triangle stored) to MATSCALAPACK.
   Each stored row is inserted as-is and, in addition, its off-diagonal
   entries are mirrored into the lower triangle (conjugated if A is
   Hermitian). ADD_VALUES is used so that off-process contributions from the
   mirroring accumulate correctly; diagonal entries are skipped in the mirror
   loop to avoid counting them twice. */
PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
{
  Mat               mat_scal;
  PetscInt          M=A->rmap->N,N=A->cmap->N,m,n,row,ncols,j,rstart=A->rmap->rstart,rend=A->rmap->rend;
  const PetscInt    *cols;
  const PetscScalar *vals;
  PetscScalar       v;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) {
    mat_scal = *newmat;
    PetscCall(MatZeroEntries(mat_scal));
  } else {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&mat_scal));
    m = PETSC_DECIDE;
    PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&m,&M));
    n = PETSC_DECIDE;
    PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&n,&N));
    PetscCall(MatSetSizes(mat_scal,m,n,M,N));
    PetscCall(MatSetType(mat_scal,MATSCALAPACK));
    PetscCall(MatSetUp(mat_scal));
  }
  PetscCall(MatGetRowUpperTriangular(A));
  for (row=rstart;row<rend;row++) {
    PetscCall(MatGetRow(A,row,&ncols,&cols,&vals));
    PetscCall(MatSetValues(mat_scal,1,&row,ncols,cols,vals,ADD_VALUES));
    for (j=0;j<ncols;j++) { /* lower triangular part */
      if (cols[j] == row) continue;
      v    = A->hermitian ? PetscConj(vals[j]) : vals[j];
      PetscCall(MatSetValues(mat_scal,1,&cols[j],1,&row,&v,ADD_VALUES));
    }
    PetscCall(MatRestoreRow(A,row,&ncols,&cols,&vals));
  }
  PetscCall(MatRestoreRowUpperTriangular(A));
  PetscCall(MatAssemblyBegin(mat_scal,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat_scal,MAT_FINAL_ASSEMBLY));

  if (reuse == MAT_INPLACE_MATRIX) PetscCall(MatHeaderReplace(A,&mat_scal));
  else *newmat = mat_scal;
  PetscFunctionReturn(0);
}
1122 
/* Allocate the local dense array (lld x locc entries, zero-initialized)
   once the layouts are set up. Any previous allocation is freed first. */
static PetscErrorCode MatScaLAPACKSetPreallocation(Mat A)
{
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
  PetscInt       sz=0;

  PetscFunctionBegin;
  PetscCall(PetscLayoutSetUp(A->rmap));
  PetscCall(PetscLayoutSetUp(A->cmap));
  /* default the leading dimension to the local row count if not set */
  if (!a->lld) a->lld = a->locr;

  PetscCall(PetscFree(a->loc));
  /* overflow-checked product lld*locc */
  PetscCall(PetscIntMultError(a->lld,a->locc,&sz));
  PetscCall(PetscCalloc1(sz,&a->loc));
  PetscCall(PetscLogObjectMemory((PetscObject)A,sz*sizeof(PetscScalar)));

  A->preallocated = PETSC_TRUE;
  PetscFunctionReturn(0);
}
1141 
/* Destroy a MATSCALAPACK matrix: free local storage and pivots, drop one
   reference to the shared BLACS grid stored as an attribute on the inner
   communicator (tearing the grid down when the count reaches zero), and
   remove the composed methods. */
static PetscErrorCode MatDestroy_ScaLAPACK(Mat A)
{
  Mat_ScaLAPACK      *a = (Mat_ScaLAPACK*)A->data;
  Mat_ScaLAPACK_Grid *grid;
  PetscBool          flg;
  MPI_Comm           icomm;

  PetscFunctionBegin;
  PetscCall(MatStashDestroy_Private(&A->stash));
  PetscCall(PetscFree(a->loc));
  PetscCall(PetscFree(a->pivots));
  /* look up the grid attribute on PETSc's inner communicator */
  PetscCall(PetscCommDuplicate(PetscObjectComm((PetscObject)A),&icomm,NULL));
  PetscCallMPI(MPI_Comm_get_attr(icomm,Petsc_ScaLAPACK_keyval,(void**)&grid,(int*)&flg));
  if (--grid->grid_refct == 0) {
    /* last matrix using this grid: release all three BLACS contexts */
    Cblacs_gridexit(grid->ictxt);
    Cblacs_gridexit(grid->ictxrow);
    Cblacs_gridexit(grid->ictxcol);
    PetscCall(PetscFree(grid));
    PetscCallMPI(MPI_Comm_delete_attr(icomm,Petsc_ScaLAPACK_keyval));
  }
  PetscCall(PetscCommDestroy(&icomm));
  PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatGetOwnershipIS_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKSetBlockSizes_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKGetBlockSizes_C",NULL));
  PetscCall(PetscFree(A->data));
  PetscFunctionReturn(0);
}
1170 
/* Verify that a layout has equal local sizes on all processes except
   possibly the last. With 1 or 2 processes any split trivially satisfies
   this, hence the check only runs for size > 2. The loop finds the first
   process whose local size differs from process 0; that is only acceptable
   if it is the last process or the remaining sizes are zero. */
static inline PetscErrorCode MatScaLAPACKCheckLayout(PetscLayout map)
{
  const PetscInt *ranges;
  PetscMPIInt    size;
  PetscInt       i,n;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(map->comm,&size));
  if (size>2) {
    PetscCall(PetscLayoutGetRanges(map,&ranges));
    n = ranges[1]-ranges[0];
    for (i=1;i<size-1;i++) if (ranges[i+1]-ranges[i]!=n) break;
    PetscCheck(i >= size-1 || ranges[i+1]-ranges[i] == 0 || ranges[i+2]-ranges[i+1] == 0,map->comm,PETSC_ERR_SUP,"MATSCALAPACK must have equal local sizes in all processes (except possibly the last one), consider using MatCreateScaLAPACK");
  }
  PetscFunctionReturn(0);
}
1187 
/* Set up a MATSCALAPACK matrix: validate the (equal-split) layouts, compute
   the local sizes of the 2d block-cyclic distribution with numroc, allocate
   the local array, and initialize the ScaLAPACK array descriptor. */
PetscErrorCode MatSetUp_ScaLAPACK(Mat A)
{
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
  PetscBLASInt   info=0;

  PetscFunctionBegin;
  PetscCall(PetscLayoutSetUp(A->rmap));
  PetscCall(PetscLayoutSetUp(A->cmap));

  /* check that the layout is as enforced by MatCreateScaLAPACK */
  PetscCall(MatScaLAPACKCheckLayout(A->rmap));
  PetscCall(MatScaLAPACKCheckLayout(A->cmap));

  /* compute local sizes */
  PetscCall(PetscBLASIntCast(A->rmap->N,&a->M));
  PetscCall(PetscBLASIntCast(A->cmap->N,&a->N));
  a->locr = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
  a->locc = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
  /* leading dimension must be at least 1 even if no rows are stored locally */
  a->lld  = PetscMax(1,a->locr);

  /* allocate local array */
  PetscCall(MatScaLAPACKSetPreallocation(A));

  /* set up ScaLAPACK descriptor */
  PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(a->desc,&a->M,&a->N,&a->mb,&a->nb,&a->rsrc,&a->csrc,&a->grid->ictxt,&a->lld,&info));
  PetscCheckScaLapackInfo("descinit",info);
  PetscFunctionReturn(0);
}
1216 
/* Begin assembly: start communicating the stashed (off-process) values.
   Skipped entirely when the user guaranteed no off-process entries. */
PetscErrorCode MatAssemblyBegin_ScaLAPACK(Mat A,MatAssemblyType type)
{
  PetscInt       nstash,reallocs;

  PetscFunctionBegin;
  if (A->nooffprocentries) PetscFunctionReturn(0);
  PetscCall(MatStashScatterBegin_Private(A,&A->stash,NULL));
  PetscCall(MatStashGetInfo_Private(&A->stash,&nstash,&reallocs));
  PetscCall(PetscInfo(A,"Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs));
  PetscFunctionReturn(0);
}
1228 
/* End assembly: receive stashed values from other processes and store each
   into the local array. SCALAPACKinfog2l maps a 1-based global (row,col)
   index to 1-based local indices plus the owning process coordinates, which
   must match this process since the stash routed the value here. */
PetscErrorCode MatAssemblyEnd_ScaLAPACK(Mat A,MatAssemblyType type)
{
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
  PetscMPIInt    n;
  PetscInt       i,flg,*row,*col;
  PetscScalar    *val;
  PetscBLASInt   gridx,gcidx,lridx,lcidx,rsrc,csrc;

  PetscFunctionBegin;
  if (A->nooffprocentries) PetscFunctionReturn(0);
  /* drain incoming stash messages until none remain */
  while (1) {
    PetscCall(MatStashScatterGetMesg_Private(&A->stash,&n,&row,&col,&val,&flg));
    if (!flg) break;
    for (i=0;i<n;i++) {
      /* convert 0-based global indices to 1-based for ScaLAPACK */
      PetscCall(PetscBLASIntCast(row[i]+1,&gridx));
      PetscCall(PetscBLASIntCast(col[i]+1,&gcidx));
      PetscStackCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc));
      PetscCheck(rsrc == a->grid->myrow && csrc == a->grid->mycol,PetscObjectComm((PetscObject)A),PETSC_ERR_LIB,"Something went wrong, received value does not belong to this process");
      switch (A->insertmode) {
        case INSERT_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] = val[i]; break;
        case ADD_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] += val[i]; break;
        default: SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for InsertMode %d",(int)A->insertmode);
      }
    }
  }
  PetscCall(MatStashScatterEnd_Private(&A->stash));
  PetscFunctionReturn(0);
}
1257 
1258 PetscErrorCode MatLoad_ScaLAPACK(Mat newMat,PetscViewer viewer)
1259 {
1260   Mat            Adense,As;
1261   MPI_Comm       comm;
1262 
1263   PetscFunctionBegin;
1264   PetscCall(PetscObjectGetComm((PetscObject)newMat,&comm));
1265   PetscCall(MatCreate(comm,&Adense));
1266   PetscCall(MatSetType(Adense,MATDENSE));
1267   PetscCall(MatLoad(Adense,viewer));
1268   PetscCall(MatConvert(Adense,MATSCALAPACK,MAT_INITIAL_MATRIX,&As));
1269   PetscCall(MatDestroy(&Adense));
1270   PetscCall(MatHeaderReplace(newMat,&As));
1271   PetscFunctionReturn(0);
1272 }
1273 
1274 /* -------------------------------------------------------------------*/
/* Function-pointer table for MATSCALAPACK; a 0 entry means the operation is
   not supported by this type. Index comments follow the _MatOps layout. */
static struct _MatOps MatOps_Values = {
       MatSetValues_ScaLAPACK,
       0,
       0,
       MatMult_ScaLAPACK,
/* 4*/ MatMultAdd_ScaLAPACK,
       MatMultTranspose_ScaLAPACK,
       MatMultTransposeAdd_ScaLAPACK,
       MatSolve_ScaLAPACK,
       MatSolveAdd_ScaLAPACK,
       0,
/*10*/ 0,
       MatLUFactor_ScaLAPACK,
       MatCholeskyFactor_ScaLAPACK,
       0,
       MatTranspose_ScaLAPACK,
/*15*/ MatGetInfo_ScaLAPACK,
       0,
       MatGetDiagonal_ScaLAPACK,
       MatDiagonalScale_ScaLAPACK,
       MatNorm_ScaLAPACK,
/*20*/ MatAssemblyBegin_ScaLAPACK,
       MatAssemblyEnd_ScaLAPACK,
       MatSetOption_ScaLAPACK,
       MatZeroEntries_ScaLAPACK,
/*24*/ 0,
       MatLUFactorSymbolic_ScaLAPACK,
       MatLUFactorNumeric_ScaLAPACK,
       MatCholeskyFactorSymbolic_ScaLAPACK,
       MatCholeskyFactorNumeric_ScaLAPACK,
/*29*/ MatSetUp_ScaLAPACK,
       0,
       0,
       0,
       0,
/*34*/ MatDuplicate_ScaLAPACK,
       0,
       0,
       0,
       0,
/*39*/ MatAXPY_ScaLAPACK,
       0,
       0,
       0,
       MatCopy_ScaLAPACK,
/*44*/ 0,
       MatScale_ScaLAPACK,
       MatShift_ScaLAPACK,
       0,
       0,
/*49*/ 0,
       0,
       0,
       0,
       0,
/*54*/ 0,
       0,
       0,
       0,
       0,
/*59*/ 0,
       MatDestroy_ScaLAPACK,
       MatView_ScaLAPACK,
       0,
       0,
/*64*/ 0,
       0,
       0,
       0,
       0,
/*69*/ 0,
       0,
       MatConvert_ScaLAPACK_Dense,
       0,
       0,
/*74*/ 0,
       0,
       0,
       0,
       0,
/*79*/ 0,
       0,
       0,
       0,
       MatLoad_ScaLAPACK,
/*84*/ 0,
       0,
       0,
       0,
       0,
/*89*/ 0,
       0,
       MatMatMultNumeric_ScaLAPACK,
       0,
       0,
/*94*/ 0,
       0,
       0,
       MatMatTransposeMultNumeric_ScaLAPACK,
       0,
/*99*/ MatProductSetFromOptions_ScaLAPACK,
       0,
       0,
       MatConjugate_ScaLAPACK,
       0,
/*104*/0,
       0,
       0,
       0,
       0,
/*109*/MatMatSolve_ScaLAPACK,
       0,
       0,
       0,
       MatMissingDiagonal_ScaLAPACK,
/*114*/0,
       0,
       0,
       0,
       0,
/*119*/0,
       MatHermitianTranspose_ScaLAPACK,
       0,
       0,
       0,
/*124*/0,
       0,
       0,
       0,
       0,
/*129*/0,
       0,
       0,
       0,
       0,
/*134*/0,
       0,
       0,
       0,
       0,
       0,
/*140*/0,
       0,
       0,
       0,
       0,
/*145*/0,
       0,
       0
};
1425 
/* Custom stash scatter for MATSCALAPACK: unlike the default row-ownership
   routing, each stashed (row,col,value) triple is routed to the process that
   owns that entry in the 2d block-cyclic distribution (computed with
   SCALAPACKinfog2l + Cblacs_pnum). Two messages are sent per destination:
   one with the interleaved row/column indices and one with the values. */
static PetscErrorCode MatStashScatterBegin_ScaLAPACK(Mat mat,MatStash *stash,PetscInt *owners)
{
  PetscInt           *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2;
  PetscInt           size=stash->size,nsends;
  PetscInt           count,*sindices,**rindices,i,j,l;
  PetscScalar        **rvalues,*svalues;
  MPI_Comm           comm = stash->comm;
  MPI_Request        *send_waits,*recv_waits,*recv_waits1,*recv_waits2;
  PetscMPIInt        *sizes,*nlengths,nreceives;
  PetscInt           *sp_idx,*sp_idy;
  PetscScalar        *sp_val;
  PetscMatStashSpace space,space_next;
  PetscBLASInt       gridx,gcidx,lridx,lcidx,rsrc,csrc;
  Mat_ScaLAPACK      *a = (Mat_ScaLAPACK*)mat->data;

  PetscFunctionBegin;
  {                             /* make sure all processors are either in INSERTMODE or ADDMODE */
    InsertMode addv;
    PetscCall(MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat)));
    PetscCheck(addv != (ADD_VALUES|INSERT_VALUES),PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
    mat->insertmode = addv; /* in case this processor had no cache */
  }

  bs2 = stash->bs*stash->bs;

  /*  first count number of contributors to each processor */
  PetscCall(PetscCalloc1(size,&nlengths));
  PetscCall(PetscMalloc1(stash->n+1,&owner));

  i     = j    = 0;
  space = stash->space_head;
  while (space) {
    space_next = space->next;
    for (l=0; l<space->local_used; l++) {
      /* map the global entry to its owner in the block-cyclic distribution */
      PetscCall(PetscBLASIntCast(space->idx[l]+1,&gridx));
      PetscCall(PetscBLASIntCast(space->idy[l]+1,&gcidx));
      PetscStackCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc));
      j = Cblacs_pnum(a->grid->ictxt,rsrc,csrc);
      nlengths[j]++; owner[i] = j;
      i++;
    }
    space = space_next;
  }

  /* Now check what procs get messages - and compute nsends. */
  PetscCall(PetscCalloc1(size,&sizes));
  for (i=0, nsends=0; i<size; i++) {
    if (nlengths[i]) {
      sizes[i] = 1; nsends++;
    }
  }

  {PetscMPIInt *onodes,*olengths;
   /* Determine the number of messages to expect, their lengths, from from-ids */
   PetscCall(PetscGatherNumberOfMessages(comm,sizes,nlengths,&nreceives));
   PetscCall(PetscGatherMessageLengths(comm,nsends,nreceives,nlengths,&onodes,&olengths));
   /* since clubbing row,col - lengths are multiplied by 2 */
   for (i=0; i<nreceives; i++) olengths[i] *=2;
   PetscCall(PetscPostIrecvInt(comm,tag1,nreceives,onodes,olengths,&rindices,&recv_waits1));
   /* values are size 'bs2' lengths (and remove earlier factor 2 */
   for (i=0; i<nreceives; i++) olengths[i] = olengths[i]*bs2/2;
   PetscCall(PetscPostIrecvScalar(comm,tag2,nreceives,onodes,olengths,&rvalues,&recv_waits2));
   PetscCall(PetscFree(onodes));
   PetscCall(PetscFree(olengths));}

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  PetscCall(PetscMalloc2(bs2*stash->n,&svalues,2*(stash->n+1),&sindices));
  PetscCall(PetscMalloc1(2*nsends,&send_waits));
  PetscCall(PetscMalloc2(size,&startv,size,&starti));
  /* use 2 sends the first with all_a, the next with all_i and all_j */
  startv[0] = 0; starti[0] = 0;
  for (i=1; i<size; i++) {
    startv[i] = startv[i-1] + nlengths[i-1];
    starti[i] = starti[i-1] + 2*nlengths[i-1];
  }

  /* pack values and indices into the send buffers, bucketed by owner */
  i     = 0;
  space = stash->space_head;
  while (space) {
    space_next = space->next;
    sp_idx     = space->idx;
    sp_idy     = space->idy;
    sp_val     = space->val;
    for (l=0; l<space->local_used; l++) {
      j = owner[i];
      if (bs2 == 1) {
        svalues[startv[j]] = sp_val[l];
      } else {
        PetscInt    k;
        PetscScalar *buf1,*buf2;
        buf1 = svalues+bs2*startv[j];
        buf2 = space->val + bs2*l;
        for (k=0; k<bs2; k++) buf1[k] = buf2[k];
      }
      /* row indices occupy the first half of each destination's slot, column
         indices the second half (offset by nlengths[j]) */
      sindices[starti[j]]             = sp_idx[l];
      sindices[starti[j]+nlengths[j]] = sp_idy[l];
      startv[j]++;
      starti[j]++;
      i++;
    }
    space = space_next;
  }
  /* reset startv to the beginning of each bucket for the sends below */
  startv[0] = 0;
  for (i=1; i<size; i++) startv[i] = startv[i-1] + nlengths[i-1];

  for (i=0,count=0; i<size; i++) {
    if (sizes[i]) {
      PetscCallMPI(MPI_Isend(sindices+2*startv[i],2*nlengths[i],MPIU_INT,i,tag1,comm,send_waits+count++));
      PetscCallMPI(MPI_Isend(svalues+bs2*startv[i],bs2*nlengths[i],MPIU_SCALAR,i,tag2,comm,send_waits+count++));
    }
  }
#if defined(PETSC_USE_INFO)
  PetscCall(PetscInfo(NULL,"No of messages: %" PetscInt_FMT "\n",nsends));
  for (i=0; i<size; i++) {
    if (sizes[i]) {
      PetscCall(PetscInfo(NULL,"Mesg_to: %" PetscInt_FMT ": size: %zu bytes\n",i,(size_t)(nlengths[i]*(bs2*sizeof(PetscScalar)+2*sizeof(PetscInt)))));
    }
  }
#endif
  PetscCall(PetscFree(nlengths));
  PetscCall(PetscFree(owner));
  PetscCall(PetscFree2(startv,starti));
  PetscCall(PetscFree(sizes));

  /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */
  PetscCall(PetscMalloc1(2*nreceives,&recv_waits));

  for (i=0; i<nreceives; i++) {
    recv_waits[2*i]   = recv_waits1[i];
    recv_waits[2*i+1] = recv_waits2[i];
  }
  stash->recv_waits = recv_waits;

  PetscCall(PetscFree(recv_waits1));
  PetscCall(PetscFree(recv_waits2));

  stash->svalues         = svalues;
  stash->sindices        = sindices;
  stash->rvalues         = rvalues;
  stash->rindices        = rindices;
  stash->send_waits      = send_waits;
  stash->nsends          = nsends;
  stash->nrecvs          = nreceives;
  stash->reproduce_count = 0;
  PetscFunctionReturn(0);
}
1575 
/* Implementation of MatScaLAPACKSetBlockSizes(): validate and store the
   block sizes. PETSC_DECIDE maps mb to the default and nb to mb. Must be
   called before MatSetUp since the sizes shape the local allocation. */
static PetscErrorCode MatScaLAPACKSetBlockSizes_ScaLAPACK(Mat A,PetscInt mb,PetscInt nb)
{
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;

  PetscFunctionBegin;
  PetscCheck(!A->preallocated,PETSC_COMM_SELF,PETSC_ERR_ORDER,"Cannot change block sizes after MatSetUp");
  PetscCheck(mb >= 1 || mb == PETSC_DECIDE,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"mb %" PetscInt_FMT " must be at least 1",mb);
  PetscCheck(nb >= 1 || nb == PETSC_DECIDE,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"nb %" PetscInt_FMT " must be at least 1",nb);
  PetscCall(PetscBLASIntCast((mb==PETSC_DECIDE)?DEFAULT_BLOCKSIZE:mb,&a->mb));
  PetscCall(PetscBLASIntCast((nb==PETSC_DECIDE)?a->mb:nb,&a->nb));
  PetscFunctionReturn(0);
}
1588 
1589 /*@
1590    MatScaLAPACKSetBlockSizes - Sets the block sizes to be used for the distribution of
1591    the ScaLAPACK matrix
1592 
1593    Logically Collective on A
1594 
1595    Input Parameters:
1596 +  A  - a MATSCALAPACK matrix
1597 .  mb - the row block size
1598 -  nb - the column block size
1599 
1600    Level: intermediate
1601 
1602 .seealso: `MatCreateScaLAPACK()`, `MatScaLAPACKGetBlockSizes()`
1603 @*/
PetscErrorCode MatScaLAPACKSetBlockSizes(Mat A,PetscInt mb,PetscInt nb)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  PetscValidLogicalCollectiveInt(A,mb,2);  /* all ranks must pass the same mb */
  PetscValidLogicalCollectiveInt(A,nb,3);  /* all ranks must pass the same nb */
  /* PetscTryMethod makes this a no-op for matrix types that do not compose the method */
  PetscTryMethod(A,"MatScaLAPACKSetBlockSizes_C",(Mat,PetscInt,PetscInt),(A,mb,nb));
  PetscFunctionReturn(0);
}
1613 
1614 static PetscErrorCode MatScaLAPACKGetBlockSizes_ScaLAPACK(Mat A,PetscInt *mb,PetscInt *nb)
1615 {
1616   Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
1617 
1618   PetscFunctionBegin;
1619   if (mb) *mb = a->mb;
1620   if (nb) *nb = a->nb;
1621   PetscFunctionReturn(0);
1622 }
1623 
1624 /*@
1625    MatScaLAPACKGetBlockSizes - Gets the block sizes used in the distribution of
1626    the ScaLAPACK matrix
1627 
1628    Not collective
1629 
1630    Input Parameter:
1631 .  A  - a MATSCALAPACK matrix
1632 
1633    Output Parameters:
1634 +  mb - the row block size
1635 -  nb - the column block size
1636 
1637    Level: intermediate
1638 
1639 .seealso: `MatCreateScaLAPACK()`, `MatScaLAPACKSetBlockSizes()`
1640 @*/
PetscErrorCode MatScaLAPACKGetBlockSizes(Mat A,PetscInt *mb,PetscInt *nb)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  /* PetscUseMethod (unlike PetscTryMethod) errors if A is not a MATSCALAPACK */
  PetscUseMethod(A,"MatScaLAPACKGetBlockSizes_C",(Mat,PetscInt*,PetscInt*),(A,mb,nb));
  PetscFunctionReturn(0);
}
1648 
1649 PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Ref(MatStash*,PetscMPIInt*,PetscInt**,PetscInt**,PetscScalar**,PetscInt*);
1650 PETSC_INTERN PetscErrorCode MatStashScatterEnd_Ref(MatStash*);
1651 
1652 /*MC
1653    MATSCALAPACK = "scalapack" - A matrix type for dense matrices using the ScaLAPACK package
1654 
1655    Use ./configure --download-scalapack to install PETSc to use ScaLAPACK
1656 
1657    Use -pc_type lu -pc_factor_mat_solver_type scalapack to use this direct solver
1658 
1659    Options Database Keys:
1660 +  -mat_type scalapack - sets the matrix type to "scalapack" during a call to MatSetFromOptions()
1661 .  -mat_scalapack_grid_height - sets Grid Height for 2D cyclic ordering of internal matrix
1662 -  -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma)
1663 
1664    Level: beginner
1665 
1666 .seealso: `MATDENSE`, `MATELEMENTAL`
1667 M*/
1668 
/*
   MatCreate_ScaLAPACK - type constructor for MATSCALAPACK: installs the function
   table and stash callbacks, then attaches (or creates and caches) the BLACS
   process grid shared by all MATSCALAPACK matrices on the same communicator.
*/
PETSC_EXTERN PetscErrorCode MatCreate_ScaLAPACK(Mat A)
{
  Mat_ScaLAPACK      *a;
  PetscBool          flg,flg1;
  Mat_ScaLAPACK_Grid *grid;
  MPI_Comm           icomm;
  PetscBLASInt       nprow,npcol,myrow,mycol;
  PetscInt           optv1,k=2,array[2]={0,0};
  PetscMPIInt        size;

  PetscFunctionBegin;
  PetscCall(PetscMemcpy(A->ops,&MatOps_Values,sizeof(struct _MatOps)));
  A->insertmode = NOT_SET_VALUES;

  /* custom ScatterBegin; the reference ScatterGetMesg/ScatterEnd routines are reused */
  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)A),1,&A->stash));
  A->stash.ScatterBegin   = MatStashScatterBegin_ScaLAPACK;
  A->stash.ScatterGetMesg = MatStashScatterGetMesg_Ref;
  A->stash.ScatterEnd     = MatStashScatterEnd_Ref;
  A->stash.ScatterDestroy = NULL;

  PetscCall(PetscNewLog(A,&a));
  A->data = (void*)a;

  /* Grid needs to be shared between multiple Mats on the same communicator, implement by attribute caching on the MPI_Comm */
  if (Petsc_ScaLAPACK_keyval == MPI_KEYVAL_INVALID) {
    /* first MATSCALAPACK ever created: register the keyval, its finalizer, and the citation */
    PetscCallMPI(MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN,MPI_COMM_NULL_DELETE_FN,&Petsc_ScaLAPACK_keyval,(void*)0));
    PetscCall(PetscRegisterFinalize(Petsc_ScaLAPACK_keyval_free));
    PetscCall(PetscCitationsRegister(ScaLAPACKCitation,&ScaLAPACKCite));
  }
  PetscCall(PetscCommDuplicate(PetscObjectComm((PetscObject)A),&icomm,NULL));
  /* NOTE(review): MPI expects an int* flag; PetscBool is assumed int-sized here (PETSc-wide idiom) */
  PetscCallMPI(MPI_Comm_get_attr(icomm,Petsc_ScaLAPACK_keyval,(void**)&grid,(int*)&flg));
  if (!flg) {
    /* no grid cached on this communicator yet: build one and attach it */
    PetscCall(PetscNewLog(A,&grid));

    PetscCallMPI(MPI_Comm_size(icomm,&size));
    /* start from the largest integer <= sqrt(size); +0.001 guards against sqrt rounding just below an integer */
    grid->nprow = (PetscInt) (PetscSqrtReal((PetscReal)size) + 0.001);

    PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"ScaLAPACK Grid Options","Mat");
    PetscCall(PetscOptionsInt("-mat_scalapack_grid_height","Grid Height","None",grid->nprow,&optv1,&flg1));
    if (flg1) {
      /* a user-supplied grid height must tile the communicator exactly */
      PetscCheck(size % optv1 == 0,PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_INCOMP,"Grid Height %" PetscInt_FMT " must evenly divide CommSize %d",optv1,size);
      grid->nprow = optv1;
    }
    PetscOptionsEnd();

    if (size % grid->nprow) grid->nprow = 1;  /* cannot use a squarish grid, use a 1d grid */
    grid->npcol = size/grid->nprow;
    PetscCall(PetscBLASIntCast(grid->nprow,&nprow));
    PetscCall(PetscBLASIntCast(grid->npcol,&npcol));
    /* create the main nprow x npcol BLACS context on the duplicated communicator (row-major process ordering) */
    grid->ictxt = Csys2blacs_handle(icomm);
    Cblacs_gridinit(&grid->ictxt,"R",nprow,npcol);
    /* query back the actual grid shape and this process's coordinates in it */
    Cblacs_gridinfo(grid->ictxt,&nprow,&npcol,&myrow,&mycol);
    grid->grid_refct = 1;
    grid->nprow      = nprow;
    grid->npcol      = npcol;
    grid->myrow      = myrow;
    grid->mycol      = mycol;
    /* auxiliary 1d BLACS contexts for 1xsize and sizex1 grids */
    grid->ictxrow = Csys2blacs_handle(icomm);
    Cblacs_gridinit(&grid->ictxrow,"R",1,size);
    grid->ictxcol = Csys2blacs_handle(icomm);
    Cblacs_gridinit(&grid->ictxcol,"R",size,1);
    /* cache the grid on the communicator so subsequent MATSCALAPACK Mats can share it */
    PetscCallMPI(MPI_Comm_set_attr(icomm,Petsc_ScaLAPACK_keyval,(void*)grid));

  } else grid->grid_refct++;  /* reuse the cached grid; refcount is decremented on matrix destruction */
  PetscCall(PetscCommDestroy(&icomm));
  a->grid = grid;
  a->mb   = DEFAULT_BLOCKSIZE;
  a->nb   = DEFAULT_BLOCKSIZE;

  /* allow overriding the default block sizes from the options database */
  PetscOptionsBegin(PetscObjectComm((PetscObject)A),NULL,"ScaLAPACK Options","Mat");
  PetscCall(PetscOptionsIntArray("-mat_scalapack_block_sizes","Size of the blocks to use (one or two comma-separated integers)","MatCreateScaLAPACK",array,&k,&flg));
  if (flg) {
    a->mb = array[0];
    a->nb = (k>1)? array[1]: a->mb;  /* a single value sets both block sizes */
  }
  PetscOptionsEnd();

  /* compose the type-specific methods reached via MatGetOwnershipIS()/MatScaLAPACK{Set,Get}BlockSizes() */
  PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatGetOwnershipIS_C",MatGetOwnershipIS_ScaLAPACK));
  PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKSetBlockSizes_C",MatScaLAPACKSetBlockSizes_ScaLAPACK));
  PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKGetBlockSizes_C",MatScaLAPACKGetBlockSizes_ScaLAPACK));
  PetscCall(PetscObjectChangeTypeName((PetscObject)A,MATSCALAPACK));
  PetscFunctionReturn(0);
}
1753 
1754 /*@C
1755    MatCreateScaLAPACK - Creates a dense parallel matrix in ScaLAPACK format
1756    (2D block cyclic distribution).
1757 
1758    Collective
1759 
1760    Input Parameters:
1761 +  comm - MPI communicator
1762 .  mb   - row block size (or PETSC_DECIDE to have it set)
1763 .  nb   - column block size (or PETSC_DECIDE to have it set)
1764 .  M    - number of global rows
1765 .  N    - number of global columns
1766 .  rsrc - coordinate of process that owns the first row of the distributed matrix
1767 -  csrc - coordinate of process that owns the first column of the distributed matrix
1768 
1769    Output Parameter:
1770 .  A - the matrix
1771 
1772    Options Database Keys:
1773 .  -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma)
1774 
1775    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
1776    MatXXXXSetPreallocation() paradigm instead of this routine directly.
1777    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
1778 
1779    Notes:
1780    If PETSC_DECIDE is used for the block sizes, then an appropriate value
1781    is chosen.
1782 
1783    Storage Information:
   Storage is completely managed by ScaLAPACK, so this requires PETSc to be
1785    configured with ScaLAPACK. In particular, PETSc's local sizes lose
1786    significance and are thus ignored. The block sizes refer to the values
1787    used for the distributed matrix, not the same meaning as in BAIJ.
1788 
1789    Level: intermediate
1790 
1791 .seealso: `MatCreate()`, `MatCreateDense()`, `MatSetValues()`
1792 @*/
1793 PetscErrorCode MatCreateScaLAPACK(MPI_Comm comm,PetscInt mb,PetscInt nb,PetscInt M,PetscInt N,PetscInt rsrc,PetscInt csrc,Mat *A)
1794 {
1795   Mat_ScaLAPACK  *a;
1796   PetscInt       m,n;
1797 
1798   PetscFunctionBegin;
1799   PetscCall(MatCreate(comm,A));
1800   PetscCall(MatSetType(*A,MATSCALAPACK));
1801   PetscCheck(M != PETSC_DECIDE && N != PETSC_DECIDE,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot use PETSC_DECIDE for matrix dimensions");
1802   /* rows and columns are NOT distributed according to PetscSplitOwnership */
1803   m = PETSC_DECIDE;
1804   PetscCall(PetscSplitOwnershipEqual(comm,&m,&M));
1805   n = PETSC_DECIDE;
1806   PetscCall(PetscSplitOwnershipEqual(comm,&n,&N));
1807   PetscCall(MatSetSizes(*A,m,n,M,N));
1808   a = (Mat_ScaLAPACK*)(*A)->data;
1809   PetscCall(PetscBLASIntCast(M,&a->M));
1810   PetscCall(PetscBLASIntCast(N,&a->N));
1811   PetscCall(PetscBLASIntCast((mb==PETSC_DECIDE)?DEFAULT_BLOCKSIZE:mb,&a->mb));
1812   PetscCall(PetscBLASIntCast((nb==PETSC_DECIDE)?a->mb:nb,&a->nb));
1813   PetscCall(PetscBLASIntCast(rsrc,&a->rsrc));
1814   PetscCall(PetscBLASIntCast(csrc,&a->csrc));
1815   PetscCall(MatSetUp(*A));
1816   PetscFunctionReturn(0);
1817 }
1818