xref: /petsc/src/mat/impls/scalapack/matscalapack.c (revision fdf6c4e30aafdbc795e4f76379caa977fd5cdf5a)
1 #include <petsc/private/petscscalapack.h>  /*I "petscmat.h" I*/
2 
3 const char ScaLAPACKCitation[] = "@BOOK{scalapack-user-guide,\n"
4 "       AUTHOR = {L. S. Blackford and J. Choi and A. Cleary and E. D'Azevedo and\n"
5 "                 J. Demmel and I. Dhillon and J. Dongarra and S. Hammarling and\n"
6 "                 G. Henry and A. Petitet and K. Stanley and D. Walker and R. C. Whaley},\n"
7 "       TITLE = {Sca{LAPACK} Users' Guide},\n"
8 "       PUBLISHER = {SIAM},\n"
9 "       ADDRESS = {Philadelphia, PA},\n"
10 "       YEAR = 1997\n"
11 "}\n";
12 static PetscBool ScaLAPACKCite = PETSC_FALSE;
13 
14 #define DEFAULT_BLOCKSIZE 64
15 
16 /*
17     The variable Petsc_ScaLAPACK_keyval is used to indicate an MPI attribute that
18   is attached to a communicator, in this case the attribute is a Mat_ScaLAPACK_Grid
19 */
20 static PetscMPIInt Petsc_ScaLAPACK_keyval = MPI_KEYVAL_INVALID;
21 
static PetscErrorCode Petsc_ScaLAPACK_keyval_free(void)
{
  PetscFunctionBegin;
  /* Destroy the MPI keyval used to cache the ScaLAPACK grid attribute on
     communicators; after this Petsc_ScaLAPACK_keyval is MPI_KEYVAL_INVALID */
  PetscCall(PetscInfo(NULL,"Freeing Petsc_ScaLAPACK_keyval\n"));
  PetscCallMPI(MPI_Comm_free_keyval(&Petsc_ScaLAPACK_keyval));
  PetscFunctionReturn(0);
}
29 
30 static PetscErrorCode MatView_ScaLAPACK(Mat A,PetscViewer viewer)
31 {
32   Mat_ScaLAPACK     *a = (Mat_ScaLAPACK*)A->data;
33   PetscBool         iascii;
34   PetscViewerFormat format;
35   Mat               Adense;
36 
37   PetscFunctionBegin;
38   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
39   if (iascii) {
40     PetscCall(PetscViewerGetFormat(viewer,&format));
41     if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
42       PetscCall(PetscViewerASCIIPrintf(viewer,"block sizes: %d,%d\n",(int)a->mb,(int)a->nb));
43       PetscCall(PetscViewerASCIIPrintf(viewer,"grid height=%d, grid width=%d\n",(int)a->grid->nprow,(int)a->grid->npcol));
44       PetscCall(PetscViewerASCIIPrintf(viewer,"coordinates of process owning first row and column: (%d,%d)\n",(int)a->rsrc,(int)a->csrc));
45       PetscCall(PetscViewerASCIIPrintf(viewer,"dimension of largest local matrix: %d x %d\n",(int)a->locr,(int)a->locc));
46       PetscFunctionReturn(0);
47     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
48       PetscFunctionReturn(0);
49     }
50   }
51   /* convert to dense format and call MatView() */
52   PetscCall(MatConvert(A,MATDENSE,MAT_INITIAL_MATRIX,&Adense));
53   PetscCall(MatView(Adense,viewer));
54   PetscCall(MatDestroy(&Adense));
55   PetscFunctionReturn(0);
56 }
57 
58 static PetscErrorCode MatGetInfo_ScaLAPACK(Mat A,MatInfoType flag,MatInfo *info)
59 {
60   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
61   PetscLogDouble isend[2],irecv[2];
62 
63   PetscFunctionBegin;
64   info->block_size = 1.0;
65 
66   isend[0] = a->lld*a->locc;     /* locally allocated */
67   isend[1] = a->locr*a->locc;    /* used submatrix */
68   if (flag == MAT_LOCAL || flag == MAT_GLOBAL_MAX) {
69     info->nz_allocated   = isend[0];
70     info->nz_used        = isend[1];
71   } else if (flag == MAT_GLOBAL_MAX) {
72     PetscCall(MPIU_Allreduce(isend,irecv,2,MPIU_PETSCLOGDOUBLE,MPIU_MAX,PetscObjectComm((PetscObject)A)));
73     info->nz_allocated   = irecv[0];
74     info->nz_used        = irecv[1];
75   } else if (flag == MAT_GLOBAL_SUM) {
76     PetscCall(MPIU_Allreduce(isend,irecv,2,MPIU_PETSCLOGDOUBLE,MPIU_SUM,PetscObjectComm((PetscObject)A)));
77     info->nz_allocated   = irecv[0];
78     info->nz_used        = irecv[1];
79   }
80 
81   info->nz_unneeded       = 0;
82   info->assemblies        = A->num_ass;
83   info->mallocs           = 0;
84   info->memory            = ((PetscObject)A)->mem;
85   info->fill_ratio_given  = 0;
86   info->fill_ratio_needed = 0;
87   info->factor_mallocs    = 0;
88   PetscFunctionReturn(0);
89 }
90 
91 PetscErrorCode MatSetOption_ScaLAPACK(Mat A,MatOption op,PetscBool flg)
92 {
93   PetscFunctionBegin;
94   switch (op) {
95     case MAT_NEW_NONZERO_LOCATIONS:
96     case MAT_NEW_NONZERO_LOCATION_ERR:
97     case MAT_NEW_NONZERO_ALLOCATION_ERR:
98     case MAT_SYMMETRIC:
99     case MAT_SORTED_FULL:
100     case MAT_HERMITIAN:
101       break;
102     default:
103       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unsupported option %s",MatOptions[op]);
104   }
105   PetscFunctionReturn(0);
106 }
107 
static PetscErrorCode MatSetValues_ScaLAPACK(Mat A,PetscInt nr,const PetscInt *rows,PetscInt nc,const PetscInt *cols,const PetscScalar *vals,InsertMode imode)
{
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
  PetscInt       i,j;
  PetscBLASInt   gridx,gcidx,lridx,lcidx,rsrc,csrc;

  PetscFunctionBegin;
  /* Insert or add a logically dense block of values (row-major in vals);
     entries owned by other processes are routed through the matrix stash
     and communicated during MatAssemblyBegin/End */
  for (i=0;i<nr;i++) {
    if (rows[i] < 0) continue;  /* negative index means skip this row */
    PetscCall(PetscBLASIntCast(rows[i]+1,&gridx));  /* ScaLAPACK indices are 1-based */
    for (j=0;j<nc;j++) {
      if (cols[j] < 0) continue;  /* negative index means skip this column */
      PetscCall(PetscBLASIntCast(cols[j]+1,&gcidx));
      /* map global (row,col) to local indices and owner process coordinates */
      PetscStackCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc));
      if (rsrc==a->grid->myrow && csrc==a->grid->mycol) {
        /* locally owned: write directly into the column-major local array */
        switch (imode) {
          case INSERT_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] = vals[i*nc+j]; break;
          case ADD_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] += vals[i*nc+j]; break;
          default: SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for InsertMode %d",(int)imode);
        }
      } else {
        /* off-process entry: stash it for assembly-time communication */
        PetscCheck(!A->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process entry even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set");
        A->assembled = PETSC_FALSE;
        PetscCall(MatStashValuesRow_Private(&A->stash,rows[i],1,cols+j,vals+i*nc+j,(PetscBool)(imode==ADD_VALUES)));
      }
    }
  }
  PetscFunctionReturn(0);
}
137 
static PetscErrorCode MatMultXXXYYY_ScaLAPACK(Mat A,PetscBool transpose,PetscScalar beta,const PetscScalar *x,PetscScalar *y)
{
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
  PetscScalar    *x2d,*y2d,alpha=1.0;
  const PetscInt *ranges;
  PetscBLASInt   xdesc[9],ydesc[9],x2desc[9],y2desc[9],mb,nb,lszx,lszy,zero=0,one=1,xlld,ylld,info;

  PetscFunctionBegin;
  /* Common kernel for MatMult/MatMultTranspose and the *Add variants:
     computes y = op(A)*x + beta*y, with op(A) = A^T when transpose is true.
     The 1d-distributed PETSc arrays x,y are redistributed (p?gemr2d) into
     vectors aligned with A's 2d block-cyclic layout, p?gemv is applied, and
     the result is redistributed back.  When beta == 0.0 the initial
     redistribution of y is skipped since its contents are overwritten. */
  if (transpose) {

    /* create ScaLAPACK descriptors for vectors (1d block distribution) */
    PetscCall(PetscLayoutGetRanges(A->rmap,&ranges));
    PetscCall(PetscBLASIntCast(ranges[1],&mb));  /* x block size */
    xlld = PetscMax(1,A->rmap->n);
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&xlld,&info));
    PetscCheckScaLapackInfo("descinit",info);
    PetscCall(PetscLayoutGetRanges(A->cmap,&ranges));
    PetscCall(PetscBLASIntCast(ranges[1],&nb));  /* y block size */
    ylld = 1;
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ydesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&ylld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* allocate 2d vectors */
    lszx = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
    lszy = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
    PetscCall(PetscMalloc2(lszx,&x2d,lszy,&y2d));
    xlld = PetscMax(1,lszx);

    /* create ScaLAPACK descriptors for vectors (2d block distribution) */
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&xlld,&info));
    PetscCheckScaLapackInfo("descinit",info);
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(y2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&ylld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* redistribute x as a column of a 2d matrix */
    PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,(PetscScalar*)x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxcol));

    /* redistribute y as a row of a 2d matrix */
    if (beta!=0.0) PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,y,&one,&one,ydesc,y2d,&one,&one,y2desc,&a->grid->ictxrow));

    /* call PBLAS subroutine */
    PetscStackCallBLAS("PBLASgemv",PBLASgemv_("T",&a->M,&a->N,&alpha,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&one,&beta,y2d,&one,&one,y2desc,&one));

    /* redistribute y from a row of a 2d matrix */
    PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,y2d,&one,&one,y2desc,y,&one,&one,ydesc,&a->grid->ictxrow));

  } else {   /* non-transpose */

    /* create ScaLAPACK descriptors for vectors (1d block distribution) */
    PetscCall(PetscLayoutGetRanges(A->cmap,&ranges));
    PetscCall(PetscBLASIntCast(ranges[1],&nb));  /* x block size */
    xlld = 1;
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&xlld,&info));
    PetscCheckScaLapackInfo("descinit",info);
    PetscCall(PetscLayoutGetRanges(A->rmap,&ranges));
    PetscCall(PetscBLASIntCast(ranges[1],&mb));  /* y block size */
    ylld = PetscMax(1,A->rmap->n);
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ydesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&ylld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* allocate 2d vectors */
    lszy = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
    lszx = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
    PetscCall(PetscMalloc2(lszx,&x2d,lszy,&y2d));
    ylld = PetscMax(1,lszy);

    /* create ScaLAPACK descriptors for vectors (2d block distribution) */
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&xlld,&info));
    PetscCheckScaLapackInfo("descinit",info);
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(y2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&ylld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* redistribute x as a row of a 2d matrix */
    PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,(PetscScalar*)x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxrow));

    /* redistribute y as a column of a 2d matrix */
    if (beta!=0.0) PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,y,&one,&one,ydesc,y2d,&one,&one,y2desc,&a->grid->ictxcol));

    /* call PBLAS subroutine */
    PetscStackCallBLAS("PBLASgemv",PBLASgemv_("N",&a->M,&a->N,&alpha,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&one,&beta,y2d,&one,&one,y2desc,&one));

    /* redistribute y from a column of a 2d matrix */
    PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,y2d,&one,&one,y2desc,y,&one,&one,ydesc,&a->grid->ictxcol));

  }
  PetscCall(PetscFree2(x2d,y2d));
  PetscFunctionReturn(0);
}
226 
227 static PetscErrorCode MatMult_ScaLAPACK(Mat A,Vec x,Vec y)
228 {
229   const PetscScalar *xarray;
230   PetscScalar       *yarray;
231 
232   PetscFunctionBegin;
233   PetscCall(VecGetArrayRead(x,&xarray));
234   PetscCall(VecGetArray(y,&yarray));
235   PetscCall(MatMultXXXYYY_ScaLAPACK(A,PETSC_FALSE,0.0,xarray,yarray));
236   PetscCall(VecRestoreArrayRead(x,&xarray));
237   PetscCall(VecRestoreArray(y,&yarray));
238   PetscFunctionReturn(0);
239 }
240 
241 static PetscErrorCode MatMultTranspose_ScaLAPACK(Mat A,Vec x,Vec y)
242 {
243   const PetscScalar *xarray;
244   PetscScalar       *yarray;
245 
246   PetscFunctionBegin;
247   PetscCall(VecGetArrayRead(x,&xarray));
248   PetscCall(VecGetArray(y,&yarray));
249   PetscCall(MatMultXXXYYY_ScaLAPACK(A,PETSC_TRUE,0.0,xarray,yarray));
250   PetscCall(VecRestoreArrayRead(x,&xarray));
251   PetscCall(VecRestoreArray(y,&yarray));
252   PetscFunctionReturn(0);
253 }
254 
255 static PetscErrorCode MatMultAdd_ScaLAPACK(Mat A,Vec x,Vec y,Vec z)
256 {
257   const PetscScalar *xarray;
258   PetscScalar       *zarray;
259 
260   PetscFunctionBegin;
261   if (y != z) PetscCall(VecCopy(y,z));
262   PetscCall(VecGetArrayRead(x,&xarray));
263   PetscCall(VecGetArray(z,&zarray));
264   PetscCall(MatMultXXXYYY_ScaLAPACK(A,PETSC_FALSE,1.0,xarray,zarray));
265   PetscCall(VecRestoreArrayRead(x,&xarray));
266   PetscCall(VecRestoreArray(z,&zarray));
267   PetscFunctionReturn(0);
268 }
269 
270 static PetscErrorCode MatMultTransposeAdd_ScaLAPACK(Mat A,Vec x,Vec y,Vec z)
271 {
272   const PetscScalar *xarray;
273   PetscScalar       *zarray;
274 
275   PetscFunctionBegin;
276   if (y != z) PetscCall(VecCopy(y,z));
277   PetscCall(VecGetArrayRead(x,&xarray));
278   PetscCall(VecGetArray(z,&zarray));
279   PetscCall(MatMultXXXYYY_ScaLAPACK(A,PETSC_TRUE,1.0,xarray,zarray));
280   PetscCall(VecRestoreArrayRead(x,&xarray));
281   PetscCall(VecRestoreArray(z,&zarray));
282   PetscFunctionReturn(0);
283 }
284 
PetscErrorCode MatMatMultNumeric_ScaLAPACK(Mat A,Mat B,Mat C)
{
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
  Mat_ScaLAPACK *b = (Mat_ScaLAPACK*)B->data;
  Mat_ScaLAPACK *c = (Mat_ScaLAPACK*)C->data;
  PetscScalar   sone=1.0,zero=0.0;
  PetscBLASInt  one=1;

  PetscFunctionBegin;
  /* Numeric phase of C = A*B: a single PBLAS gemm with alpha=1, beta=0 */
  PetscStackCallBLAS("PBLASgemm",PBLASgemm_("N","N",&a->M,&b->N,&a->N,&sone,a->loc,&one,&one,a->desc,b->loc,&one,&one,b->desc,&zero,c->loc,&one,&one,c->desc));
  C->assembled = PETSC_TRUE;
  PetscFunctionReturn(0);
}
298 
PetscErrorCode MatMatMultSymbolic_ScaLAPACK(Mat A,Mat B,PetscReal fill,Mat C)
{
  PetscFunctionBegin;
  /* Symbolic phase of C = A*B: size and set up C as a ScaLAPACK matrix
     (fill is irrelevant for dense storage) and register the numeric phase */
  PetscCall(MatSetSizes(C,A->rmap->n,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE));
  PetscCall(MatSetType(C,MATSCALAPACK));
  PetscCall(MatSetUp(C));
  C->ops->matmultnumeric = MatMatMultNumeric_ScaLAPACK;
  PetscFunctionReturn(0);
}
308 
static PetscErrorCode MatMatTransposeMultNumeric_ScaLAPACK(Mat A,Mat B,Mat C)
{
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
  Mat_ScaLAPACK *b = (Mat_ScaLAPACK*)B->data;
  Mat_ScaLAPACK *c = (Mat_ScaLAPACK*)C->data;
  PetscScalar   sone=1.0,zero=0.0;
  PetscBLASInt  one=1;

  PetscFunctionBegin;
  /* Numeric phase of C = A*B^T: PBLAS gemm with opB="T", alpha=1, beta=0 */
  PetscStackCallBLAS("PBLASgemm",PBLASgemm_("N","T",&a->M,&b->M,&a->N,&sone,a->loc,&one,&one,a->desc,b->loc,&one,&one,b->desc,&zero,c->loc,&one,&one,c->desc));
  C->assembled = PETSC_TRUE;
  PetscFunctionReturn(0);
}
322 
323 static PetscErrorCode MatMatTransposeMultSymbolic_ScaLAPACK(Mat A,Mat B,PetscReal fill,Mat C)
324 {
325   PetscFunctionBegin;
326   PetscCall(MatSetSizes(C,A->rmap->n,B->rmap->n,PETSC_DECIDE,PETSC_DECIDE));
327   PetscCall(MatSetType(C,MATSCALAPACK));
328   PetscCall(MatSetUp(C));
329   PetscFunctionReturn(0);
330 }
331 
332 /* --------------------------------------- */
static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_AB(Mat C)
{
  PetscFunctionBegin;
  /* Register the symbolic callbacks for the C = A*B product */
  C->ops->matmultsymbolic = MatMatMultSymbolic_ScaLAPACK;
  C->ops->productsymbolic = MatProductSymbolic_AB;
  PetscFunctionReturn(0);
}
340 
static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_ABt(Mat C)
{
  PetscFunctionBegin;
  /* Register the symbolic callbacks for the C = A*B^T product */
  C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_ScaLAPACK;
  C->ops->productsymbolic          = MatProductSymbolic_ABt;
  PetscFunctionReturn(0);
}
348 
349 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_ScaLAPACK(Mat C)
350 {
351   Mat_Product    *product = C->product;
352 
353   PetscFunctionBegin;
354   switch (product->type) {
355     case MATPRODUCT_AB:
356       PetscCall(MatProductSetFromOptions_ScaLAPACK_AB(C));
357       break;
358     case MATPRODUCT_ABt:
359       PetscCall(MatProductSetFromOptions_ScaLAPACK_ABt(C));
360       break;
361     default: SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_SUP,"MatProduct type %s is not supported for ScaLAPACK and ScaLAPACK matrices",MatProductTypes[product->type]);
362   }
363   PetscFunctionReturn(0);
364 }
365 /* --------------------------------------- */
366 
static PetscErrorCode MatGetDiagonal_ScaLAPACK(Mat A,Vec D)
{
  Mat_ScaLAPACK     *a = (Mat_ScaLAPACK*)A->data;
  PetscScalar       *darray,*d2d,v;
  const PetscInt    *ranges;
  PetscBLASInt      j,ddesc[9],d2desc[9],mb,nb,lszd,zero=0,one=1,dlld,info;

  PetscFunctionBegin;
  /* Extract the diagonal of A into D.  The shorter dimension decides whether
     the diagonal is staged as a column (row version) or a row (column
     version) of a 2d-distributed matrix; entries are gathered one at a time
     with elget/elset and then redistributed to D's 1d layout via p?gemr2d */
  PetscCall(VecGetArray(D,&darray));

  if (A->rmap->N<=A->cmap->N) {   /* row version */

    /* create ScaLAPACK descriptor for vector (1d block distribution) */
    PetscCall(PetscLayoutGetRanges(A->rmap,&ranges));
    PetscCall(PetscBLASIntCast(ranges[1],&mb));  /* D block size */
    dlld = PetscMax(1,A->rmap->n);
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&dlld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* allocate 2d vector */
    lszd = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
    PetscCall(PetscCalloc1(lszd,&d2d));
    dlld = PetscMax(1,lszd);

    /* create ScaLAPACK descriptor for vector (2d block distribution) */
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&dlld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* collect diagonal (1-based global indices) */
    for (j=1;j<=a->M;j++) {
      PetscStackCallBLAS("SCALAPACKelget",SCALAPACKelget_("R"," ",&v,a->loc,&j,&j,a->desc));
      PetscStackCallBLAS("SCALAPACKelset",SCALAPACKelset_(d2d,&j,&one,d2desc,&v));
    }

    /* redistribute d from a column of a 2d matrix */
    PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,d2d,&one,&one,d2desc,darray,&one,&one,ddesc,&a->grid->ictxcol));
    PetscCall(PetscFree(d2d));

  } else {   /* column version */

    /* create ScaLAPACK descriptor for vector (1d block distribution) */
    PetscCall(PetscLayoutGetRanges(A->cmap,&ranges));
    PetscCall(PetscBLASIntCast(ranges[1],&nb));  /* D block size */
    dlld = 1;
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&dlld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* allocate 2d vector */
    lszd = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
    PetscCall(PetscCalloc1(lszd,&d2d));

    /* create ScaLAPACK descriptor for vector (2d block distribution) */
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&dlld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* collect diagonal (1-based global indices) */
    for (j=1;j<=a->N;j++) {
      PetscStackCallBLAS("SCALAPACKelget",SCALAPACKelget_("C"," ",&v,a->loc,&j,&j,a->desc));
      PetscStackCallBLAS("SCALAPACKelset",SCALAPACKelset_(d2d,&one,&j,d2desc,&v));
    }

    /* redistribute d from a row of a 2d matrix */
    PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,d2d,&one,&one,d2desc,darray,&one,&one,ddesc,&a->grid->ictxrow));
    PetscCall(PetscFree(d2d));
  }

  PetscCall(VecRestoreArray(D,&darray));
  PetscCall(VecAssemblyBegin(D));
  PetscCall(VecAssemblyEnd(D));
  PetscFunctionReturn(0);
}
438 
static PetscErrorCode MatDiagonalScale_ScaLAPACK(Mat A,Vec L,Vec R)
{
  Mat_ScaLAPACK     *a = (Mat_ScaLAPACK*)A->data;
  const PetscScalar *d;
  const PetscInt    *ranges;
  PetscScalar       *d2d;
  PetscBLASInt      i,j,ddesc[9],d2desc[9],mb,nb,lszd,zero=0,one=1,dlld,info;

  PetscFunctionBegin;
  /* A = diag(L)*A*diag(R) (either vector may be NULL).  Each scaling vector
     is redistributed to a row (R) or column (L) aligned with A's 2d layout,
     broadcast across the grid, and applied entrywise to the local submatrix */
  if (R) {
    PetscCall(VecGetArrayRead(R,(const PetscScalar **)&d));
    /* create ScaLAPACK descriptor for vector (1d block distribution) */
    PetscCall(PetscLayoutGetRanges(A->cmap,&ranges));
    PetscCall(PetscBLASIntCast(ranges[1],&nb));  /* D block size */
    dlld = 1;
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&dlld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* allocate 2d vector */
    lszd = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
    PetscCall(PetscCalloc1(lszd,&d2d));

    /* create ScaLAPACK descriptor for vector (2d block distribution) */
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&dlld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* redistribute d to a row of a 2d matrix */
    PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,(PetscScalar*)d,&one,&one,ddesc,d2d,&one,&one,d2desc,&a->grid->ictxrow));

    /* broadcast along process columns: row 0 sends, the others receive.
       NOTE(review): Cdgebs2d/Cdgebr2d are the double-precision BLACS
       broadcasts — confirm correctness for complex/single-precision builds */
    if (!a->grid->myrow) Cdgebs2d(a->grid->ictxt,"C"," ",1,lszd,d2d,dlld);
    else Cdgebr2d(a->grid->ictxt,"C"," ",1,lszd,d2d,dlld,0,a->grid->mycol);

    /* local scaling: column j of the local submatrix is scaled by d2d[j] */
    for (j=0;j<a->locc;j++) for (i=0;i<a->locr;i++) a->loc[i+j*a->lld] *= d2d[j];

    PetscCall(PetscFree(d2d));
    PetscCall(VecRestoreArrayRead(R,(const PetscScalar **)&d));
  }
  if (L) {
    PetscCall(VecGetArrayRead(L,(const PetscScalar **)&d));
    /* create ScaLAPACK descriptor for vector (1d block distribution) */
    PetscCall(PetscLayoutGetRanges(A->rmap,&ranges));
    PetscCall(PetscBLASIntCast(ranges[1],&mb));  /* D block size */
    dlld = PetscMax(1,A->rmap->n);
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&dlld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* allocate 2d vector */
    lszd = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
    PetscCall(PetscCalloc1(lszd,&d2d));
    dlld = PetscMax(1,lszd);

    /* create ScaLAPACK descriptor for vector (2d block distribution) */
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&dlld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* redistribute d to a column of a 2d matrix */
    PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,(PetscScalar*)d,&one,&one,ddesc,d2d,&one,&one,d2desc,&a->grid->ictxcol));

    /* broadcast along process rows: column 0 sends, the others receive */
    if (!a->grid->mycol) Cdgebs2d(a->grid->ictxt,"R"," ",lszd,1,d2d,dlld);
    else Cdgebr2d(a->grid->ictxt,"R"," ",lszd,1,d2d,dlld,a->grid->myrow,0);

    /* local scaling: row i of the local submatrix is scaled by d2d[i] */
    for (i=0;i<a->locr;i++) for (j=0;j<a->locc;j++) a->loc[i+j*a->lld] *= d2d[i];

    PetscCall(PetscFree(d2d));
    PetscCall(VecRestoreArrayRead(L,(const PetscScalar **)&d));
  }
  PetscFunctionReturn(0);
}
511 
static PetscErrorCode MatMissingDiagonal_ScaLAPACK(Mat A,PetscBool *missing,PetscInt *d)
{
  PetscFunctionBegin;
  /* Dense storage keeps every entry explicitly, so a diagonal entry is
     never missing; the optional index output d is left untouched */
  *missing = PETSC_FALSE;
  PetscFunctionReturn(0);
}
518 
519 static PetscErrorCode MatScale_ScaLAPACK(Mat X,PetscScalar a)
520 {
521   Mat_ScaLAPACK *x = (Mat_ScaLAPACK*)X->data;
522   PetscBLASInt  n,one=1;
523 
524   PetscFunctionBegin;
525   n = x->lld*x->locc;
526   PetscStackCallBLAS("BLASscal",BLASscal_(&n,&a,x->loc,&one));
527   PetscFunctionReturn(0);
528 }
529 
static PetscErrorCode MatShift_ScaLAPACK(Mat X,PetscScalar alpha)
{
  Mat_ScaLAPACK *x = (Mat_ScaLAPACK*)X->data;
  PetscBLASInt  i,n;
  PetscScalar   v;

  PetscFunctionBegin;
  /* X = X + alpha*I: for each diagonal entry (1-based global index), read
     it with elget, shift it, and write it back with elset */
  n = PetscMin(x->M,x->N);
  for (i=1;i<=n;i++) {
    PetscStackCallBLAS("SCALAPACKelget",SCALAPACKelget_("-"," ",&v,x->loc,&i,&i,x->desc));
    v += alpha;
    PetscStackCallBLAS("SCALAPACKelset",SCALAPACKelset_(x->loc,&i,&i,x->desc,&v));
  }
  PetscFunctionReturn(0);
}
545 
static PetscErrorCode MatAXPY_ScaLAPACK(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
{
  Mat_ScaLAPACK  *x = (Mat_ScaLAPACK*)X->data;
  Mat_ScaLAPACK  *y = (Mat_ScaLAPACK*)Y->data;
  PetscBLASInt   one=1;
  PetscScalar    beta=1.0;

  PetscFunctionBegin;
  /* Y = alpha*X + 1.0*Y via the ScaLAPACK matrix-add routine; str is
     ignored (dense storage).  Both matrices must share the same
     block-cyclic distribution, which is enforced below */
  MatScaLAPACKCheckDistribution(Y,1,X,3);
  PetscStackCallBLAS("SCALAPACKmatadd",SCALAPACKmatadd_(&x->M,&x->N,&alpha,x->loc,&one,&one,x->desc,&beta,y->loc,&one,&one,y->desc));
  PetscCall(PetscObjectStateIncrease((PetscObject)Y));
  PetscFunctionReturn(0);
}
559 
560 static PetscErrorCode MatCopy_ScaLAPACK(Mat A,Mat B,MatStructure str)
561 {
562   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
563   Mat_ScaLAPACK  *b = (Mat_ScaLAPACK*)B->data;
564 
565   PetscFunctionBegin;
566   PetscCall(PetscArraycpy(b->loc,a->loc,a->lld*a->locc));
567   PetscCall(PetscObjectStateIncrease((PetscObject)B));
568   PetscFunctionReturn(0);
569 }
570 
static PetscErrorCode MatDuplicate_ScaLAPACK(Mat A,MatDuplicateOption op,Mat *B)
{
  Mat            Bs;
  MPI_Comm       comm;
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data,*b;

  PetscFunctionBegin;
  /* Create a new ScaLAPACK matrix with A's sizes and distribution; copy
     the local values only when op == MAT_COPY_VALUES */
  PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
  PetscCall(MatCreate(comm,&Bs));
  PetscCall(MatSetSizes(Bs,A->rmap->n,A->cmap->n,PETSC_DECIDE,PETSC_DECIDE));
  PetscCall(MatSetType(Bs,MATSCALAPACK));
  b = (Mat_ScaLAPACK*)Bs->data;
  /* replicate A's block-cyclic distribution parameters before MatSetUp
     allocates the local storage */
  b->M    = a->M;
  b->N    = a->N;
  b->mb   = a->mb;
  b->nb   = a->nb;
  b->rsrc = a->rsrc;
  b->csrc = a->csrc;
  PetscCall(MatSetUp(Bs));
  *B = Bs;
  if (op == MAT_COPY_VALUES) {
    /* identical layouts, so a flat copy of the local buffer suffices */
    PetscCall(PetscArraycpy(b->loc,a->loc,a->lld*a->locc));
  }
  Bs->assembled = PETSC_TRUE;
  PetscFunctionReturn(0);
}
597 
static PetscErrorCode MatTranspose_ScaLAPACK(Mat A,MatReuse reuse,Mat *B)
{
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data, *b;
  Mat            Bs = *B;
  PetscBLASInt   one=1;
  PetscScalar    sone=1.0,zero=0.0;
#if defined(PETSC_USE_COMPLEX)
  PetscInt       i,j;
#endif

  PetscFunctionBegin;
  /* B = A^T; only MAT_INITIAL_MATRIX is supported.  B swaps A's block
     sizes and source-process coordinates along with its dimensions */
  if (reuse == MAT_INITIAL_MATRIX) {
    PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A),a->nb,a->mb,a->N,a->M,a->csrc,a->rsrc,&Bs));
    *B = Bs;
  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Only MAT_INITIAL_MATRIX supported");
  b = (Mat_ScaLAPACK*)Bs->data;
  PetscStackCallBLAS("PBLAStran",PBLAStran_(&a->N,&a->M,&sone,a->loc,&one,&one,a->desc,&zero,b->loc,&one,&one,b->desc));
#if defined(PETSC_USE_COMPLEX)
  /* undo conjugation: in complex builds the PBLAStran wrapper conjugates
     (cf. MatHermitianTranspose_ScaLAPACK, which needs no fix-up loop) */
  for (i=0;i<b->locr;i++) for (j=0;j<b->locc;j++) b->loc[i+j*b->lld] = PetscConj(b->loc[i+j*b->lld]);
#endif
  Bs->assembled = PETSC_TRUE;
  PetscFunctionReturn(0);
}
622 
623 static PetscErrorCode MatConjugate_ScaLAPACK(Mat A)
624 {
625   Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
626   PetscInt      i,j;
627 
628   PetscFunctionBegin;
629   for (i=0;i<a->locr;i++) for (j=0;j<a->locc;j++) a->loc[i+j*a->lld] = PetscConj(a->loc[i+j*a->lld]);
630   PetscFunctionReturn(0);
631 }
632 
static PetscErrorCode MatHermitianTranspose_ScaLAPACK(Mat A,MatReuse reuse,Mat *B)
{
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data, *b;
  Mat            Bs = *B;
  PetscBLASInt   one=1;
  PetscScalar    sone=1.0,zero=0.0;

  PetscFunctionBegin;
  /* B = A^H; only MAT_INITIAL_MATRIX is supported.  Same PBLAStran call as
     MatTranspose_ScaLAPACK but without the conjugation fix-up loop, since
     in complex builds the wrapper's conjugation is exactly what is wanted */
  if (reuse == MAT_INITIAL_MATRIX) {
    PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A),a->nb,a->mb,a->N,a->M,a->csrc,a->rsrc,&Bs));
    *B = Bs;
  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Only MAT_INITIAL_MATRIX supported");
  b = (Mat_ScaLAPACK*)Bs->data;
  PetscStackCallBLAS("PBLAStran",PBLAStran_(&a->N,&a->M,&sone,a->loc,&one,&one,a->desc,&zero,b->loc,&one,&one,b->desc));
  Bs->assembled = PETSC_TRUE;
  PetscFunctionReturn(0);
}
650 
static PetscErrorCode MatSolve_ScaLAPACK(Mat A,Vec B,Vec X)
{
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
  PetscScalar    *x,*x2d;
  const PetscInt *ranges;
  PetscBLASInt   xdesc[9],x2desc[9],mb,lszx,zero=0,one=1,xlld,nrhs=1,info;

  PetscFunctionBegin;
  /* Solve A*X = B using the LU or Cholesky factors stored in A.  B is
     copied into X, X is redistributed to a column aligned with A's 2d
     block-cyclic layout, the solve is done in place, and the solution is
     redistributed back to X's 1d layout */
  PetscCall(VecCopy(B,X));
  PetscCall(VecGetArray(X,&x));

  /* create ScaLAPACK descriptor for a vector (1d block distribution) */
  PetscCall(PetscLayoutGetRanges(A->rmap,&ranges));
  PetscCall(PetscBLASIntCast(ranges[1],&mb));  /* x block size */
  xlld = PetscMax(1,A->rmap->n);
  PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&xlld,&info));
  PetscCheckScaLapackInfo("descinit",info);

  /* allocate 2d vector */
  lszx = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
  PetscCall(PetscMalloc1(lszx,&x2d));
  xlld = PetscMax(1,lszx);

  /* create ScaLAPACK descriptor for a vector (2d block distribution) */
  PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&xlld,&info));
  PetscCheckScaLapackInfo("descinit",info);

  /* redistribute x as a column of a 2d matrix */
  PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxcol));

  /* call ScaLAPACK subroutine matching the stored factorization */
  switch (A->factortype) {
    case MAT_FACTOR_LU:
      PetscStackCallBLAS("SCALAPACKgetrs",SCALAPACKgetrs_("N",&a->M,&nrhs,a->loc,&one,&one,a->desc,a->pivots,x2d,&one,&one,x2desc,&info));
      PetscCheckScaLapackInfo("getrs",info);
      break;
    case MAT_FACTOR_CHOLESKY:
      PetscStackCallBLAS("SCALAPACKpotrs",SCALAPACKpotrs_("L",&a->M,&nrhs,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&info));
      PetscCheckScaLapackInfo("potrs",info);
      break;
    default:
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unfactored Matrix or Unsupported MatFactorType");
  }

  /* redistribute x from a column of a 2d matrix */
  PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,x2d,&one,&one,x2desc,x,&one,&one,xdesc,&a->grid->ictxcol));

  PetscCall(PetscFree(x2d));
  PetscCall(VecRestoreArray(X,&x));
  PetscFunctionReturn(0);
}
702 
static PetscErrorCode MatSolveAdd_ScaLAPACK(Mat A,Vec B,Vec Y,Vec X)
{
  PetscFunctionBegin;
  /* X = A^{-1}*B + Y */
  PetscCall(MatSolve_ScaLAPACK(A,B,X));
  PetscCall(VecAXPY(X,1,Y));
  PetscFunctionReturn(0);
}
710 
static PetscErrorCode MatMatSolve_ScaLAPACK(Mat A,Mat B,Mat X)
{
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data,*b,*x;
  PetscBool      flg1,flg2;
  PetscBLASInt   one=1,info;

  PetscFunctionBegin;
  /* Solve A*X = B for multiple right-hand sides.  B and X must both be
     MATSCALAPACK with the same distribution; B's local values are copied
     into X and the triangular solves are applied to X in place */
  PetscCall(PetscObjectTypeCompare((PetscObject)B,MATSCALAPACK,&flg1));
  PetscCall(PetscObjectTypeCompare((PetscObject)X,MATSCALAPACK,&flg2));
  PetscCheck((flg1 && flg2),PETSC_COMM_SELF,PETSC_ERR_SUP,"Both B and X must be of type MATSCALAPACK");
  MatScaLAPACKCheckDistribution(B,1,X,2);
  b = (Mat_ScaLAPACK*)B->data;
  x = (Mat_ScaLAPACK*)X->data;
  PetscCall(PetscArraycpy(x->loc,b->loc,b->lld*b->locc));

  /* dispatch on the factorization stored in A */
  switch (A->factortype) {
    case MAT_FACTOR_LU:
      PetscStackCallBLAS("SCALAPACKgetrs",SCALAPACKgetrs_("N",&a->M,&x->N,a->loc,&one,&one,a->desc,a->pivots,x->loc,&one,&one,x->desc,&info));
      PetscCheckScaLapackInfo("getrs",info);
      break;
    case MAT_FACTOR_CHOLESKY:
      PetscStackCallBLAS("SCALAPACKpotrs",SCALAPACKpotrs_("L",&a->M,&x->N,a->loc,&one,&one,a->desc,x->loc,&one,&one,x->desc,&info));
      PetscCheckScaLapackInfo("potrs",info);
      break;
    default:
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unfactored Matrix or Unsupported MatFactorType");
  }
  PetscFunctionReturn(0);
}
740 
741 static PetscErrorCode MatLUFactor_ScaLAPACK(Mat A,IS row,IS col,const MatFactorInfo *factorinfo)
742 {
743   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
744   PetscBLASInt   one=1,info;
745 
746   PetscFunctionBegin;
747   if (!a->pivots) {
748     PetscCall(PetscMalloc1(a->locr+a->mb,&a->pivots));
749     PetscCall(PetscLogObjectMemory((PetscObject)A,a->locr*sizeof(PetscBLASInt)));
750   }
751   PetscStackCallBLAS("SCALAPACKgetrf",SCALAPACKgetrf_(&a->M,&a->N,a->loc,&one,&one,a->desc,a->pivots,&info));
752   PetscCheckScaLapackInfo("getrf",info);
753   A->factortype = MAT_FACTOR_LU;
754   A->assembled  = PETSC_TRUE;
755 
756   PetscCall(PetscFree(A->solvertype));
757   PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK,&A->solvertype));
758   PetscFunctionReturn(0);
759 }
760 
761 static PetscErrorCode MatLUFactorNumeric_ScaLAPACK(Mat F,Mat A,const MatFactorInfo *info)
762 {
763   PetscFunctionBegin;
764   PetscCall(MatCopy(A,F,SAME_NONZERO_PATTERN));
765   PetscCall(MatLUFactor_ScaLAPACK(F,0,0,info));
766   PetscFunctionReturn(0);
767 }
768 
/* Symbolic LU factorization: intentionally a no-op for dense ScaLAPACK matrices */
static PetscErrorCode MatLUFactorSymbolic_ScaLAPACK(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
{
  PetscFunctionBegin;
  /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */
  PetscFunctionReturn(0);
}
775 
/* In-place Cholesky factorization via pxpotrf, storing the lower triangular
   factor ("L"); the permutation and options arguments are ignored */
static PetscErrorCode MatCholeskyFactor_ScaLAPACK(Mat A,IS perm,const MatFactorInfo *factorinfo)
{
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
  PetscBLASInt   one=1,info;

  PetscFunctionBegin;
  PetscStackCallBLAS("SCALAPACKpotrf",SCALAPACKpotrf_("L",&a->M,a->loc,&one,&one,a->desc,&info));
  PetscCheckScaLapackInfo("potrf",info);
  A->factortype = MAT_FACTOR_CHOLESKY;
  A->assembled  = PETSC_TRUE;

  /* record which solver package produced the factorization */
  PetscCall(PetscFree(A->solvertype));
  PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK,&A->solvertype));
  PetscFunctionReturn(0);
}
791 
792 static PetscErrorCode MatCholeskyFactorNumeric_ScaLAPACK(Mat F,Mat A,const MatFactorInfo *info)
793 {
794   PetscFunctionBegin;
795   PetscCall(MatCopy(A,F,SAME_NONZERO_PATTERN));
796   PetscCall(MatCholeskyFactor_ScaLAPACK(F,0,info));
797   PetscFunctionReturn(0);
798 }
799 
/* Symbolic Cholesky factorization: intentionally a no-op for dense ScaLAPACK matrices */
static PetscErrorCode MatCholeskyFactorSymbolic_ScaLAPACK(Mat F,Mat A,IS perm,const MatFactorInfo *info)
{
  PetscFunctionBegin;
  /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */
  PetscFunctionReturn(0);
}
806 
/* Reports MATSOLVERSCALAPACK as the solver package of a factor matrix */
PetscErrorCode MatFactorGetSolverType_scalapack_scalapack(Mat A,MatSolverType *type)
{
  PetscFunctionBegin;
  *type = MATSOLVERSCALAPACK;
  PetscFunctionReturn(0);
}
813 
/* Creates the factor matrix F for A, with the same sizes, block sizes, and
   grid placement; the actual factorization happens later in the numeric
   factorization routines */
static PetscErrorCode MatGetFactor_scalapack_scalapack(Mat A,MatFactorType ftype,Mat *F)
{
  Mat            B;
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;

  PetscFunctionBegin;
  /* Create the factorization matrix */
  PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A),a->mb,a->nb,a->M,a->N,a->rsrc,a->csrc,&B));
  B->trivialsymbolic = PETSC_TRUE;  /* the symbolic phase is a no-op for dense factorizations */
  B->factortype = ftype;
  PetscCall(PetscFree(B->solvertype));
  PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK,&B->solvertype));

  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_scalapack_scalapack));
  *F = B;
  PetscFunctionReturn(0);
}
831 
/* Registers the ScaLAPACK LU and Cholesky factorizations in PETSc's solver
   type registry, so they can be selected with -pc_factor_mat_solver_type */
PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_ScaLAPACK(void)
{
  PetscFunctionBegin;
  PetscCall(MatSolverTypeRegister(MATSOLVERSCALAPACK,MATSCALAPACK,MAT_FACTOR_LU,MatGetFactor_scalapack_scalapack));
  PetscCall(MatSolverTypeRegister(MATSOLVERSCALAPACK,MATSCALAPACK,MAT_FACTOR_CHOLESKY,MatGetFactor_scalapack_scalapack));
  PetscFunctionReturn(0);
}
839 
840 static PetscErrorCode MatNorm_ScaLAPACK(Mat A,NormType type,PetscReal *nrm)
841 {
842   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
843   PetscBLASInt   one=1,lwork=0;
844   const char     *ntype;
845   PetscScalar    *work=NULL,dummy;
846 
847   PetscFunctionBegin;
848   switch (type) {
849     case NORM_1:
850       ntype = "1";
851       lwork = PetscMax(a->locr,a->locc);
852       break;
853     case NORM_FROBENIUS:
854       ntype = "F";
855       work  = &dummy;
856       break;
857     case NORM_INFINITY:
858       ntype = "I";
859       lwork = PetscMax(a->locr,a->locc);
860       break;
861     default:
862       SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Unsupported norm type");
863   }
864   if (lwork) PetscCall(PetscMalloc1(lwork,&work));
865   *nrm = SCALAPACKlange_(ntype,&a->M,&a->N,a->loc,&one,&one,a->desc,work);
866   if (lwork) PetscCall(PetscFree(work));
867   PetscFunctionReturn(0);
868 }
869 
870 static PetscErrorCode MatZeroEntries_ScaLAPACK(Mat A)
871 {
872   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
873 
874   PetscFunctionBegin;
875   PetscCall(PetscArrayzero(a->loc,a->lld*a->locc));
876   PetscFunctionReturn(0);
877 }
878 
879 static PetscErrorCode MatGetOwnershipIS_ScaLAPACK(Mat A,IS *rows,IS *cols)
880 {
881   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
882   PetscInt       i,n,nb,isrc,nproc,iproc,*idx;
883 
884   PetscFunctionBegin;
885   if (rows) {
886     n     = a->locr;
887     nb    = a->mb;
888     isrc  = a->rsrc;
889     nproc = a->grid->nprow;
890     iproc = a->grid->myrow;
891     PetscCall(PetscMalloc1(n,&idx));
892     for (i=0;i<n;i++) idx[i] = nproc*nb*(i/nb) + i%nb + ((nproc+iproc-isrc)%nproc)*nb;
893     PetscCall(ISCreateGeneral(PETSC_COMM_SELF,n,idx,PETSC_OWN_POINTER,rows));
894   }
895   if (cols) {
896     n     = a->locc;
897     nb    = a->nb;
898     isrc  = a->csrc;
899     nproc = a->grid->npcol;
900     iproc = a->grid->mycol;
901     PetscCall(PetscMalloc1(n,&idx));
902     for (i=0;i<n;i++) idx[i] = nproc*nb*(i/nb) + i%nb + ((nproc+iproc-isrc)%nproc)*nb;
903     PetscCall(ISCreateGeneral(PETSC_COMM_SELF,n,idx,PETSC_OWN_POINTER,cols));
904   }
905   PetscFunctionReturn(0);
906 }
907 
/* Converts a MATSCALAPACK matrix to MATDENSE by redistributing the 2D
   block-cyclic data to a 1D row-block layout with pxgemr2d. When reusing a
   matrix whose row layout differs from the equal-split layout assumed here,
   the redistribution goes through an auxiliary dense matrix and the rows are
   then transferred with MatSetValues. */
static PetscErrorCode MatConvert_ScaLAPACK_Dense(Mat A,MatType newtype,MatReuse reuse,Mat *B)
{
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
  Mat            Bmpi;
  MPI_Comm       comm;
  PetscInt       i,M=A->rmap->N,N=A->cmap->N,m,n,rstart,rend,nz;
  const PetscInt *ranges,*branges,*cwork;
  const PetscScalar *vwork;
  PetscBLASInt   bdesc[9],bmb,zero=0,one=1,lld,info;
  PetscScalar    *barray;
  PetscBool      differ=PETSC_FALSE;
  PetscMPIInt    size;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
  PetscCall(PetscLayoutGetRanges(A->rmap,&ranges));

  if (reuse == MAT_REUSE_MATRIX) { /* check if local sizes differ in A and B */
    PetscCallMPI(MPI_Comm_size(comm,&size));
    PetscCall(PetscLayoutGetRanges((*B)->rmap,&branges));
    for (i=0;i<size;i++) if (ranges[i+1]!=branges[i+1]) { differ=PETSC_TRUE; break; }
  }

  if (reuse == MAT_REUSE_MATRIX && differ) { /* special case, use auxiliary dense matrix */
    PetscCall(MatCreate(comm,&Bmpi));
    m = PETSC_DECIDE;
    PetscCall(PetscSplitOwnershipEqual(comm,&m,&M));
    n = PETSC_DECIDE;
    PetscCall(PetscSplitOwnershipEqual(comm,&n,&N));
    PetscCall(MatSetSizes(Bmpi,m,n,M,N));
    PetscCall(MatSetType(Bmpi,MATDENSE));
    PetscCall(MatSetUp(Bmpi));

    /* create ScaLAPACK descriptor for B (1d block distribution) */
    PetscCall(PetscBLASIntCast(ranges[1],&bmb));  /* row block size */
    lld = PetscMax(A->rmap->n,1);  /* local leading dimension */
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(bdesc,&a->M,&a->N,&bmb,&a->N,&zero,&zero,&a->grid->ictxcol,&lld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* redistribute matrix */
    PetscCall(MatDenseGetArray(Bmpi,&barray));
    PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&a->N,a->loc,&one,&one,a->desc,barray,&one,&one,bdesc,&a->grid->ictxcol));
    PetscCall(MatDenseRestoreArray(Bmpi,&barray));
    PetscCall(MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY));

    /* transfer rows of auxiliary matrix to the final matrix B */
    PetscCall(MatGetOwnershipRange(Bmpi,&rstart,&rend));
    for (i=rstart;i<rend;i++) {
      PetscCall(MatGetRow(Bmpi,i,&nz,&cwork,&vwork));
      PetscCall(MatSetValues(*B,1,&i,nz,cwork,vwork,INSERT_VALUES));
      PetscCall(MatRestoreRow(Bmpi,i,&nz,&cwork,&vwork));
    }
    PetscCall(MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY));
    PetscCall(MatDestroy(&Bmpi));

  } else {  /* normal cases */

    if (reuse == MAT_REUSE_MATRIX) Bmpi = *B;
    else {
      PetscCall(MatCreate(comm,&Bmpi));
      m = PETSC_DECIDE;
      PetscCall(PetscSplitOwnershipEqual(comm,&m,&M));
      n = PETSC_DECIDE;
      PetscCall(PetscSplitOwnershipEqual(comm,&n,&N));
      PetscCall(MatSetSizes(Bmpi,m,n,M,N));
      PetscCall(MatSetType(Bmpi,MATDENSE));
      PetscCall(MatSetUp(Bmpi));
    }

    /* create ScaLAPACK descriptor for B (1d block distribution) */
    PetscCall(PetscBLASIntCast(ranges[1],&bmb));  /* row block size */
    lld = PetscMax(A->rmap->n,1);  /* local leading dimension */
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(bdesc,&a->M,&a->N,&bmb,&a->N,&zero,&zero,&a->grid->ictxcol,&lld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* redistribute matrix */
    PetscCall(MatDenseGetArray(Bmpi,&barray));
    PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&a->N,a->loc,&one,&one,a->desc,barray,&one,&one,bdesc,&a->grid->ictxcol));
    PetscCall(MatDenseRestoreArray(Bmpi,&barray));

    PetscCall(MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY));
    if (reuse == MAT_INPLACE_MATRIX) {
      PetscCall(MatHeaderReplace(A,&Bmpi));
    } else *B = Bmpi;
  }
  PetscFunctionReturn(0);
}
998 
/* Converts a MATDENSE matrix to MATSCALAPACK: builds a ScaLAPACK descriptor
   describing the dense matrix's 1D row-block layout, then redistributes the
   data into the 2D block-cyclic layout of B with pxgemr2d */
PETSC_INTERN PetscErrorCode MatConvert_Dense_ScaLAPACK(Mat A,MatType newtype,MatReuse reuse,Mat *B)
{
  Mat_ScaLAPACK  *b;
  Mat            Bmpi;
  MPI_Comm       comm;
  PetscInt       M=A->rmap->N,N=A->cmap->N,m,n;
  const PetscInt *ranges;
  PetscBLASInt   adesc[9],amb,zero=0,one=1,lld,info;
  PetscScalar    *aarray;
  PetscInt       lda;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A,&comm));

  if (reuse == MAT_REUSE_MATRIX) Bmpi = *B;
  else {
    /* create a ScaLAPACK matrix with equal-split local sizes */
    PetscCall(MatCreate(comm,&Bmpi));
    m = PETSC_DECIDE;
    PetscCall(PetscSplitOwnershipEqual(comm,&m,&M));
    n = PETSC_DECIDE;
    PetscCall(PetscSplitOwnershipEqual(comm,&n,&N));
    PetscCall(MatSetSizes(Bmpi,m,n,M,N));
    PetscCall(MatSetType(Bmpi,MATSCALAPACK));
    PetscCall(MatSetUp(Bmpi));
  }
  b = (Mat_ScaLAPACK*)Bmpi->data;

  /* create ScaLAPACK descriptor for A (1d block distribution) */
  PetscCall(PetscLayoutGetRanges(A->rmap,&ranges));
  PetscCall(PetscBLASIntCast(ranges[1],&amb));  /* row block size */
  PetscCall(MatDenseGetLDA(A,&lda));
  lld = PetscMax(lda,1);  /* local leading dimension */
  PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(adesc,&b->M,&b->N,&amb,&b->N,&zero,&zero,&b->grid->ictxcol,&lld,&info));
  PetscCheckScaLapackInfo("descinit",info);

  /* redistribute matrix */
  PetscCall(MatDenseGetArray(A,&aarray));
  PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&b->M,&b->N,aarray,&one,&one,adesc,b->loc,&one,&one,b->desc,&b->grid->ictxcol));
  PetscCall(MatDenseRestoreArray(A,&aarray));

  PetscCall(MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY));
  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A,&Bmpi));
  } else *B = Bmpi;
  PetscFunctionReturn(0);
}
1046 
1047 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
1048 {
1049   Mat               mat_scal;
1050   PetscInt          M=A->rmap->N,N=A->cmap->N,rstart=A->rmap->rstart,rend=A->rmap->rend,m,n,row,ncols;
1051   const PetscInt    *cols;
1052   const PetscScalar *vals;
1053 
1054   PetscFunctionBegin;
1055   if (reuse == MAT_REUSE_MATRIX) {
1056     mat_scal = *newmat;
1057     PetscCall(MatZeroEntries(mat_scal));
1058   } else {
1059     PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&mat_scal));
1060     m = PETSC_DECIDE;
1061     PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&m,&M));
1062     n = PETSC_DECIDE;
1063     PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&n,&N));
1064     PetscCall(MatSetSizes(mat_scal,m,n,M,N));
1065     PetscCall(MatSetType(mat_scal,MATSCALAPACK));
1066     PetscCall(MatSetUp(mat_scal));
1067   }
1068   for (row=rstart;row<rend;row++) {
1069     PetscCall(MatGetRow(A,row,&ncols,&cols,&vals));
1070     PetscCall(MatSetValues(mat_scal,1,&row,ncols,cols,vals,INSERT_VALUES));
1071     PetscCall(MatRestoreRow(A,row,&ncols,&cols,&vals));
1072   }
1073   PetscCall(MatAssemblyBegin(mat_scal,MAT_FINAL_ASSEMBLY));
1074   PetscCall(MatAssemblyEnd(mat_scal,MAT_FINAL_ASSEMBLY));
1075 
1076   if (reuse == MAT_INPLACE_MATRIX) PetscCall(MatHeaderReplace(A,&mat_scal));
1077   else *newmat = mat_scal;
1078   PetscFunctionReturn(0);
1079 }
1080 
/* Converts an SBAIJ matrix (which stores only the upper triangular part) to
   MATSCALAPACK. Each stored entry (i,j) is added at (i,j) and, if off the
   diagonal, mirrored at (j,i) — conjugated for Hermitian matrices — so the
   full dense matrix is reconstructed. ADD_VALUES is used so the two passes
   never overwrite each other. */
PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
{
  Mat               mat_scal;
  PetscInt          M=A->rmap->N,N=A->cmap->N,m,n,row,ncols,j,rstart=A->rmap->rstart,rend=A->rmap->rend;
  const PetscInt    *cols;
  const PetscScalar *vals;
  PetscScalar       v;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) {
    mat_scal = *newmat;
    PetscCall(MatZeroEntries(mat_scal));
  } else {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&mat_scal));
    m = PETSC_DECIDE;
    PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&m,&M));
    n = PETSC_DECIDE;
    PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&n,&N));
    PetscCall(MatSetSizes(mat_scal,m,n,M,N));
    PetscCall(MatSetType(mat_scal,MATSCALAPACK));
    PetscCall(MatSetUp(mat_scal));
  }
  PetscCall(MatGetRowUpperTriangular(A));
  for (row=rstart;row<rend;row++) {
    PetscCall(MatGetRow(A,row,&ncols,&cols,&vals));
    PetscCall(MatSetValues(mat_scal,1,&row,ncols,cols,vals,ADD_VALUES));
    for (j=0;j<ncols;j++) { /* lower triangular part */
      /* skip the diagonal entry, which must not be inserted twice */
      if (cols[j] == row) continue;
      v    = A->hermitian == PETSC_BOOL3_TRUE ? PetscConj(vals[j]) : vals[j];
      PetscCall(MatSetValues(mat_scal,1,&cols[j],1,&row,&v,ADD_VALUES));
    }
    PetscCall(MatRestoreRow(A,row,&ncols,&cols,&vals));
  }
  PetscCall(MatRestoreRowUpperTriangular(A));
  PetscCall(MatAssemblyBegin(mat_scal,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat_scal,MAT_FINAL_ASSEMBLY));

  if (reuse == MAT_INPLACE_MATRIX) PetscCall(MatHeaderReplace(A,&mat_scal));
  else *newmat = mat_scal;
  PetscFunctionReturn(0);
}
1122 
1123 static PetscErrorCode MatScaLAPACKSetPreallocation(Mat A)
1124 {
1125   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
1126   PetscInt       sz=0;
1127 
1128   PetscFunctionBegin;
1129   PetscCall(PetscLayoutSetUp(A->rmap));
1130   PetscCall(PetscLayoutSetUp(A->cmap));
1131   if (!a->lld) a->lld = a->locr;
1132 
1133   PetscCall(PetscFree(a->loc));
1134   PetscCall(PetscIntMultError(a->lld,a->locc,&sz));
1135   PetscCall(PetscCalloc1(sz,&a->loc));
1136   PetscCall(PetscLogObjectMemory((PetscObject)A,sz*sizeof(PetscScalar)));
1137 
1138   A->preallocated = PETSC_TRUE;
1139   PetscFunctionReturn(0);
1140 }
1141 
/* Destroys a MATSCALAPACK matrix: frees the local array and pivots, drops one
   reference to the BLACS process grid attached to the communicator (tearing
   the grid down when the last reference goes away), and removes the composed
   methods */
static PetscErrorCode MatDestroy_ScaLAPACK(Mat A)
{
  Mat_ScaLAPACK      *a = (Mat_ScaLAPACK*)A->data;
  Mat_ScaLAPACK_Grid *grid;
  PetscBool          flg;
  MPI_Comm           icomm;

  PetscFunctionBegin;
  PetscCall(MatStashDestroy_Private(&A->stash));
  PetscCall(PetscFree(a->loc));
  PetscCall(PetscFree(a->pivots));
  /* retrieve the grid stored as an attribute of the inner communicator */
  PetscCall(PetscCommDuplicate(PetscObjectComm((PetscObject)A),&icomm,NULL));
  PetscCallMPI(MPI_Comm_get_attr(icomm,Petsc_ScaLAPACK_keyval,(void**)&grid,(int*)&flg));
  if (--grid->grid_refct == 0) {
    /* last matrix using this grid: exit the three BLACS contexts and
       remove the attribute so a future matrix creates a fresh grid */
    Cblacs_gridexit(grid->ictxt);
    Cblacs_gridexit(grid->ictxrow);
    Cblacs_gridexit(grid->ictxcol);
    PetscCall(PetscFree(grid));
    PetscCallMPI(MPI_Comm_delete_attr(icomm,Petsc_ScaLAPACK_keyval));
  }
  PetscCall(PetscCommDestroy(&icomm));
  PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatGetOwnershipIS_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKSetBlockSizes_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKGetBlockSizes_C",NULL));
  PetscCall(PetscFree(A->data));
  PetscFunctionReturn(0);
}
1170 
/* Checks that the given layout is equispaced: every process owns the same
   number of entries as the first one, except that the trailing process(es)
   may own fewer (possibly zero). This is the layout MatCreateScaLAPACK
   enforces; other layouts are not supported. */
static inline PetscErrorCode MatScaLAPACKCheckLayout(PetscLayout map)
{
  const PetscInt *ranges;
  PetscMPIInt    size;
  PetscInt       i,n;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(map->comm,&size));
  if (size>2) {
    PetscCall(PetscLayoutGetRanges(map,&ranges));
    n = ranges[1]-ranges[0];  /* local size of the first process */
    /* find the first interior process whose local size differs from n */
    for (i=1;i<size-1;i++) if (ranges[i+1]-ranges[i]!=n) break;
    /* acceptable only if no interior process differs, or the differing
       process (or the one after it) is empty, i.e. the tail of the layout */
    PetscCheck(i >= size-1 || ranges[i+1]-ranges[i] == 0 || ranges[i+2]-ranges[i+1] == 0,map->comm,PETSC_ERR_SUP,"MATSCALAPACK must have equal local sizes in all processes (except possibly the last one), consider using MatCreateScaLAPACK");
  }
  PetscFunctionReturn(0);
}
1187 
/* Sets up the internal ScaLAPACK data: validates the PETSc layouts, computes
   the local sizes of the 2D block-cyclic distribution with numroc, allocates
   the local array, and initializes the ScaLAPACK array descriptor */
PetscErrorCode MatSetUp_ScaLAPACK(Mat A)
{
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
  PetscBLASInt   info=0;

  PetscFunctionBegin;
  PetscCall(PetscLayoutSetUp(A->rmap));
  PetscCall(PetscLayoutSetUp(A->cmap));

  /* check that the layout is as enforced by MatCreateScaLAPACK */
  PetscCall(MatScaLAPACKCheckLayout(A->rmap));
  PetscCall(MatScaLAPACKCheckLayout(A->cmap));

  /* compute local sizes */
  PetscCall(PetscBLASIntCast(A->rmap->N,&a->M));
  PetscCall(PetscBLASIntCast(A->cmap->N,&a->N));
  a->locr = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
  a->locc = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
  a->lld  = PetscMax(1,a->locr);  /* leading dimension must be >= 1 even with no local rows */

  /* allocate local array */
  PetscCall(MatScaLAPACKSetPreallocation(A));

  /* set up ScaLAPACK descriptor */
  PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(a->desc,&a->M,&a->N,&a->mb,&a->nb,&a->rsrc,&a->csrc,&a->grid->ictxt,&a->lld,&info));
  PetscCheckScaLapackInfo("descinit",info);
  PetscFunctionReturn(0);
}
1216 
1217 PetscErrorCode MatAssemblyBegin_ScaLAPACK(Mat A,MatAssemblyType type)
1218 {
1219   PetscInt       nstash,reallocs;
1220 
1221   PetscFunctionBegin;
1222   if (A->nooffprocentries) PetscFunctionReturn(0);
1223   PetscCall(MatStashScatterBegin_Private(A,&A->stash,NULL));
1224   PetscCall(MatStashGetInfo_Private(&A->stash,&nstash,&reallocs));
1225   PetscCall(PetscInfo(A,"Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs));
1226   PetscFunctionReturn(0);
1227 }
1228 
/* Completes assembly: drains the stash messages and stores each received
   entry into the local part of the 2D block-cyclic array */
PetscErrorCode MatAssemblyEnd_ScaLAPACK(Mat A,MatAssemblyType type)
{
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
  PetscMPIInt    n;
  PetscInt       i,flg,*row,*col;
  PetscScalar    *val;
  PetscBLASInt   gridx,gcidx,lridx,lcidx,rsrc,csrc;

  PetscFunctionBegin;
  if (A->nooffprocentries) PetscFunctionReturn(0);
  while (1) {
    PetscCall(MatStashScatterGetMesg_Private(&A->stash,&n,&row,&col,&val,&flg));
    if (!flg) break;  /* no more messages */
    for (i=0;i<n;i++) {
      /* infog2l maps 1-based global indices to 1-based local indices plus
         the grid coordinates of the owning process */
      PetscCall(PetscBLASIntCast(row[i]+1,&gridx));
      PetscCall(PetscBLASIntCast(col[i]+1,&gcidx));
      PetscStackCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc));
      PetscCheck(rsrc == a->grid->myrow && csrc == a->grid->mycol,PetscObjectComm((PetscObject)A),PETSC_ERR_LIB,"Something went wrong, received value does not belong to this process");
      /* store in column-major local array; subtract 1 for 0-based indexing */
      switch (A->insertmode) {
        case INSERT_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] = val[i]; break;
        case ADD_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] += val[i]; break;
        default: SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for InsertMode %d",(int)A->insertmode);
      }
    }
  }
  PetscCall(MatStashScatterEnd_Private(&A->stash));
  PetscFunctionReturn(0);
}
1257 
1258 PetscErrorCode MatLoad_ScaLAPACK(Mat newMat,PetscViewer viewer)
1259 {
1260   Mat            Adense,As;
1261   MPI_Comm       comm;
1262 
1263   PetscFunctionBegin;
1264   PetscCall(PetscObjectGetComm((PetscObject)newMat,&comm));
1265   PetscCall(MatCreate(comm,&Adense));
1266   PetscCall(MatSetType(Adense,MATDENSE));
1267   PetscCall(MatLoad(Adense,viewer));
1268   PetscCall(MatConvert(Adense,MATSCALAPACK,MAT_INITIAL_MATRIX,&As));
1269   PetscCall(MatDestroy(&Adense));
1270   PetscCall(MatHeaderReplace(newMat,&As));
1271   PetscFunctionReturn(0);
1272 }
1273 
1274 /* -------------------------------------------------------------------*/
/* Function table for MATSCALAPACK; zero entries mark operations that are
   not implemented for this matrix type. The numeric comments give the slot
   index in struct _MatOps. */
static struct _MatOps MatOps_Values = {
       MatSetValues_ScaLAPACK,
       0,
       0,
       MatMult_ScaLAPACK,
/* 4*/ MatMultAdd_ScaLAPACK,
       MatMultTranspose_ScaLAPACK,
       MatMultTransposeAdd_ScaLAPACK,
       MatSolve_ScaLAPACK,
       MatSolveAdd_ScaLAPACK,
       0,
/*10*/ 0,
       MatLUFactor_ScaLAPACK,
       MatCholeskyFactor_ScaLAPACK,
       0,
       MatTranspose_ScaLAPACK,
/*15*/ MatGetInfo_ScaLAPACK,
       0,
       MatGetDiagonal_ScaLAPACK,
       MatDiagonalScale_ScaLAPACK,
       MatNorm_ScaLAPACK,
/*20*/ MatAssemblyBegin_ScaLAPACK,
       MatAssemblyEnd_ScaLAPACK,
       MatSetOption_ScaLAPACK,
       MatZeroEntries_ScaLAPACK,
/*24*/ 0,
       MatLUFactorSymbolic_ScaLAPACK,
       MatLUFactorNumeric_ScaLAPACK,
       MatCholeskyFactorSymbolic_ScaLAPACK,
       MatCholeskyFactorNumeric_ScaLAPACK,
/*29*/ MatSetUp_ScaLAPACK,
       0,
       0,
       0,
       0,
/*34*/ MatDuplicate_ScaLAPACK,
       0,
       0,
       0,
       0,
/*39*/ MatAXPY_ScaLAPACK,
       0,
       0,
       0,
       MatCopy_ScaLAPACK,
/*44*/ 0,
       MatScale_ScaLAPACK,
       MatShift_ScaLAPACK,
       0,
       0,
/*49*/ 0,
       0,
       0,
       0,
       0,
/*54*/ 0,
       0,
       0,
       0,
       0,
/*59*/ 0,
       MatDestroy_ScaLAPACK,
       MatView_ScaLAPACK,
       0,
       0,
/*64*/ 0,
       0,
       0,
       0,
       0,
/*69*/ 0,
       0,
       MatConvert_ScaLAPACK_Dense,
       0,
       0,
/*74*/ 0,
       0,
       0,
       0,
       0,
/*79*/ 0,
       0,
       0,
       0,
       MatLoad_ScaLAPACK,
/*84*/ 0,
       0,
       0,
       0,
       0,
/*89*/ 0,
       0,
       MatMatMultNumeric_ScaLAPACK,
       0,
       0,
/*94*/ 0,
       0,
       0,
       MatMatTransposeMultNumeric_ScaLAPACK,
       0,
/*99*/ MatProductSetFromOptions_ScaLAPACK,
       0,
       0,
       MatConjugate_ScaLAPACK,
       0,
/*104*/0,
       0,
       0,
       0,
       0,
/*109*/MatMatSolve_ScaLAPACK,
       0,
       0,
       0,
       MatMissingDiagonal_ScaLAPACK,
/*114*/0,
       0,
       0,
       0,
       0,
/*119*/0,
       MatHermitianTranspose_ScaLAPACK,
       0,
       0,
       0,
/*124*/0,
       0,
       0,
       0,
       0,
/*129*/0,
       0,
       0,
       0,
       0,
/*134*/0,
       0,
       0,
       0,
       0,
       0,
/*140*/0,
       0,
       0,
       0,
       0,
/*145*/0,
       0,
       0,
       0,
       0
};
1427 
/* Custom stash scatter for MATSCALAPACK: routes each stashed (row,col,value)
   triple to the process that owns it in the 2D block-cyclic distribution
   (determined with infog2l/Cblacs_pnum rather than by row ranges as in the
   generic stash code), then posts the nonblocking sends and receives.
   Indices and values travel in two separate messages per destination. */
static PetscErrorCode MatStashScatterBegin_ScaLAPACK(Mat mat,MatStash *stash,PetscInt *owners)
{
  PetscInt           *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2;
  PetscInt           size=stash->size,nsends;
  PetscInt           count,*sindices,**rindices,i,j,l;
  PetscScalar        **rvalues,*svalues;
  MPI_Comm           comm = stash->comm;
  MPI_Request        *send_waits,*recv_waits,*recv_waits1,*recv_waits2;
  PetscMPIInt        *sizes,*nlengths,nreceives;
  PetscInt           *sp_idx,*sp_idy;
  PetscScalar        *sp_val;
  PetscMatStashSpace space,space_next;
  PetscBLASInt       gridx,gcidx,lridx,lcidx,rsrc,csrc;
  Mat_ScaLAPACK      *a = (Mat_ScaLAPACK*)mat->data;

  PetscFunctionBegin;
  {                             /* make sure all processors are either in INSERTMODE or ADDMODE */
    InsertMode addv;
    PetscCall(MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat)));
    PetscCheck(addv != (ADD_VALUES|INSERT_VALUES),PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
    mat->insertmode = addv; /* in case this processor had no cache */
  }

  bs2 = stash->bs*stash->bs;

  /*  first count number of contributors to each processor */
  PetscCall(PetscCalloc1(size,&nlengths));
  PetscCall(PetscMalloc1(stash->n+1,&owner));

  i     = j    = 0;
  space = stash->space_head;
  while (space) {
    space_next = space->next;
    for (l=0; l<space->local_used; l++) {
      /* map the stashed global (row,col) to the grid coordinates of the
         owner, then to its linear rank in the BLACS context */
      PetscCall(PetscBLASIntCast(space->idx[l]+1,&gridx));
      PetscCall(PetscBLASIntCast(space->idy[l]+1,&gcidx));
      PetscStackCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc));
      j = Cblacs_pnum(a->grid->ictxt,rsrc,csrc);
      nlengths[j]++; owner[i] = j;
      i++;
    }
    space = space_next;
  }

  /* Now check what procs get messages - and compute nsends. */
  PetscCall(PetscCalloc1(size,&sizes));
  for (i=0, nsends=0; i<size; i++) {
    if (nlengths[i]) {
      sizes[i] = 1; nsends++;
    }
  }

  {PetscMPIInt *onodes,*olengths;
   /* Determine the number of messages to expect, their lengths, from from-ids */
   PetscCall(PetscGatherNumberOfMessages(comm,sizes,nlengths,&nreceives));
   PetscCall(PetscGatherMessageLengths(comm,nsends,nreceives,nlengths,&onodes,&olengths));
   /* since clubbing row,col - lengths are multiplied by 2 */
   for (i=0; i<nreceives; i++) olengths[i] *=2;
   PetscCall(PetscPostIrecvInt(comm,tag1,nreceives,onodes,olengths,&rindices,&recv_waits1));
   /* values are size 'bs2' lengths (and remove earlier factor 2 */
   for (i=0; i<nreceives; i++) olengths[i] = olengths[i]*bs2/2;
   PetscCall(PetscPostIrecvScalar(comm,tag2,nreceives,onodes,olengths,&rvalues,&recv_waits2));
   PetscCall(PetscFree(onodes));
   PetscCall(PetscFree(olengths));}

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  PetscCall(PetscMalloc2(bs2*stash->n,&svalues,2*(stash->n+1),&sindices));
  PetscCall(PetscMalloc1(2*nsends,&send_waits));
  PetscCall(PetscMalloc2(size,&startv,size,&starti));
  /* use 2 sends the first with all_a, the next with all_i and all_j */
  startv[0] = 0; starti[0] = 0;
  for (i=1; i<size; i++) {
    startv[i] = startv[i-1] + nlengths[i-1];
    starti[i] = starti[i-1] + 2*nlengths[i-1];
  }

  /* pack the send buffers; startv/starti act as moving cursors here and are
     reset to the segment starts afterwards */
  i     = 0;
  space = stash->space_head;
  while (space) {
    space_next = space->next;
    sp_idx     = space->idx;
    sp_idy     = space->idy;
    sp_val     = space->val;
    for (l=0; l<space->local_used; l++) {
      j = owner[i];
      if (bs2 == 1) {
        svalues[startv[j]] = sp_val[l];
      } else {
        PetscInt    k;
        PetscScalar *buf1,*buf2;
        buf1 = svalues+bs2*startv[j];
        buf2 = space->val + bs2*l;
        for (k=0; k<bs2; k++) buf1[k] = buf2[k];
      }
      /* row indices in the first half of the segment, columns in the second */
      sindices[starti[j]]             = sp_idx[l];
      sindices[starti[j]+nlengths[j]] = sp_idy[l];
      startv[j]++;
      starti[j]++;
      i++;
    }
    space = space_next;
  }
  startv[0] = 0;
  for (i=1; i<size; i++) startv[i] = startv[i-1] + nlengths[i-1];

  for (i=0,count=0; i<size; i++) {
    if (sizes[i]) {
      PetscCallMPI(MPI_Isend(sindices+2*startv[i],2*nlengths[i],MPIU_INT,i,tag1,comm,send_waits+count++));
      PetscCallMPI(MPI_Isend(svalues+bs2*startv[i],bs2*nlengths[i],MPIU_SCALAR,i,tag2,comm,send_waits+count++));
    }
  }
#if defined(PETSC_USE_INFO)
  PetscCall(PetscInfo(NULL,"No of messages: %" PetscInt_FMT "\n",nsends));
  for (i=0; i<size; i++) {
    if (sizes[i]) {
      PetscCall(PetscInfo(NULL,"Mesg_to: %" PetscInt_FMT ": size: %zu bytes\n",i,(size_t)(nlengths[i]*(bs2*sizeof(PetscScalar)+2*sizeof(PetscInt)))));
    }
  }
#endif
  PetscCall(PetscFree(nlengths));
  PetscCall(PetscFree(owner));
  PetscCall(PetscFree2(startv,starti));
  PetscCall(PetscFree(sizes));

  /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */
  PetscCall(PetscMalloc1(2*nreceives,&recv_waits));

  for (i=0; i<nreceives; i++) {
    recv_waits[2*i]   = recv_waits1[i];
    recv_waits[2*i+1] = recv_waits2[i];
  }
  stash->recv_waits = recv_waits;

  PetscCall(PetscFree(recv_waits1));
  PetscCall(PetscFree(recv_waits2));

  stash->svalues         = svalues;
  stash->sindices        = sindices;
  stash->rvalues         = rvalues;
  stash->rindices        = rindices;
  stash->send_waits      = send_waits;
  stash->nsends          = nsends;
  stash->nrecvs          = nreceives;
  stash->reproduce_count = 0;
  PetscFunctionReturn(0);
}
1577 
1578 static PetscErrorCode MatScaLAPACKSetBlockSizes_ScaLAPACK(Mat A,PetscInt mb,PetscInt nb)
1579 {
1580   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
1581 
1582   PetscFunctionBegin;
1583   PetscCheck(!A->preallocated,PETSC_COMM_SELF,PETSC_ERR_ORDER,"Cannot change block sizes after MatSetUp");
1584   PetscCheck(mb >= 1 || mb == PETSC_DECIDE,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"mb %" PetscInt_FMT " must be at least 1",mb);
1585   PetscCheck(nb >= 1 || nb == PETSC_DECIDE,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"nb %" PetscInt_FMT " must be at least 1",nb);
1586   PetscCall(PetscBLASIntCast((mb==PETSC_DECIDE)?DEFAULT_BLOCKSIZE:mb,&a->mb));
1587   PetscCall(PetscBLASIntCast((nb==PETSC_DECIDE)?a->mb:nb,&a->nb));
1588   PetscFunctionReturn(0);
1589 }
1590 
1591 /*@
1592    MatScaLAPACKSetBlockSizes - Sets the block sizes to be used for the distribution of
1593    the ScaLAPACK matrix
1594 
1595    Logically Collective on A
1596 
1597    Input Parameters:
1598 +  A  - a MATSCALAPACK matrix
1599 .  mb - the row block size
1600 -  nb - the column block size
1601 
1602    Level: intermediate
1603 
1604 .seealso: `MatCreateScaLAPACK()`, `MatScaLAPACKGetBlockSizes()`
1605 @*/
PetscErrorCode MatScaLAPACKSetBlockSizes(Mat A,PetscInt mb,PetscInt nb)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  /* mb and nb must have the same value on every rank of A's communicator */
  PetscValidLogicalCollectiveInt(A,mb,2);
  PetscValidLogicalCollectiveInt(A,nb,3);
  /* dispatch to the implementation composed on A; PetscTryMethod is a no-op when the
     method is not composed (i.e. when A is not of type MATSCALAPACK) */
  PetscTryMethod(A,"MatScaLAPACKSetBlockSizes_C",(Mat,PetscInt,PetscInt),(A,mb,nb));
  PetscFunctionReturn(0);
}
1615 
1616 static PetscErrorCode MatScaLAPACKGetBlockSizes_ScaLAPACK(Mat A,PetscInt *mb,PetscInt *nb)
1617 {
1618   Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
1619 
1620   PetscFunctionBegin;
1621   if (mb) *mb = a->mb;
1622   if (nb) *nb = a->nb;
1623   PetscFunctionReturn(0);
1624 }
1625 
1626 /*@
1627    MatScaLAPACKGetBlockSizes - Gets the block sizes used in the distribution of
1628    the ScaLAPACK matrix
1629 
1630    Not collective
1631 
1632    Input Parameter:
1633 .  A  - a MATSCALAPACK matrix
1634 
1635    Output Parameters:
1636 +  mb - the row block size
1637 -  nb - the column block size
1638 
1639    Level: intermediate
1640 
1641 .seealso: `MatCreateScaLAPACK()`, `MatScaLAPACKSetBlockSizes()`
1642 @*/
PetscErrorCode MatScaLAPACKGetBlockSizes(Mat A,PetscInt *mb,PetscInt *nb)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  /* PetscUseMethod errors if the method is not composed on A, so A must be MATSCALAPACK */
  PetscUseMethod(A,"MatScaLAPACKGetBlockSizes_C",(Mat,PetscInt*,PetscInt*),(A,mb,nb));
  PetscFunctionReturn(0);
}
1650 
1651 PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Ref(MatStash*,PetscMPIInt*,PetscInt**,PetscInt**,PetscScalar**,PetscInt*);
1652 PETSC_INTERN PetscErrorCode MatStashScatterEnd_Ref(MatStash*);
1653 
1654 /*MC
1655    MATSCALAPACK = "scalapack" - A matrix type for dense matrices using the ScaLAPACK package
1656 
1657    Use ./configure --download-scalapack to install PETSc to use ScaLAPACK
1658 
1659    Options Database Keys:
1660 +  -mat_type scalapack - sets the matrix type to "scalapack" during a call to MatSetFromOptions()
1661 .  -pc_factor_mat_solver_type scalapack - to use this direct solver with the option -pc_type lu
1662 .  -mat_scalapack_grid_height - sets Grid Height for 2D cyclic ordering of internal matrix
1663 -  -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma)
1664 
  Note:
   Unlike most matrix formats, this format does not store all the matrix entries for a contiguous
1667    range of rows on an MPI rank. Use `MatGetOwnershipIS()` to determine what values are stored on
1668    the given rank.
1669 
1670    Level: beginner
1671 
1672 .seealso: `MATDENSE`, `MATELEMENTAL`, `MatGetOwnershipIS()`
1673 M*/
1674 
1675 PETSC_EXTERN PetscErrorCode MatCreate_ScaLAPACK(Mat A)
1676 {
1677   Mat_ScaLAPACK      *a;
1678   PetscBool          flg,flg1;
1679   Mat_ScaLAPACK_Grid *grid;
1680   MPI_Comm           icomm;
1681   PetscBLASInt       nprow,npcol,myrow,mycol;
1682   PetscInt           optv1,k=2,array[2]={0,0};
1683   PetscMPIInt        size;
1684 
1685   PetscFunctionBegin;
1686   PetscCall(PetscMemcpy(A->ops,&MatOps_Values,sizeof(struct _MatOps)));
1687   A->insertmode = NOT_SET_VALUES;
1688 
1689   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)A),1,&A->stash));
1690   A->stash.ScatterBegin   = MatStashScatterBegin_ScaLAPACK;
1691   A->stash.ScatterGetMesg = MatStashScatterGetMesg_Ref;
1692   A->stash.ScatterEnd     = MatStashScatterEnd_Ref;
1693   A->stash.ScatterDestroy = NULL;
1694 
1695   PetscCall(PetscNewLog(A,&a));
1696   A->data = (void*)a;
1697 
1698   /* Grid needs to be shared between multiple Mats on the same communicator, implement by attribute caching on the MPI_Comm */
1699   if (Petsc_ScaLAPACK_keyval == MPI_KEYVAL_INVALID) {
1700     PetscCallMPI(MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN,MPI_COMM_NULL_DELETE_FN,&Petsc_ScaLAPACK_keyval,(void*)0));
1701     PetscCall(PetscRegisterFinalize(Petsc_ScaLAPACK_keyval_free));
1702     PetscCall(PetscCitationsRegister(ScaLAPACKCitation,&ScaLAPACKCite));
1703   }
1704   PetscCall(PetscCommDuplicate(PetscObjectComm((PetscObject)A),&icomm,NULL));
1705   PetscCallMPI(MPI_Comm_get_attr(icomm,Petsc_ScaLAPACK_keyval,(void**)&grid,(int*)&flg));
1706   if (!flg) {
1707     PetscCall(PetscNewLog(A,&grid));
1708 
1709     PetscCallMPI(MPI_Comm_size(icomm,&size));
1710     grid->nprow = (PetscInt) (PetscSqrtReal((PetscReal)size) + 0.001);
1711 
1712     PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"ScaLAPACK Grid Options","Mat");
1713     PetscCall(PetscOptionsInt("-mat_scalapack_grid_height","Grid Height","None",grid->nprow,&optv1,&flg1));
1714     if (flg1) {
1715       PetscCheck(size % optv1 == 0,PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_INCOMP,"Grid Height %" PetscInt_FMT " must evenly divide CommSize %d",optv1,size);
1716       grid->nprow = optv1;
1717     }
1718     PetscOptionsEnd();
1719 
1720     if (size % grid->nprow) grid->nprow = 1;  /* cannot use a squarish grid, use a 1d grid */
1721     grid->npcol = size/grid->nprow;
1722     PetscCall(PetscBLASIntCast(grid->nprow,&nprow));
1723     PetscCall(PetscBLASIntCast(grid->npcol,&npcol));
1724     grid->ictxt = Csys2blacs_handle(icomm);
1725     Cblacs_gridinit(&grid->ictxt,"R",nprow,npcol);
1726     Cblacs_gridinfo(grid->ictxt,&nprow,&npcol,&myrow,&mycol);
1727     grid->grid_refct = 1;
1728     grid->nprow      = nprow;
1729     grid->npcol      = npcol;
1730     grid->myrow      = myrow;
1731     grid->mycol      = mycol;
1732     /* auxiliary 1d BLACS contexts for 1xsize and sizex1 grids */
1733     grid->ictxrow = Csys2blacs_handle(icomm);
1734     Cblacs_gridinit(&grid->ictxrow,"R",1,size);
1735     grid->ictxcol = Csys2blacs_handle(icomm);
1736     Cblacs_gridinit(&grid->ictxcol,"R",size,1);
1737     PetscCallMPI(MPI_Comm_set_attr(icomm,Petsc_ScaLAPACK_keyval,(void*)grid));
1738 
1739   } else grid->grid_refct++;
1740   PetscCall(PetscCommDestroy(&icomm));
1741   a->grid = grid;
1742   a->mb   = DEFAULT_BLOCKSIZE;
1743   a->nb   = DEFAULT_BLOCKSIZE;
1744 
1745   PetscOptionsBegin(PetscObjectComm((PetscObject)A),NULL,"ScaLAPACK Options","Mat");
1746   PetscCall(PetscOptionsIntArray("-mat_scalapack_block_sizes","Size of the blocks to use (one or two comma-separated integers)","MatCreateScaLAPACK",array,&k,&flg));
1747   if (flg) {
1748     a->mb = array[0];
1749     a->nb = (k>1)? array[1]: a->mb;
1750   }
1751   PetscOptionsEnd();
1752 
1753   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatGetOwnershipIS_C",MatGetOwnershipIS_ScaLAPACK));
1754   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKSetBlockSizes_C",MatScaLAPACKSetBlockSizes_ScaLAPACK));
1755   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKGetBlockSizes_C",MatScaLAPACKGetBlockSizes_ScaLAPACK));
1756   PetscCall(PetscObjectChangeTypeName((PetscObject)A,MATSCALAPACK));
1757   PetscFunctionReturn(0);
1758 }
1759 
1760 /*@C
1761    MatCreateScaLAPACK - Creates a dense parallel matrix in ScaLAPACK format
1762    (2D block cyclic distribution).
1763 
1764    Collective
1765 
1766    Input Parameters:
1767 +  comm - MPI communicator
1768 .  mb   - row block size (or PETSC_DECIDE to have it set)
1769 .  nb   - column block size (or PETSC_DECIDE to have it set)
1770 .  M    - number of global rows
1771 .  N    - number of global columns
1772 .  rsrc - coordinate of process that owns the first row of the distributed matrix
1773 -  csrc - coordinate of process that owns the first column of the distributed matrix
1774 
1775    Output Parameter:
1776 .  A - the matrix
1777 
1778    Options Database Keys:
1779 .  -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma)
1780 
1781    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
1782    MatXXXXSetPreallocation() paradigm instead of this routine directly.
1783    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
1784 
1785    Notes:
1786    If PETSC_DECIDE is used for the block sizes, then an appropriate value
1787    is chosen.
1788 
1789    Storage Information:
   Storage is completely managed by ScaLAPACK, so this requires PETSc to be
1791    configured with ScaLAPACK. In particular, PETSc's local sizes lose
1792    significance and are thus ignored. The block sizes refer to the values
1793    used for the distributed matrix, not the same meaning as in BAIJ.
1794 
1795    Level: intermediate
1796 
1797 .seealso: `MatCreate()`, `MatCreateDense()`, `MatSetValues()`
1798 @*/
PetscErrorCode MatCreateScaLAPACK(MPI_Comm comm,PetscInt mb,PetscInt nb,PetscInt M,PetscInt N,PetscInt rsrc,PetscInt csrc,Mat *A)
{
  Mat_ScaLAPACK  *a;
  PetscInt       m,n;

  PetscFunctionBegin;
  PetscCall(MatCreate(comm,A));
  PetscCall(MatSetType(*A,MATSCALAPACK));
  /* global dimensions are mandatory; local sizes are derived below, not user-chosen */
  PetscCheck(M != PETSC_DECIDE && N != PETSC_DECIDE,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot use PETSC_DECIDE for matrix dimensions");
  /* rows and columns are NOT distributed according to PetscSplitOwnership */
  m = PETSC_DECIDE;
  PetscCall(PetscSplitOwnershipEqual(comm,&m,&M));
  n = PETSC_DECIDE;
  PetscCall(PetscSplitOwnershipEqual(comm,&n,&N));
  PetscCall(MatSetSizes(*A,m,n,M,N));
  a = (Mat_ScaLAPACK*)(*A)->data;
  /* store dimensions, block sizes, and source-process coordinates as BLAS ints,
     the integer type ScaLAPACK routines expect; the casts error on overflow */
  PetscCall(PetscBLASIntCast(M,&a->M));
  PetscCall(PetscBLASIntCast(N,&a->N));
  /* PETSC_DECIDE for mb selects the default block size; PETSC_DECIDE for nb mirrors mb */
  PetscCall(PetscBLASIntCast((mb==PETSC_DECIDE)?DEFAULT_BLOCKSIZE:mb,&a->mb));
  PetscCall(PetscBLASIntCast((nb==PETSC_DECIDE)?a->mb:nb,&a->nb));
  PetscCall(PetscBLASIntCast(rsrc,&a->rsrc));
  PetscCall(PetscBLASIntCast(csrc,&a->csrc));
  PetscCall(MatSetUp(*A));
  PetscFunctionReturn(0);
}
1824