xref: /petsc/src/mat/impls/scalapack/matscalapack.c (revision ccb4e88a40f0b86eaeca07ff64c64e4de2fae686)
1 #include <petsc/private/petscscalapack.h>  /*I "petscmat.h" I*/
2 
3 const char ScaLAPACKCitation[] = "@BOOK{scalapack-user-guide,\n"
4 "       AUTHOR = {L. S. Blackford and J. Choi and A. Cleary and E. D'Azevedo and\n"
5 "                 J. Demmel and I. Dhillon and J. Dongarra and S. Hammarling and\n"
6 "                 G. Henry and A. Petitet and K. Stanley and D. Walker and R. C. Whaley},\n"
7 "       TITLE = {Sca{LAPACK} Users' Guide},\n"
8 "       PUBLISHER = {SIAM},\n"
9 "       ADDRESS = {Philadelphia, PA},\n"
10 "       YEAR = 1997\n"
11 "}\n";
12 static PetscBool ScaLAPACKCite = PETSC_FALSE;
13 
14 #define DEFAULT_BLOCKSIZE 64
15 
16 /*
17     The variable Petsc_ScaLAPACK_keyval is used to indicate an MPI attribute that
18   is attached to a communicator, in this case the attribute is a Mat_ScaLAPACK_Grid
19 */
20 static PetscMPIInt Petsc_ScaLAPACK_keyval = MPI_KEYVAL_INVALID;
21 
/*
   Petsc_ScaLAPACK_keyval_free - Deletes the MPI keyval used to attach the
   Mat_ScaLAPACK_Grid attribute to communicators; registered to run at
   finalization so the keyval is released exactly once.
*/
static PetscErrorCode Petsc_ScaLAPACK_keyval_free(void)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscInfo(NULL,"Freeing Petsc_ScaLAPACK_keyval\n");CHKERRQ(ierr);
  ierr = MPI_Comm_free_keyval(&Petsc_ScaLAPACK_keyval);CHKERRMPI(ierr);  /* resets keyval to MPI_KEYVAL_INVALID */
  PetscFunctionReturn(0);
}
31 
/*
   MatView_ScaLAPACK - Viewer for MATSCALAPACK matrices.

   With an ASCII viewer in INFO/INFO_DETAIL format only the distribution
   parameters are printed (block sizes, process grid dimensions, source
   process coordinates, largest local size); FACTOR_INFO prints nothing.
   For any other viewer/format the matrix entries are shown by converting
   to MATDENSE and delegating to the dense viewer.
*/
static PetscErrorCode MatView_ScaLAPACK(Mat A,PetscViewer viewer)
{
  PetscErrorCode    ierr;
  Mat_ScaLAPACK     *a = (Mat_ScaLAPACK*)A->data;
  PetscBool         iascii;
  PetscViewerFormat format;
  Mat               Adense;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      ierr = PetscViewerASCIIPrintf(viewer,"block sizes: %d,%d\n",(int)a->mb,(int)a->nb);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"grid height=%d, grid width=%d\n",(int)a->grid->nprow,(int)a->grid->npcol);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"coordinates of process owning first row and column: (%d,%d)\n",(int)a->rsrc,(int)a->csrc);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"dimension of largest local matrix: %d x %d\n",(int)a->locr,(int)a->locc);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);  /* nothing extra to report for factored matrices */
    }
  }
  /* convert to dense format and call MatView() */
  ierr = MatConvert(A,MATDENSE,MAT_INITIAL_MATRIX,&Adense);CHKERRQ(ierr);
  ierr = MatView(Adense,viewer);CHKERRQ(ierr);
  ierr = MatDestroy(&Adense);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
60 
61 static PetscErrorCode MatGetInfo_ScaLAPACK(Mat A,MatInfoType flag,MatInfo *info)
62 {
63   PetscErrorCode ierr;
64   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
65   PetscLogDouble isend[2],irecv[2];
66 
67   PetscFunctionBegin;
68   info->block_size = 1.0;
69 
70   isend[0] = a->lld*a->locc;     /* locally allocated */
71   isend[1] = a->locr*a->locc;    /* used submatrix */
72   if (flag == MAT_LOCAL || flag == MAT_GLOBAL_MAX) {
73     info->nz_allocated   = isend[0];
74     info->nz_used        = isend[1];
75   } else if (flag == MAT_GLOBAL_MAX) {
76     ierr = MPIU_Allreduce(isend,irecv,2,MPIU_PETSCLOGDOUBLE,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
77     info->nz_allocated   = irecv[0];
78     info->nz_used        = irecv[1];
79   } else if (flag == MAT_GLOBAL_SUM) {
80     ierr = MPIU_Allreduce(isend,irecv,2,MPIU_PETSCLOGDOUBLE,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
81     info->nz_allocated   = irecv[0];
82     info->nz_used        = irecv[1];
83   }
84 
85   info->nz_unneeded       = 0;
86   info->assemblies        = A->num_ass;
87   info->mallocs           = 0;
88   info->memory            = ((PetscObject)A)->mem;
89   info->fill_ratio_given  = 0;
90   info->fill_ratio_needed = 0;
91   info->factor_mallocs    = 0;
92   PetscFunctionReturn(0);
93 }
94 
95 PetscErrorCode MatSetOption_ScaLAPACK(Mat A,MatOption op,PetscBool flg)
96 {
97   PetscFunctionBegin;
98   switch (op) {
99     case MAT_NEW_NONZERO_LOCATIONS:
100     case MAT_NEW_NONZERO_LOCATION_ERR:
101     case MAT_NEW_NONZERO_ALLOCATION_ERR:
102     case MAT_SYMMETRIC:
103     case MAT_SORTED_FULL:
104     case MAT_HERMITIAN:
105       break;
106     default:
107       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unsupported option %s",MatOptions[op]);
108   }
109   PetscFunctionReturn(0);
110 }
111 
/*
   MatSetValues_ScaLAPACK - Inserts or adds a logically dense block of values.

   For each (row,col) pair, ScaLAPACK's infog2l maps the global 1-based
   indices to local indices plus the grid coordinates (rsrc,csrc) of the
   owning process. Locally owned entries are written directly into the
   column-major local array; off-process entries are placed in the matrix
   stash and communicated during MatAssemblyBegin/End.
*/
static PetscErrorCode MatSetValues_ScaLAPACK(Mat A,PetscInt nr,const PetscInt *rows,PetscInt nc,const PetscInt *cols,const PetscScalar *vals,InsertMode imode)
{
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,j;
  PetscBLASInt   gridx,gcidx,lridx,lcidx,rsrc,csrc;

  PetscFunctionBegin;
  for (i=0;i<nr;i++) {
    if (rows[i] < 0) continue;  /* negative indices are silently skipped */
    ierr = PetscBLASIntCast(rows[i]+1,&gridx);CHKERRQ(ierr);  /* ScaLAPACK indices are 1-based */
    for (j=0;j<nc;j++) {
      if (cols[j] < 0) continue;
      ierr = PetscBLASIntCast(cols[j]+1,&gcidx);CHKERRQ(ierr);
      PetscStackCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc));
      if (rsrc==a->grid->myrow && csrc==a->grid->mycol) {
        /* locally owned entry: store into column-major array with leading dimension lld */
        switch (imode) {
          case INSERT_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] = vals[i*nc+j]; break;
          case ADD_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] += vals[i*nc+j]; break;
          default: SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for InsertMode %d",(int)imode);
        }
      } else {
        /* off-process entry: stash it for assembly-time communication */
        if (A->nooffprocentries) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process entry even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set");
        A->assembled = PETSC_FALSE;
        ierr = MatStashValuesRow_Private(&A->stash,rows[i],1,cols+j,vals+i*nc+j,(PetscBool)(imode==ADD_VALUES));CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}
142 
/*
   MatMultXXXYYY_ScaLAPACK - Common kernel for all MatMult variants:
   computes y = op(A)*x + beta*y with op = A or A^T depending on 'transpose'.

   PETSc vectors are distributed in 1d blocks across all processes, while
   the PBLAS gemv expects operands distributed on the 2d process grid.
   The strategy is therefore: build 1d descriptors matching the PETSc
   layout (using the single-row/single-column BLACS contexts ictxrow and
   ictxcol), redistribute x and (if beta != 0) y onto the 2d grid with
   p*gemr2d, run p*gemv there, and redistribute the result back.

   NOTE(review): the block sizes mb/nb for the 1d descriptors are taken
   from ranges[1], i.e. the size of the first process's range — this
   presumably assumes a PETSC_DECIDE-style layout where the first range
   is the largest; confirm against MatCreateScaLAPACK's layout setup.
*/
static PetscErrorCode MatMultXXXYYY_ScaLAPACK(Mat A,PetscBool transpose,PetscScalar beta,const PetscScalar *x,PetscScalar *y)
{
  PetscErrorCode ierr;
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
  PetscScalar    *x2d,*y2d,alpha=1.0;
  const PetscInt *ranges;
  PetscBLASInt   xdesc[9],ydesc[9],x2desc[9],y2desc[9],mb,nb,lszx,lszy,zero=0,one=1,xlld,ylld,info;

  PetscFunctionBegin;
  if (transpose) {

    /* create ScaLAPACK descriptors for vectors (1d block distribution) */
    ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr);
    ierr = PetscBLASIntCast(ranges[1],&mb);CHKERRQ(ierr);  /* x block size */
    xlld = PetscMax(1,A->rmap->n);
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&xlld,&info));
    PetscCheckScaLapackInfo("descinit",info);
    ierr = PetscLayoutGetRanges(A->cmap,&ranges);CHKERRQ(ierr);
    ierr = PetscBLASIntCast(ranges[1],&nb);CHKERRQ(ierr);  /* y block size */
    ylld = 1;  /* y is a 1 x N row, so the leading dimension is 1 */
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ydesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&ylld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* allocate 2d vectors */
    lszx = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
    lszy = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
    ierr = PetscMalloc2(lszx,&x2d,lszy,&y2d);CHKERRQ(ierr);
    xlld = PetscMax(1,lszx);

    /* create ScaLAPACK descriptors for vectors (2d block distribution) */
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&xlld,&info));
    PetscCheckScaLapackInfo("descinit",info);
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(y2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&ylld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* redistribute x as a column of a 2d matrix */
    PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,(PetscScalar*)x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxcol));

    /* redistribute y as a row of a 2d matrix (only needed when its old contents matter) */
    if (beta!=0.0) PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,y,&one,&one,ydesc,y2d,&one,&one,y2desc,&a->grid->ictxrow));

    /* call PBLAS subroutine */
    PetscStackCallBLAS("PBLASgemv",PBLASgemv_("T",&a->M,&a->N,&alpha,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&one,&beta,y2d,&one,&one,y2desc,&one));

    /* redistribute y from a row of a 2d matrix */
    PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,y2d,&one,&one,y2desc,y,&one,&one,ydesc,&a->grid->ictxrow));

  } else {   /* non-transpose */

    /* create ScaLAPACK descriptors for vectors (1d block distribution) */
    ierr = PetscLayoutGetRanges(A->cmap,&ranges);CHKERRQ(ierr);
    ierr = PetscBLASIntCast(ranges[1],&nb);CHKERRQ(ierr);  /* x block size */
    xlld = 1;  /* x is a 1 x N row, so the leading dimension is 1 */
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&xlld,&info));
    PetscCheckScaLapackInfo("descinit",info);
    ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr);
    ierr = PetscBLASIntCast(ranges[1],&mb);CHKERRQ(ierr);  /* y block size */
    ylld = PetscMax(1,A->rmap->n);
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ydesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&ylld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* allocate 2d vectors */
    lszy = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
    lszx = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
    ierr = PetscMalloc2(lszx,&x2d,lszy,&y2d);CHKERRQ(ierr);
    ylld = PetscMax(1,lszy);

    /* create ScaLAPACK descriptors for vectors (2d block distribution) */
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&xlld,&info));
    PetscCheckScaLapackInfo("descinit",info);
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(y2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&ylld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* redistribute x as a row of a 2d matrix */
    PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,(PetscScalar*)x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxrow));

    /* redistribute y as a column of a 2d matrix (only needed when its old contents matter) */
    if (beta!=0.0) PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,y,&one,&one,ydesc,y2d,&one,&one,y2desc,&a->grid->ictxcol));

    /* call PBLAS subroutine */
    PetscStackCallBLAS("PBLASgemv",PBLASgemv_("N",&a->M,&a->N,&alpha,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&one,&beta,y2d,&one,&one,y2desc,&one));

    /* redistribute y from a column of a 2d matrix */
    PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,y2d,&one,&one,y2desc,y,&one,&one,ydesc,&a->grid->ictxcol));

  }
  ierr = PetscFree2(x2d,y2d);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
232 
233 static PetscErrorCode MatMult_ScaLAPACK(Mat A,Vec x,Vec y)
234 {
235   PetscErrorCode    ierr;
236   const PetscScalar *xarray;
237   PetscScalar       *yarray;
238 
239   PetscFunctionBegin;
240   ierr = VecGetArrayRead(x,&xarray);CHKERRQ(ierr);
241   ierr = VecGetArray(y,&yarray);CHKERRQ(ierr);
242   ierr = MatMultXXXYYY_ScaLAPACK(A,PETSC_FALSE,0.0,xarray,yarray);CHKERRQ(ierr);
243   ierr = VecRestoreArrayRead(x,&xarray);CHKERRQ(ierr);
244   ierr = VecRestoreArray(y,&yarray);CHKERRQ(ierr);
245   PetscFunctionReturn(0);
246 }
247 
248 static PetscErrorCode MatMultTranspose_ScaLAPACK(Mat A,Vec x,Vec y)
249 {
250   PetscErrorCode    ierr;
251   const PetscScalar *xarray;
252   PetscScalar       *yarray;
253 
254   PetscFunctionBegin;
255   ierr = VecGetArrayRead(x,&xarray);CHKERRQ(ierr);
256   ierr = VecGetArray(y,&yarray);CHKERRQ(ierr);
257   ierr = MatMultXXXYYY_ScaLAPACK(A,PETSC_TRUE,0.0,xarray,yarray);CHKERRQ(ierr);
258   ierr = VecRestoreArrayRead(x,&xarray);CHKERRQ(ierr);
259   ierr = VecRestoreArray(y,&yarray);CHKERRQ(ierr);
260   PetscFunctionReturn(0);
261 }
262 
263 static PetscErrorCode MatMultAdd_ScaLAPACK(Mat A,Vec x,Vec y,Vec z)
264 {
265   PetscErrorCode    ierr;
266   const PetscScalar *xarray;
267   PetscScalar       *zarray;
268 
269   PetscFunctionBegin;
270   if (y != z) { ierr = VecCopy(y,z);CHKERRQ(ierr); }
271   ierr = VecGetArrayRead(x,&xarray);CHKERRQ(ierr);
272   ierr = VecGetArray(z,&zarray);CHKERRQ(ierr);
273   ierr = MatMultXXXYYY_ScaLAPACK(A,PETSC_FALSE,1.0,xarray,zarray);CHKERRQ(ierr);
274   ierr = VecRestoreArrayRead(x,&xarray);CHKERRQ(ierr);
275   ierr = VecRestoreArray(z,&zarray);CHKERRQ(ierr);
276   PetscFunctionReturn(0);
277 }
278 
279 static PetscErrorCode MatMultTransposeAdd_ScaLAPACK(Mat A,Vec x,Vec y,Vec z)
280 {
281   PetscErrorCode    ierr;
282   const PetscScalar *xarray;
283   PetscScalar       *zarray;
284 
285   PetscFunctionBegin;
286   if (y != z) { ierr = VecCopy(y,z);CHKERRQ(ierr); }
287   ierr = VecGetArrayRead(x,&xarray);CHKERRQ(ierr);
288   ierr = VecGetArray(z,&zarray);CHKERRQ(ierr);
289   ierr = MatMultXXXYYY_ScaLAPACK(A,PETSC_TRUE,1.0,xarray,zarray);CHKERRQ(ierr);
290   ierr = VecRestoreArrayRead(x,&xarray);CHKERRQ(ierr);
291   ierr = VecRestoreArray(z,&zarray);CHKERRQ(ierr);
292   PetscFunctionReturn(0);
293 }
294 
/*
   MatMatMultNumeric_ScaLAPACK - Computes C = A*B with a single PBLAS
   p*gemm call; all three matrices live on the same 2d process grid, so
   no explicit redistribution is needed.
*/
PetscErrorCode MatMatMultNumeric_ScaLAPACK(Mat A,Mat B,Mat C)
{
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
  Mat_ScaLAPACK *b = (Mat_ScaLAPACK*)B->data;
  Mat_ScaLAPACK *c = (Mat_ScaLAPACK*)C->data;
  PetscScalar   sone=1.0,zero=0.0;
  PetscBLASInt  one=1;

  PetscFunctionBegin;
  /* C = 1.0*A*B + 0.0*C */
  PetscStackCallBLAS("PBLASgemm",PBLASgemm_("N","N",&a->M,&b->N,&a->N,&sone,a->loc,&one,&one,a->desc,b->loc,&one,&one,b->desc,&zero,c->loc,&one,&one,c->desc));
  C->assembled = PETSC_TRUE;
  PetscFunctionReturn(0);
}
308 
/*
   MatMatMultSymbolic_ScaLAPACK - Sets up C for the product A*B: sizes C
   from A's rows and B's columns, gives it the MATSCALAPACK type, allocates
   it, and installs the numeric phase. 'fill' is ignored (dense storage).
*/
PetscErrorCode MatMatMultSymbolic_ScaLAPACK(Mat A,Mat B,PetscReal fill,Mat C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetSizes(C,A->rmap->n,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
  ierr = MatSetType(C,MATSCALAPACK);CHKERRQ(ierr);
  ierr = MatSetUp(C);CHKERRQ(ierr);
  C->ops->matmultnumeric = MatMatMultNumeric_ScaLAPACK;
  PetscFunctionReturn(0);
}
320 
/*
   MatMatTransposeMultNumeric_ScaLAPACK - Computes C = A*B^T with a single
   PBLAS p*gemm call ("N","T"); all matrices share the same 2d process grid.
*/
static PetscErrorCode MatMatTransposeMultNumeric_ScaLAPACK(Mat A,Mat B,Mat C)
{
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
  Mat_ScaLAPACK *b = (Mat_ScaLAPACK*)B->data;
  Mat_ScaLAPACK *c = (Mat_ScaLAPACK*)C->data;
  PetscScalar   sone=1.0,zero=0.0;
  PetscBLASInt  one=1;

  PetscFunctionBegin;
  /* C = 1.0*A*B^T + 0.0*C; result is M(A) x M(B) */
  PetscStackCallBLAS("PBLASgemm",PBLASgemm_("N","T",&a->M,&b->M,&a->N,&sone,a->loc,&one,&one,a->desc,b->loc,&one,&one,b->desc,&zero,c->loc,&one,&one,c->desc));
  C->assembled = PETSC_TRUE;
  PetscFunctionReturn(0);
}
334 
335 static PetscErrorCode MatMatTransposeMultSymbolic_ScaLAPACK(Mat A,Mat B,PetscReal fill,Mat C)
336 {
337   PetscErrorCode ierr;
338 
339   PetscFunctionBegin;
340   ierr = MatSetSizes(C,A->rmap->n,B->rmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
341   ierr = MatSetType(C,MATSCALAPACK);CHKERRQ(ierr);
342   ierr = MatSetUp(C);CHKERRQ(ierr);
343   PetscFunctionReturn(0);
344 }
345 
346 /* --------------------------------------- */
/* Install the symbolic routines used for the C = A*B product */
static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_AB(Mat C)
{
  PetscFunctionBegin;
  C->ops->matmultsymbolic = MatMatMultSymbolic_ScaLAPACK;
  C->ops->productsymbolic = MatProductSymbolic_AB;
  PetscFunctionReturn(0);
}
354 
/* Install the symbolic routines used for the C = A*B^T product */
static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_ABt(Mat C)
{
  PetscFunctionBegin;
  C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_ScaLAPACK;
  C->ops->productsymbolic          = MatProductSymbolic_ABt;
  PetscFunctionReturn(0);
}
362 
/*
   MatProductSetFromOptions_ScaLAPACK - Dispatches the product setup based on
   the requested product type; only AB and ABt are supported for pairs of
   ScaLAPACK matrices.
*/
PETSC_INTERN PetscErrorCode MatProductSetFromOptions_ScaLAPACK(Mat C)
{
  PetscErrorCode ierr;
  Mat_Product    *product = C->product;

  PetscFunctionBegin;
  switch (product->type) {
    case MATPRODUCT_AB:
      ierr = MatProductSetFromOptions_ScaLAPACK_AB(C);CHKERRQ(ierr);
      break;
    case MATPRODUCT_ABt:
      ierr = MatProductSetFromOptions_ScaLAPACK_ABt(C);CHKERRQ(ierr);
      break;
    default: SETERRQ1(PetscObjectComm((PetscObject)C),PETSC_ERR_SUP,"MatProduct type %s is not supported for ScaLAPACK and ScaLAPACK matrices",MatProductTypes[product->type]);
  }
  PetscFunctionReturn(0);
}
380 /* --------------------------------------- */
381 
/*
   MatGetDiagonal_ScaLAPACK - Extracts the diagonal of A into the PETSc
   vector D.

   D follows A's row layout when M <= N and A's column layout otherwise.
   The diagonal is first gathered element-by-element (elget/elset) into a
   temporary 2d-distributed vector on the process grid, then redistributed
   with p*gemr2d into the 1d layout matching D.
*/
static PetscErrorCode MatGetDiagonal_ScaLAPACK(Mat A,Vec D)
{
  PetscErrorCode    ierr;
  Mat_ScaLAPACK     *a = (Mat_ScaLAPACK*)A->data;
  PetscScalar       *darray,*d2d,v;
  const PetscInt    *ranges;
  PetscBLASInt      j,ddesc[9],d2desc[9],mb,nb,lszd,zero=0,one=1,dlld,info;

  PetscFunctionBegin;
  ierr = VecGetArray(D,&darray);CHKERRQ(ierr);

  if (A->rmap->N<=A->cmap->N) {   /* row version */

    /* create ScaLAPACK descriptor for vector (1d block distribution) */
    ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr);
    ierr = PetscBLASIntCast(ranges[1],&mb);CHKERRQ(ierr);  /* D block size */
    dlld = PetscMax(1,A->rmap->n);
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&dlld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* allocate 2d vector (zeroed: only locally owned entries get set below) */
    lszd = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
    ierr = PetscCalloc1(lszd,&d2d);CHKERRQ(ierr);
    dlld = PetscMax(1,lszd);

    /* create ScaLAPACK descriptor for vector (2d block distribution) */
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&dlld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* collect diagonal: broadcast each A(j,j) along its process row, store as d2d(j,1) */
    for (j=1;j<=a->M;j++) {
      PetscStackCallBLAS("SCALAPACKelget",SCALAPACKelget_("R"," ",&v,a->loc,&j,&j,a->desc));
      PetscStackCallBLAS("SCALAPACKelset",SCALAPACKelset_(d2d,&j,&one,d2desc,&v));
    }

    /* redistribute d from a column of a 2d matrix */
    PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,d2d,&one,&one,d2desc,darray,&one,&one,ddesc,&a->grid->ictxcol));
    ierr = PetscFree(d2d);CHKERRQ(ierr);

  } else {   /* column version */

    /* create ScaLAPACK descriptor for vector (1d block distribution) */
    ierr = PetscLayoutGetRanges(A->cmap,&ranges);CHKERRQ(ierr);
    ierr = PetscBLASIntCast(ranges[1],&nb);CHKERRQ(ierr);  /* D block size */
    dlld = 1;  /* row vector: leading dimension 1 */
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&dlld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* allocate 2d vector (zeroed: only locally owned entries get set below) */
    lszd = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
    ierr = PetscCalloc1(lszd,&d2d);CHKERRQ(ierr);

    /* create ScaLAPACK descriptor for vector (2d block distribution) */
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&dlld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* collect diagonal: broadcast each A(j,j) along its process column, store as d2d(1,j) */
    for (j=1;j<=a->N;j++) {
      PetscStackCallBLAS("SCALAPACKelget",SCALAPACKelget_("C"," ",&v,a->loc,&j,&j,a->desc));
      PetscStackCallBLAS("SCALAPACKelset",SCALAPACKelset_(d2d,&one,&j,d2desc,&v));
    }

    /* redistribute d from a row of a 2d matrix */
    PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,d2d,&one,&one,d2desc,darray,&one,&one,ddesc,&a->grid->ictxrow));
    ierr = PetscFree(d2d);CHKERRQ(ierr);
  }

  ierr = VecRestoreArray(D,&darray);CHKERRQ(ierr);
  ierr = VecAssemblyBegin(D);CHKERRQ(ierr);
  ierr = VecAssemblyEnd(D);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
454 
/*
   MatDiagonalScale_ScaLAPACK - Computes A = diag(L)*A*diag(R); either L or
   R may be NULL to skip that side.

   For each scaling vector: redistribute it from the PETSc 1d layout onto
   the 2d process grid (where only one grid row/column receives data), then
   BLACS-broadcast it along the other grid dimension so every process holds
   the factors for its local columns (R) or rows (L), and finally scale the
   local array in place.
*/
static PetscErrorCode MatDiagonalScale_ScaLAPACK(Mat A,Vec L,Vec R)
{
  PetscErrorCode    ierr;
  Mat_ScaLAPACK     *a = (Mat_ScaLAPACK*)A->data;
  const PetscScalar *d;
  const PetscInt    *ranges;
  PetscScalar       *d2d;
  PetscBLASInt      i,j,ddesc[9],d2desc[9],mb,nb,lszd,zero=0,one=1,dlld,info;

  PetscFunctionBegin;
  if (R) {   /* right scaling: multiply column j by R[j] */
    ierr = VecGetArrayRead(R,(const PetscScalar **)&d);CHKERRQ(ierr);
    /* create ScaLAPACK descriptor for vector (1d block distribution) */
    ierr = PetscLayoutGetRanges(A->cmap,&ranges);CHKERRQ(ierr);
    ierr = PetscBLASIntCast(ranges[1],&nb);CHKERRQ(ierr);  /* D block size */
    dlld = 1;
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&dlld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* allocate 2d vector */
    lszd = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
    ierr = PetscCalloc1(lszd,&d2d);CHKERRQ(ierr);

    /* create ScaLAPACK descriptor for vector (2d block distribution) */
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&dlld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* redistribute d to a row of a 2d matrix */
    PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,(PetscScalar*)d,&one,&one,ddesc,d2d,&one,&one,d2desc,&a->grid->ictxrow));

    /* broadcast along process columns: grid row 0 sends, the rest receive */
    if (!a->grid->myrow) Cdgebs2d(a->grid->ictxt,"C"," ",1,lszd,d2d,dlld);
    else Cdgebr2d(a->grid->ictxt,"C"," ",1,lszd,d2d,dlld,0,a->grid->mycol);

    /* local scaling */
    for (j=0;j<a->locc;j++) for (i=0;i<a->locr;i++) a->loc[i+j*a->lld] *= d2d[j];

    ierr = PetscFree(d2d);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(R,(const PetscScalar **)&d);CHKERRQ(ierr);
  }
  if (L) {   /* left scaling: multiply row i by L[i] */
    ierr = VecGetArrayRead(L,(const PetscScalar **)&d);CHKERRQ(ierr);
    /* create ScaLAPACK descriptor for vector (1d block distribution) */
    ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr);
    ierr = PetscBLASIntCast(ranges[1],&mb);CHKERRQ(ierr);  /* D block size */
    dlld = PetscMax(1,A->rmap->n);
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&dlld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* allocate 2d vector */
    lszd = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
    ierr = PetscCalloc1(lszd,&d2d);CHKERRQ(ierr);
    dlld = PetscMax(1,lszd);

    /* create ScaLAPACK descriptor for vector (2d block distribution) */
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&dlld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* redistribute d to a column of a 2d matrix */
    PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,(PetscScalar*)d,&one,&one,ddesc,d2d,&one,&one,d2desc,&a->grid->ictxcol));

    /* broadcast along process rows: grid column 0 sends, the rest receive */
    if (!a->grid->mycol) Cdgebs2d(a->grid->ictxt,"R"," ",lszd,1,d2d,dlld);
    else Cdgebr2d(a->grid->ictxt,"R"," ",lszd,1,d2d,dlld,a->grid->myrow,0);

    /* local scaling */
    for (i=0;i<a->locr;i++) for (j=0;j<a->locc;j++) a->loc[i+j*a->lld] *= d2d[i];

    ierr = PetscFree(d2d);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(L,(const PetscScalar **)&d);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
528 
/*
   MatMissingDiagonal_ScaLAPACK - Dense storage always allocates every
   diagonal entry, so nothing is ever missing; 'd' is left untouched.
*/
static PetscErrorCode MatMissingDiagonal_ScaLAPACK(Mat A,PetscBool *missing,PetscInt *d)
{
  PetscFunctionBegin;
  *missing = PETSC_FALSE;
  PetscFunctionReturn(0);
}
535 
/*
   MatScale_ScaLAPACK - Computes X = a*X by scaling the whole local array
   with one BLAS scal call. The count is lld*locc, i.e. it includes any
   padding rows between locr and lld; scaling the padding is harmless.
*/
static PetscErrorCode MatScale_ScaLAPACK(Mat X,PetscScalar a)
{
  Mat_ScaLAPACK *x = (Mat_ScaLAPACK*)X->data;
  PetscBLASInt  n,one=1;

  PetscFunctionBegin;
  n = x->lld*x->locc;
  PetscStackCallBLAS("BLASscal",BLASscal_(&n,&a,x->loc,&one));
  PetscFunctionReturn(0);
}
546 
/*
   MatShift_ScaLAPACK - Computes X = X + alpha*I by updating each diagonal
   entry X(i,i), i = 1..min(M,N), via ScaLAPACK element get/set (indices
   are 1-based; only the owning process stores the new value).
*/
static PetscErrorCode MatShift_ScaLAPACK(Mat X,PetscScalar alpha)
{
  Mat_ScaLAPACK *x = (Mat_ScaLAPACK*)X->data;
  PetscBLASInt  i,n;
  PetscScalar   v;

  PetscFunctionBegin;
  n = PetscMin(x->M,x->N);
  for (i=1;i<=n;i++) {
    PetscStackCallBLAS("SCALAPACKelget",SCALAPACKelget_("-"," ",&v,x->loc,&i,&i,x->desc));
    v += alpha;
    PetscStackCallBLAS("SCALAPACKelset",SCALAPACKelset_(x->loc,&i,&i,x->desc,&v));
  }
  PetscFunctionReturn(0);
}
562 
/*
   MatAXPY_ScaLAPACK - Computes Y = alpha*X + Y using ScaLAPACK's matrix
   add (p*geadd-style) routine with beta = 1. Both matrices must have the
   same grid/block distribution, which is checked first; 'str' is ignored
   since the matrices are dense.
*/
static PetscErrorCode MatAXPY_ScaLAPACK(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_ScaLAPACK  *x = (Mat_ScaLAPACK*)X->data;
  Mat_ScaLAPACK  *y = (Mat_ScaLAPACK*)Y->data;
  PetscBLASInt   one=1;
  PetscScalar    beta=1.0;

  PetscFunctionBegin;
  MatScaLAPACKCheckDistribution(Y,1,X,3);
  PetscStackCallBLAS("SCALAPACKmatadd",SCALAPACKmatadd_(&x->M,&x->N,&alpha,x->loc,&one,&one,x->desc,&beta,y->loc,&one,&one,y->desc));
  ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
577 
/*
   MatCopy_ScaLAPACK - Copies A's entries into B with a raw copy of the
   local array (lld*locc entries, padding included). Assumes A and B share
   the same distribution — TODO confirm callers guarantee this; 'str' is
   ignored for dense matrices.
*/
static PetscErrorCode MatCopy_ScaLAPACK(Mat A,Mat B,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
  Mat_ScaLAPACK  *b = (Mat_ScaLAPACK*)B->data;

  PetscFunctionBegin;
  ierr = PetscArraycpy(b->loc,a->loc,a->lld*a->locc);CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
589 
/*
   MatDuplicate_ScaLAPACK - Creates a new matrix with the same sizes and
   distribution as A. The distribution parameters (block sizes, source
   process) are copied into the new Mat_ScaLAPACK before MatSetUp so the
   duplicate is laid out identically; values are copied only for
   MAT_COPY_VALUES.
*/
static PetscErrorCode MatDuplicate_ScaLAPACK(Mat A,MatDuplicateOption op,Mat *B)
{
  Mat            Bs;
  MPI_Comm       comm;
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data,*b;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MatCreate(comm,&Bs);CHKERRQ(ierr);
  ierr = MatSetSizes(Bs,A->rmap->n,A->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
  ierr = MatSetType(Bs,MATSCALAPACK);CHKERRQ(ierr);
  b = (Mat_ScaLAPACK*)Bs->data;
  /* replicate A's distribution before allocation */
  b->M    = a->M;
  b->N    = a->N;
  b->mb   = a->mb;
  b->nb   = a->nb;
  b->rsrc = a->rsrc;
  b->csrc = a->csrc;
  ierr = MatSetUp(Bs);CHKERRQ(ierr);
  *B = Bs;
  if (op == MAT_COPY_VALUES) {
    /* identical layout, so a raw copy of the local array suffices */
    ierr = PetscArraycpy(b->loc,a->loc,a->lld*a->locc);CHKERRQ(ierr);
  }
  Bs->assembled = PETSC_TRUE;
  PetscFunctionReturn(0);
}
617 
/*
   MatTranspose_ScaLAPACK - Computes B = A^T. Only MAT_INITIAL_MATRIX is
   supported; the result swaps row/column block sizes and source process
   coordinates. In complex arithmetic the PBLAStran_ call conjugates
   (compare MatHermitianTranspose_ScaLAPACK below, which needs no fix-up),
   so the conjugation is undone entry-by-entry afterwards.
*/
static PetscErrorCode MatTranspose_ScaLAPACK(Mat A,MatReuse reuse,Mat *B)
{
  PetscErrorCode ierr;
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data, *b;
  Mat            Bs = *B;
  PetscBLASInt   one=1;
  PetscScalar    sone=1.0,zero=0.0;
#if defined(PETSC_USE_COMPLEX)
  PetscInt       i,j;
#endif

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    /* note swapped block sizes (nb,mb), dimensions (N,M) and sources (csrc,rsrc) */
    ierr = MatCreateScaLAPACK(PetscObjectComm((PetscObject)A),a->nb,a->mb,a->N,a->M,a->csrc,a->rsrc,&Bs);CHKERRQ(ierr);
    *B = Bs;
  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Only MAT_INITIAL_MATRIX supported");
  b = (Mat_ScaLAPACK*)Bs->data;
  PetscStackCallBLAS("PBLAStran",PBLAStran_(&a->N,&a->M,&sone,a->loc,&one,&one,a->desc,&zero,b->loc,&one,&one,b->desc));
#if defined(PETSC_USE_COMPLEX)
  /* undo conjugation */
  for (i=0;i<b->locr;i++) for (j=0;j<b->locc;j++) b->loc[i+j*b->lld] = PetscConj(b->loc[i+j*b->lld]);
#endif
  Bs->assembled = PETSC_TRUE;
  PetscFunctionReturn(0);
}
643 
644 static PetscErrorCode MatConjugate_ScaLAPACK(Mat A)
645 {
646   Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
647   PetscInt      i,j;
648 
649   PetscFunctionBegin;
650   for (i=0;i<a->locr;i++) for (j=0;j<a->locc;j++) a->loc[i+j*a->lld] = PetscConj(a->loc[i+j*a->lld]);
651   PetscFunctionReturn(0);
652 }
653 
/*
   MatHermitianTranspose_ScaLAPACK - Computes B = A^H. Only
   MAT_INITIAL_MATRIX is supported. Unlike MatTranspose_ScaLAPACK, no
   conjugation fix-up follows the PBLAStran_ call, consistent with that
   routine's comment that the call conjugates in complex arithmetic.
*/
static PetscErrorCode MatHermitianTranspose_ScaLAPACK(Mat A,MatReuse reuse,Mat *B)
{
  PetscErrorCode ierr;
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data, *b;
  Mat            Bs = *B;
  PetscBLASInt   one=1;
  PetscScalar    sone=1.0,zero=0.0;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    /* note swapped block sizes (nb,mb), dimensions (N,M) and sources (csrc,rsrc) */
    ierr = MatCreateScaLAPACK(PetscObjectComm((PetscObject)A),a->nb,a->mb,a->N,a->M,a->csrc,a->rsrc,&Bs);CHKERRQ(ierr);
    *B = Bs;
  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Only MAT_INITIAL_MATRIX supported");
  b = (Mat_ScaLAPACK*)Bs->data;
  PetscStackCallBLAS("PBLAStran",PBLAStran_(&a->N,&a->M,&sone,a->loc,&one,&one,a->desc,&zero,b->loc,&one,&one,b->desc));
  Bs->assembled = PETSC_TRUE;
  PetscFunctionReturn(0);
}
672 
/*
   MatSolve_ScaLAPACK - Solves A*X = B for a factored A (LU or Cholesky).

   B is copied into X, X's array is redistributed from the PETSc 1d layout
   onto the 2d process grid, the appropriate ScaLAPACK triangular solve
   (p*getrs for LU with stored pivots, p*potrs with the lower factor for
   Cholesky) is applied in place, and the result is redistributed back.
*/
static PetscErrorCode MatSolve_ScaLAPACK(Mat A,Vec B,Vec X)
{
  PetscErrorCode ierr;
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
  PetscScalar    *x,*x2d;
  const PetscInt *ranges;
  PetscBLASInt   xdesc[9],x2desc[9],mb,lszx,zero=0,one=1,xlld,nrhs=1,info;

  PetscFunctionBegin;
  ierr = VecCopy(B,X);CHKERRQ(ierr);  /* solve happens in place on X */
  ierr = VecGetArray(X,&x);CHKERRQ(ierr);

  /* create ScaLAPACK descriptor for a vector (1d block distribution) */
  ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(ranges[1],&mb);CHKERRQ(ierr);  /* x block size */
  xlld = PetscMax(1,A->rmap->n);
  PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&xlld,&info));
  PetscCheckScaLapackInfo("descinit",info);

  /* allocate 2d vector */
  lszx = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
  ierr = PetscMalloc1(lszx,&x2d);CHKERRQ(ierr);
  xlld = PetscMax(1,lszx);

  /* create ScaLAPACK descriptor for a vector (2d block distribution) */
  PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&xlld,&info));
  PetscCheckScaLapackInfo("descinit",info);

  /* redistribute x as a column of a 2d matrix */
  PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxcol));

  /* call ScaLAPACK subroutine */
  switch (A->factortype) {
    case MAT_FACTOR_LU:
      PetscStackCallBLAS("SCALAPACKgetrs",SCALAPACKgetrs_("N",&a->M,&nrhs,a->loc,&one,&one,a->desc,a->pivots,x2d,&one,&one,x2desc,&info));
      PetscCheckScaLapackInfo("getrs",info);
      break;
    case MAT_FACTOR_CHOLESKY:
      PetscStackCallBLAS("SCALAPACKpotrs",SCALAPACKpotrs_("L",&a->M,&nrhs,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&info));
      PetscCheckScaLapackInfo("potrs",info);
      break;
    default:
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unfactored Matrix or Unsupported MatFactorType");
  }

  /* redistribute x from a column of a 2d matrix */
  PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,x2d,&one,&one,x2desc,x,&one,&one,xdesc,&a->grid->ictxcol));

  ierr = PetscFree(x2d);CHKERRQ(ierr);
  ierr = VecRestoreArray(X,&x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
725 
726 static PetscErrorCode MatSolveAdd_ScaLAPACK(Mat A,Vec B,Vec Y,Vec X)
727 {
728   PetscErrorCode ierr;
729 
730   PetscFunctionBegin;
731   ierr = MatSolve_ScaLAPACK(A,B,X);CHKERRQ(ierr);
732   ierr = VecAXPY(X,1,Y);CHKERRQ(ierr);
733   PetscFunctionReturn(0);
734 }
735 
736 static PetscErrorCode MatMatSolve_ScaLAPACK(Mat A,Mat B,Mat X)
737 {
738   PetscErrorCode ierr;
739   Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data,*b,*x;
740   PetscBool      flg1,flg2;
741   PetscBLASInt   one=1,info;
742 
743   PetscFunctionBegin;
744   ierr = PetscObjectTypeCompare((PetscObject)B,MATSCALAPACK,&flg1);CHKERRQ(ierr);
745   ierr = PetscObjectTypeCompare((PetscObject)X,MATSCALAPACK,&flg2);CHKERRQ(ierr);
746   if (!(flg1 && flg2)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Both B and X must be of type MATSCALAPACK");
747   MatScaLAPACKCheckDistribution(B,1,X,2);
748   b = (Mat_ScaLAPACK*)B->data;
749   x = (Mat_ScaLAPACK*)X->data;
750   ierr = PetscArraycpy(x->loc,b->loc,b->lld*b->locc);CHKERRQ(ierr);
751 
752   switch (A->factortype) {
753     case MAT_FACTOR_LU:
754       PetscStackCallBLAS("SCALAPACKgetrs",SCALAPACKgetrs_("N",&a->M,&x->N,a->loc,&one,&one,a->desc,a->pivots,x->loc,&one,&one,x->desc,&info));
755       PetscCheckScaLapackInfo("getrs",info);
756       break;
757     case MAT_FACTOR_CHOLESKY:
758       PetscStackCallBLAS("SCALAPACKpotrs",SCALAPACKpotrs_("L",&a->M,&x->N,a->loc,&one,&one,a->desc,x->loc,&one,&one,x->desc,&info));
759       PetscCheckScaLapackInfo("potrs",info);
760       break;
761     default:
762       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unfactored Matrix or Unsupported MatFactorType");
763   }
764   PetscFunctionReturn(0);
765 }
766 
767 static PetscErrorCode MatLUFactor_ScaLAPACK(Mat A,IS row,IS col,const MatFactorInfo *factorinfo)
768 {
769   PetscErrorCode ierr;
770   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
771   PetscBLASInt   one=1,info;
772 
773   PetscFunctionBegin;
774   if (!a->pivots) {
775     ierr = PetscMalloc1(a->locr+a->mb,&a->pivots);CHKERRQ(ierr);
776     ierr = PetscLogObjectMemory((PetscObject)A,a->locr*sizeof(PetscBLASInt));CHKERRQ(ierr);
777   }
778   PetscStackCallBLAS("SCALAPACKgetrf",SCALAPACKgetrf_(&a->M,&a->N,a->loc,&one,&one,a->desc,a->pivots,&info));
779   PetscCheckScaLapackInfo("getrf",info);
780   A->factortype = MAT_FACTOR_LU;
781   A->assembled  = PETSC_TRUE;
782 
783   ierr = PetscFree(A->solvertype);CHKERRQ(ierr);
784   ierr = PetscStrallocpy(MATSOLVERSCALAPACK,&A->solvertype);CHKERRQ(ierr);
785   PetscFunctionReturn(0);
786 }
787 
/* Numeric phase of LU: copy A into the factor matrix F and factor F in place */
static PetscErrorCode MatLUFactorNumeric_ScaLAPACK(Mat F,Mat A,const MatFactorInfo *info)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatCopy(A,F,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  ierr = MatLUFactor_ScaLAPACK(F,0,0,info);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
797 
/* Symbolic phase of LU: intentionally empty, since dense factorizations need
   no symbolic analysis and storage is allocated when the factor is created */
static PetscErrorCode MatLUFactorSymbolic_ScaLAPACK(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
{
  PetscFunctionBegin;
  /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */
  PetscFunctionReturn(0);
}
804 
805 static PetscErrorCode MatCholeskyFactor_ScaLAPACK(Mat A,IS perm,const MatFactorInfo *factorinfo)
806 {
807   PetscErrorCode ierr;
808   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
809   PetscBLASInt   one=1,info;
810 
811   PetscFunctionBegin;
812   PetscStackCallBLAS("SCALAPACKpotrf",SCALAPACKpotrf_("L",&a->M,a->loc,&one,&one,a->desc,&info));
813   PetscCheckScaLapackInfo("potrf",info);
814   A->factortype = MAT_FACTOR_CHOLESKY;
815   A->assembled  = PETSC_TRUE;
816 
817   ierr = PetscFree(A->solvertype);CHKERRQ(ierr);
818   ierr = PetscStrallocpy(MATSOLVERSCALAPACK,&A->solvertype);CHKERRQ(ierr);
819   PetscFunctionReturn(0);
820 }
821 
/* Numeric phase of Cholesky: copy A into the factor matrix F and factor F in place */
static PetscErrorCode MatCholeskyFactorNumeric_ScaLAPACK(Mat F,Mat A,const MatFactorInfo *info)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatCopy(A,F,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  ierr = MatCholeskyFactor_ScaLAPACK(F,0,info);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
831 
/* Symbolic phase of Cholesky: intentionally empty, as for the LU case */
static PetscErrorCode MatCholeskyFactorSymbolic_ScaLAPACK(Mat F,Mat A,IS perm,const MatFactorInfo *info)
{
  PetscFunctionBegin;
  /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */
  PetscFunctionReturn(0);
}
838 
/* Reports MATSOLVERSCALAPACK as the solver package of a factored matrix */
PetscErrorCode MatFactorGetSolverType_scalapack_scalapack(Mat A,MatSolverType *type)
{
  PetscFunctionBegin;
  *type = MATSOLVERSCALAPACK;
  PetscFunctionReturn(0);
}
845 
846 static PetscErrorCode MatGetFactor_scalapack_scalapack(Mat A,MatFactorType ftype,Mat *F)
847 {
848   Mat            B;
849   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
850   PetscErrorCode ierr;
851 
852   PetscFunctionBegin;
853   /* Create the factorization matrix */
854   ierr = MatCreateScaLAPACK(PetscObjectComm((PetscObject)A),a->mb,a->nb,a->M,a->N,a->rsrc,a->csrc,&B);CHKERRQ(ierr);
855   B->trivialsymbolic = PETSC_TRUE;
856   B->factortype = ftype;
857   ierr = PetscFree(B->solvertype);CHKERRQ(ierr);
858   ierr = PetscStrallocpy(MATSOLVERSCALAPACK,&B->solvertype);CHKERRQ(ierr);
859 
860   ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_scalapack_scalapack);CHKERRQ(ierr);
861   *F = B;
862   PetscFunctionReturn(0);
863 }
864 
/* Registers the ScaLAPACK solver package for LU and Cholesky on MATSCALAPACK */
PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_ScaLAPACK(void)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSolverTypeRegister(MATSOLVERSCALAPACK,MATSCALAPACK,MAT_FACTOR_LU,MatGetFactor_scalapack_scalapack);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERSCALAPACK,MATSCALAPACK,MAT_FACTOR_CHOLESKY,MatGetFactor_scalapack_scalapack);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
874 
875 static PetscErrorCode MatNorm_ScaLAPACK(Mat A,NormType type,PetscReal *nrm)
876 {
877   PetscErrorCode ierr;
878   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
879   PetscBLASInt   one=1,lwork=0;
880   const char     *ntype;
881   PetscScalar    *work=NULL,dummy;
882 
883   PetscFunctionBegin;
884   switch (type) {
885     case NORM_1:
886       ntype = "1";
887       lwork = PetscMax(a->locr,a->locc);
888       break;
889     case NORM_FROBENIUS:
890       ntype = "F";
891       work  = &dummy;
892       break;
893     case NORM_INFINITY:
894       ntype = "I";
895       lwork = PetscMax(a->locr,a->locc);
896       break;
897     default:
898       SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Unsupported norm type");
899   }
900   if (lwork) { ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); }
901   *nrm = SCALAPACKlange_(ntype,&a->M,&a->N,a->loc,&one,&one,a->desc,work);
902   if (lwork) { ierr = PetscFree(work);CHKERRQ(ierr); }
903   PetscFunctionReturn(0);
904 }
905 
906 static PetscErrorCode MatZeroEntries_ScaLAPACK(Mat A)
907 {
908   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
909   PetscErrorCode ierr;
910 
911   PetscFunctionBegin;
912   ierr = PetscArrayzero(a->loc,a->lld*a->locc);CHKERRQ(ierr);
913   PetscFunctionReturn(0);
914 }
915 
916 static PetscErrorCode MatGetOwnershipIS_ScaLAPACK(Mat A,IS *rows,IS *cols)
917 {
918   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
919   PetscErrorCode ierr;
920   PetscInt       i,n,nb,isrc,nproc,iproc,*idx;
921 
922   PetscFunctionBegin;
923   if (rows) {
924     n     = a->locr;
925     nb    = a->mb;
926     isrc  = a->rsrc;
927     nproc = a->grid->nprow;
928     iproc = a->grid->myrow;
929     ierr  = PetscMalloc1(n,&idx);CHKERRQ(ierr);
930     for (i=0;i<n;i++) idx[i] = nproc*nb*(i/nb) + i%nb + ((nproc+iproc-isrc)%nproc)*nb;
931     ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx,PETSC_OWN_POINTER,rows);CHKERRQ(ierr);
932   }
933   if (cols) {
934     n     = a->locc;
935     nb    = a->nb;
936     isrc  = a->csrc;
937     nproc = a->grid->npcol;
938     iproc = a->grid->mycol;
939     ierr  = PetscMalloc1(n,&idx);CHKERRQ(ierr);
940     for (i=0;i<n;i++) idx[i] = nproc*nb*(i/nb) + i%nb + ((nproc+iproc-isrc)%nproc)*nb;
941     ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx,PETSC_OWN_POINTER,cols);CHKERRQ(ierr);
942   }
943   PetscFunctionReturn(0);
944 }
945 
/*
   MatConvert_ScaLAPACK_Dense - Converts a MATSCALAPACK matrix (2d block-cyclic
   layout) to MATDENSE (1d block-row layout) by calling PxGEMR2D to
   redistribute the data. If the matrix passed in with MAT_REUSE_MATRIX has
   row ranges different from the equal-split layout, the redistribution goes
   through an auxiliary dense matrix and the values are then transferred
   row by row with MatSetValues.
*/
static PetscErrorCode MatConvert_ScaLAPACK_Dense(Mat A,MatType newtype,MatReuse reuse,Mat *B)
{
  PetscErrorCode ierr;
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
  Mat            Bmpi;
  MPI_Comm       comm;
  PetscInt       i,M=A->rmap->N,N=A->cmap->N,m,n,rstart,rend,nz;
  const PetscInt *ranges,*branges,*cwork;
  const PetscScalar *vwork;
  PetscBLASInt   bdesc[9],bmb,zero=0,one=1,lld,info;
  PetscScalar    *barray;
  PetscBool      differ=PETSC_FALSE;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr);

  if (reuse == MAT_REUSE_MATRIX) { /* check if local sizes differ in A and B */
    ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
    ierr = PetscLayoutGetRanges((*B)->rmap,&branges);CHKERRQ(ierr);
    for (i=0;i<size;i++) if (ranges[i+1]!=branges[i+1]) { differ=PETSC_TRUE; break; }
  }

  if (reuse == MAT_REUSE_MATRIX && differ) { /* special case, use auxiliary dense matrix */
    ierr = MatCreate(comm,&Bmpi);CHKERRQ(ierr);
    m = PETSC_DECIDE;
    ierr = PetscSplitOwnershipEqual(comm,&m,&M);CHKERRQ(ierr);
    n = PETSC_DECIDE;
    ierr = PetscSplitOwnershipEqual(comm,&n,&N);CHKERRQ(ierr);
    ierr = MatSetSizes(Bmpi,m,n,M,N);CHKERRQ(ierr);
    ierr = MatSetType(Bmpi,MATDENSE);CHKERRQ(ierr);
    ierr = MatSetUp(Bmpi);CHKERRQ(ierr);

    /* create ScaLAPACK descriptor for B (1d block distribution) */
    ierr = PetscBLASIntCast(ranges[1],&bmb);CHKERRQ(ierr);  /* row block size */
    lld = PetscMax(A->rmap->n,1);  /* local leading dimension */
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(bdesc,&a->M,&a->N,&bmb,&a->N,&zero,&zero,&a->grid->ictxcol,&lld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* redistribute matrix */
    ierr = MatDenseGetArray(Bmpi,&barray);CHKERRQ(ierr);
    PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&a->N,a->loc,&one,&one,a->desc,barray,&one,&one,bdesc,&a->grid->ictxcol));
    ierr = MatDenseRestoreArray(Bmpi,&barray);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

    /* transfer rows of auxiliary matrix to the final matrix B */
    ierr = MatGetOwnershipRange(Bmpi,&rstart,&rend);CHKERRQ(ierr);
    for (i=rstart;i<rend;i++) {
      ierr = MatGetRow(Bmpi,i,&nz,&cwork,&vwork);CHKERRQ(ierr);
      ierr = MatSetValues(*B,1,&i,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow(Bmpi,i,&nz,&cwork,&vwork);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatDestroy(&Bmpi);CHKERRQ(ierr);

  } else {  /* normal cases */

    if (reuse == MAT_REUSE_MATRIX) Bmpi = *B;
    else {
      ierr = MatCreate(comm,&Bmpi);CHKERRQ(ierr);
      m = PETSC_DECIDE;
      ierr = PetscSplitOwnershipEqual(comm,&m,&M);CHKERRQ(ierr);
      n = PETSC_DECIDE;
      ierr = PetscSplitOwnershipEqual(comm,&n,&N);CHKERRQ(ierr);
      ierr = MatSetSizes(Bmpi,m,n,M,N);CHKERRQ(ierr);
      ierr = MatSetType(Bmpi,MATDENSE);CHKERRQ(ierr);
      ierr = MatSetUp(Bmpi);CHKERRQ(ierr);
    }

    /* create ScaLAPACK descriptor for B (1d block distribution) */
    ierr = PetscBLASIntCast(ranges[1],&bmb);CHKERRQ(ierr);  /* row block size */
    lld = PetscMax(A->rmap->n,1);  /* local leading dimension */
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(bdesc,&a->M,&a->N,&bmb,&a->N,&zero,&zero,&a->grid->ictxcol,&lld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* redistribute matrix */
    ierr = MatDenseGetArray(Bmpi,&barray);CHKERRQ(ierr);
    PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&a->N,a->loc,&one,&one,a->desc,barray,&one,&one,bdesc,&a->grid->ictxcol));
    ierr = MatDenseRestoreArray(Bmpi,&barray);CHKERRQ(ierr);

    ierr = MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    if (reuse == MAT_INPLACE_MATRIX) {
      ierr = MatHeaderReplace(A,&Bmpi);CHKERRQ(ierr);
    } else *B = Bmpi;
  }
  PetscFunctionReturn(0);
}
1037 
/*
   MatConvert_Dense_ScaLAPACK - Converts a MATDENSE matrix (1d block-row
   layout) to MATSCALAPACK (2d block-cyclic layout) with PxGEMR2D. A
   descriptor for the dense matrix is built on the column context so that the
   redistribution can be expressed as a ScaLAPACK-to-ScaLAPACK copy.
*/
PETSC_INTERN PetscErrorCode MatConvert_Dense_ScaLAPACK(Mat A,MatType newtype,MatReuse reuse,Mat *B)
{
  PetscErrorCode ierr;
  Mat_ScaLAPACK  *b;
  Mat            Bmpi;
  MPI_Comm       comm;
  PetscInt       M=A->rmap->N,N=A->cmap->N,m,n;
  const PetscInt *ranges;
  PetscBLASInt   adesc[9],amb,zero=0,one=1,lld,info;
  PetscScalar    *aarray;
  PetscInt       lda;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);

  if (reuse == MAT_REUSE_MATRIX) Bmpi = *B;
  else {
    ierr = MatCreate(comm,&Bmpi);CHKERRQ(ierr);
    m = PETSC_DECIDE;
    ierr = PetscSplitOwnershipEqual(comm,&m,&M);CHKERRQ(ierr);
    n = PETSC_DECIDE;
    ierr = PetscSplitOwnershipEqual(comm,&n,&N);CHKERRQ(ierr);
    ierr = MatSetSizes(Bmpi,m,n,M,N);CHKERRQ(ierr);
    ierr = MatSetType(Bmpi,MATSCALAPACK);CHKERRQ(ierr);
    ierr = MatSetUp(Bmpi);CHKERRQ(ierr);
  }
  b = (Mat_ScaLAPACK*)Bmpi->data;

  /* create ScaLAPACK descriptor for A (1d block distribution) */
  ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(ranges[1],&amb);CHKERRQ(ierr);  /* row block size */
  ierr = MatDenseGetLDA(A,&lda);CHKERRQ(ierr);
  lld = PetscMax(lda,1);  /* local leading dimension */
  PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(adesc,&b->M,&b->N,&amb,&b->N,&zero,&zero,&b->grid->ictxcol,&lld,&info));
  PetscCheckScaLapackInfo("descinit",info);

  /* redistribute matrix */
  ierr = MatDenseGetArray(A,&aarray);CHKERRQ(ierr);
  PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&b->M,&b->N,aarray,&one,&one,adesc,b->loc,&one,&one,b->desc,&b->grid->ictxcol));
  ierr = MatDenseRestoreArray(A,&aarray);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INPLACE_MATRIX) {
    ierr = MatHeaderReplace(A,&Bmpi);CHKERRQ(ierr);
  } else *B = Bmpi;
  PetscFunctionReturn(0);
}
1086 
1087 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
1088 {
1089   Mat               mat_scal;
1090   PetscErrorCode    ierr;
1091   PetscInt          M=A->rmap->N,N=A->cmap->N,rstart=A->rmap->rstart,rend=A->rmap->rend,m,n,row,ncols;
1092   const PetscInt    *cols;
1093   const PetscScalar *vals;
1094 
1095   PetscFunctionBegin;
1096   if (reuse == MAT_REUSE_MATRIX) {
1097     mat_scal = *newmat;
1098     ierr = MatZeroEntries(mat_scal);CHKERRQ(ierr);
1099   } else {
1100     ierr = MatCreate(PetscObjectComm((PetscObject)A),&mat_scal);CHKERRQ(ierr);
1101     m = PETSC_DECIDE;
1102     ierr = PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&m,&M);CHKERRQ(ierr);
1103     n = PETSC_DECIDE;
1104     ierr = PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&n,&N);CHKERRQ(ierr);
1105     ierr = MatSetSizes(mat_scal,m,n,M,N);CHKERRQ(ierr);
1106     ierr = MatSetType(mat_scal,MATSCALAPACK);CHKERRQ(ierr);
1107     ierr = MatSetUp(mat_scal);CHKERRQ(ierr);
1108   }
1109   for (row=rstart;row<rend;row++) {
1110     ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
1111     ierr = MatSetValues(mat_scal,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
1112     ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
1113   }
1114   ierr = MatAssemblyBegin(mat_scal,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1115   ierr = MatAssemblyEnd(mat_scal,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1116 
1117   if (reuse == MAT_INPLACE_MATRIX) { ierr = MatHeaderReplace(A,&mat_scal);CHKERRQ(ierr); }
1118   else *newmat = mat_scal;
1119   PetscFunctionReturn(0);
1120 }
1121 
1122 PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
1123 {
1124   Mat               mat_scal;
1125   PetscErrorCode    ierr;
1126   PetscInt          M=A->rmap->N,N=A->cmap->N,m,n,row,ncols,j,rstart=A->rmap->rstart,rend=A->rmap->rend;
1127   const PetscInt    *cols;
1128   const PetscScalar *vals;
1129   PetscScalar       v;
1130 
1131   PetscFunctionBegin;
1132   if (reuse == MAT_REUSE_MATRIX) {
1133     mat_scal = *newmat;
1134     ierr = MatZeroEntries(mat_scal);CHKERRQ(ierr);
1135   } else {
1136     ierr = MatCreate(PetscObjectComm((PetscObject)A),&mat_scal);CHKERRQ(ierr);
1137     m = PETSC_DECIDE;
1138     ierr = PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&m,&M);CHKERRQ(ierr);
1139     n = PETSC_DECIDE;
1140     ierr = PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&n,&N);CHKERRQ(ierr);
1141     ierr = MatSetSizes(mat_scal,m,n,M,N);CHKERRQ(ierr);
1142     ierr = MatSetType(mat_scal,MATSCALAPACK);CHKERRQ(ierr);
1143     ierr = MatSetUp(mat_scal);CHKERRQ(ierr);
1144   }
1145   ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
1146   for (row=rstart;row<rend;row++) {
1147     ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
1148     ierr = MatSetValues(mat_scal,1,&row,ncols,cols,vals,ADD_VALUES);CHKERRQ(ierr);
1149     for (j=0;j<ncols;j++) { /* lower triangular part */
1150       if (cols[j] == row) continue;
1151       v    = A->hermitian ? PetscConj(vals[j]) : vals[j];
1152       ierr = MatSetValues(mat_scal,1,&cols[j],1,&row,&v,ADD_VALUES);CHKERRQ(ierr);
1153     }
1154     ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
1155   }
1156   ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
1157   ierr = MatAssemblyBegin(mat_scal,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1158   ierr = MatAssemblyEnd(mat_scal,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1159 
1160   if (reuse == MAT_INPLACE_MATRIX) { ierr = MatHeaderReplace(A,&mat_scal);CHKERRQ(ierr); }
1161   else *newmat = mat_scal;
1162   PetscFunctionReturn(0);
1163 }
1164 
1165 static PetscErrorCode MatScaLAPACKSetPreallocation(Mat A)
1166 {
1167   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
1168   PetscErrorCode ierr;
1169   PetscInt       sz=0;
1170 
1171   PetscFunctionBegin;
1172   ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
1173   ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
1174   if (!a->lld) a->lld = a->locr;
1175 
1176   ierr = PetscFree(a->loc);CHKERRQ(ierr);
1177   ierr = PetscIntMultError(a->lld,a->locc,&sz);CHKERRQ(ierr);
1178   ierr = PetscCalloc1(sz,&a->loc);CHKERRQ(ierr);
1179   ierr = PetscLogObjectMemory((PetscObject)A,sz*sizeof(PetscScalar));CHKERRQ(ierr);
1180 
1181   A->preallocated = PETSC_TRUE;
1182   PetscFunctionReturn(0);
1183 }
1184 
1185 static PetscErrorCode MatDestroy_ScaLAPACK(Mat A)
1186 {
1187   Mat_ScaLAPACK      *a = (Mat_ScaLAPACK*)A->data;
1188   PetscErrorCode     ierr;
1189   Mat_ScaLAPACK_Grid *grid;
1190   PetscBool          flg;
1191   MPI_Comm           icomm;
1192 
1193   PetscFunctionBegin;
1194   ierr = MatStashDestroy_Private(&A->stash);CHKERRQ(ierr);
1195   ierr = PetscFree(a->loc);CHKERRQ(ierr);
1196   ierr = PetscFree(a->pivots);CHKERRQ(ierr);
1197   ierr = PetscCommDuplicate(PetscObjectComm((PetscObject)A),&icomm,NULL);CHKERRQ(ierr);
1198   ierr = MPI_Comm_get_attr(icomm,Petsc_ScaLAPACK_keyval,(void**)&grid,(int*)&flg);CHKERRMPI(ierr);
1199   if (--grid->grid_refct == 0) {
1200     Cblacs_gridexit(grid->ictxt);
1201     Cblacs_gridexit(grid->ictxrow);
1202     Cblacs_gridexit(grid->ictxcol);
1203     ierr = PetscFree(grid);CHKERRQ(ierr);
1204     ierr = MPI_Comm_delete_attr(icomm,Petsc_ScaLAPACK_keyval);CHKERRMPI(ierr);
1205   }
1206   ierr = PetscCommDestroy(&icomm);CHKERRQ(ierr);
1207   ierr = PetscObjectComposeFunction((PetscObject)A,"MatGetOwnershipIS_C",NULL);CHKERRQ(ierr);
1208   ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL);CHKERRQ(ierr);
1209   ierr = PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKSetBlockSizes_C",NULL);CHKERRQ(ierr);
1210   ierr = PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKGetBlockSizes_C",NULL);CHKERRQ(ierr);
1211   ierr = PetscFree(A->data);CHKERRQ(ierr);
1212   PetscFunctionReturn(0);
1213 }
1214 
/*
   MatScaLAPACKCheckLayout - Verifies that the layout assigns the same number
   of entries to every process, except that trailing processes may own fewer
   (or zero) entries; this is the layout produced by MatCreateScaLAPACK.
*/
PETSC_STATIC_INLINE PetscErrorCode MatScaLAPACKCheckLayout(PetscLayout map)
{
  PetscErrorCode ierr;
  const PetscInt *ranges;
  PetscMPIInt    size;
  PetscInt       i,n;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(map->comm,&size);CHKERRMPI(ierr);
  if (size>2) { /* with one or two processes only the last one can deviate, which is always allowed */
    ierr = PetscLayoutGetRanges(map,&ranges);CHKERRQ(ierr);
    n = ranges[1]-ranges[0];  /* local size of the first process, used as the reference */
    for (i=1;i<size-1;i++) if (ranges[i+1]-ranges[i]!=n) break;
    /* a deviation is only acceptable when both the deviating process and the
       next one are empty, i.e. the tail of the layout holds no entries */
    if (i<size-1 && ranges[i+1]-ranges[i]!=0 && ranges[i+2]-ranges[i+1]!=0) SETERRQ(map->comm,PETSC_ERR_SUP,"MATSCALAPACK must have equal local sizes in all processes (except possibly the last one), consider using MatCreateScaLAPACK");
  }
  PetscFunctionReturn(0);
}
1232 
/*
   MatSetUp_ScaLAPACK - Finalizes the layouts, computes the local sizes of the
   2d block-cyclic distribution, allocates the local array, and initializes
   the ScaLAPACK array descriptor.
*/
PetscErrorCode MatSetUp_ScaLAPACK(Mat A)
{
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
  PetscErrorCode ierr;
  PetscBLASInt   info=0;

  PetscFunctionBegin;
  ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);

  /* check that the layout is as enforced by MatCreateScaLAPACK */
  ierr = MatScaLAPACKCheckLayout(A->rmap);CHKERRQ(ierr);
  ierr = MatScaLAPACKCheckLayout(A->cmap);CHKERRQ(ierr);

  /* compute local sizes */
  ierr = PetscBLASIntCast(A->rmap->N,&a->M);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(A->cmap->N,&a->N);CHKERRQ(ierr);
  a->locr = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
  a->locc = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
  a->lld  = PetscMax(1,a->locr);  /* descinit requires lld >= 1 even with no local rows */

  /* allocate local array */
  ierr = MatScaLAPACKSetPreallocation(A);CHKERRQ(ierr);

  /* set up ScaLAPACK descriptor */
  PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(a->desc,&a->M,&a->N,&a->mb,&a->nb,&a->rsrc,&a->csrc,&a->grid->ictxt,&a->lld,&info));
  PetscCheckScaLapackInfo("descinit",info);
  PetscFunctionReturn(0);
}
1262 
1263 PetscErrorCode MatAssemblyBegin_ScaLAPACK(Mat A,MatAssemblyType type)
1264 {
1265   PetscErrorCode ierr;
1266   PetscInt       nstash,reallocs;
1267 
1268   PetscFunctionBegin;
1269   if (A->nooffprocentries) PetscFunctionReturn(0);
1270   ierr = MatStashScatterBegin_Private(A,&A->stash,NULL);CHKERRQ(ierr);
1271   ierr = MatStashGetInfo_Private(&A->stash,&nstash,&reallocs);CHKERRQ(ierr);
1272   ierr = PetscInfo2(A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
1273   PetscFunctionReturn(0);
1274 }
1275 
1276 PetscErrorCode MatAssemblyEnd_ScaLAPACK(Mat A,MatAssemblyType type)
1277 {
1278   PetscErrorCode ierr;
1279   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
1280   PetscMPIInt    n;
1281   PetscInt       i,flg,*row,*col;
1282   PetscScalar    *val;
1283   PetscBLASInt   gridx,gcidx,lridx,lcidx,rsrc,csrc;
1284 
1285   PetscFunctionBegin;
1286   if (A->nooffprocentries) PetscFunctionReturn(0);
1287   while (1) {
1288     ierr = MatStashScatterGetMesg_Private(&A->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
1289     if (!flg) break;
1290     for (i=0;i<n;i++) {
1291       ierr = PetscBLASIntCast(row[i]+1,&gridx);CHKERRQ(ierr);
1292       ierr = PetscBLASIntCast(col[i]+1,&gcidx);CHKERRQ(ierr);
1293       PetscStackCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc));
1294       if (rsrc!=a->grid->myrow || csrc!=a->grid->mycol) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_LIB,"Something went wrong, received value does not belong to this process");
1295       switch (A->insertmode) {
1296         case INSERT_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] = val[i]; break;
1297         case ADD_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] += val[i]; break;
1298         default: SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for InsertMode %d",(int)A->insertmode);
1299       }
1300     }
1301   }
1302   ierr = MatStashScatterEnd_Private(&A->stash);CHKERRQ(ierr);
1303   PetscFunctionReturn(0);
1304 }
1305 
1306 PetscErrorCode MatLoad_ScaLAPACK(Mat newMat,PetscViewer viewer)
1307 {
1308   PetscErrorCode ierr;
1309   Mat            Adense,As;
1310   MPI_Comm       comm;
1311 
1312   PetscFunctionBegin;
1313   ierr = PetscObjectGetComm((PetscObject)newMat,&comm);CHKERRQ(ierr);
1314   ierr = MatCreate(comm,&Adense);CHKERRQ(ierr);
1315   ierr = MatSetType(Adense,MATDENSE);CHKERRQ(ierr);
1316   ierr = MatLoad(Adense,viewer);CHKERRQ(ierr);
1317   ierr = MatConvert(Adense,MATSCALAPACK,MAT_INITIAL_MATRIX,&As);CHKERRQ(ierr);
1318   ierr = MatDestroy(&Adense);CHKERRQ(ierr);
1319   ierr = MatHeaderReplace(newMat,&As);CHKERRQ(ierr);
1320   PetscFunctionReturn(0);
1321 }
1322 
1323 /* -------------------------------------------------------------------*/
/* Function table for MATSCALAPACK: entries are positional and must follow the
   slot ordering of struct _MatOps (petsc/private/matimpl.h); a 0 entry means
   the operation is not implemented for this matrix type */
static struct _MatOps MatOps_Values = {
       MatSetValues_ScaLAPACK,
       0,
       0,
       MatMult_ScaLAPACK,
/* 4*/ MatMultAdd_ScaLAPACK,
       MatMultTranspose_ScaLAPACK,
       MatMultTransposeAdd_ScaLAPACK,
       MatSolve_ScaLAPACK,
       MatSolveAdd_ScaLAPACK,
       0,
/*10*/ 0,
       MatLUFactor_ScaLAPACK,
       MatCholeskyFactor_ScaLAPACK,
       0,
       MatTranspose_ScaLAPACK,
/*15*/ MatGetInfo_ScaLAPACK,
       0,
       MatGetDiagonal_ScaLAPACK,
       MatDiagonalScale_ScaLAPACK,
       MatNorm_ScaLAPACK,
/*20*/ MatAssemblyBegin_ScaLAPACK,
       MatAssemblyEnd_ScaLAPACK,
       MatSetOption_ScaLAPACK,
       MatZeroEntries_ScaLAPACK,
/*24*/ 0,
       MatLUFactorSymbolic_ScaLAPACK,
       MatLUFactorNumeric_ScaLAPACK,
       MatCholeskyFactorSymbolic_ScaLAPACK,
       MatCholeskyFactorNumeric_ScaLAPACK,
/*29*/ MatSetUp_ScaLAPACK,
       0,
       0,
       0,
       0,
/*34*/ MatDuplicate_ScaLAPACK,
       0,
       0,
       0,
       0,
/*39*/ MatAXPY_ScaLAPACK,
       0,
       0,
       0,
       MatCopy_ScaLAPACK,
/*44*/ 0,
       MatScale_ScaLAPACK,
       MatShift_ScaLAPACK,
       0,
       0,
/*49*/ 0,
       0,
       0,
       0,
       0,
/*54*/ 0,
       0,
       0,
       0,
       0,
/*59*/ 0,
       MatDestroy_ScaLAPACK,
       MatView_ScaLAPACK,
       0,
       0,
/*64*/ 0,
       0,
       0,
       0,
       0,
/*69*/ 0,
       0,
       MatConvert_ScaLAPACK_Dense,
       0,
       0,
/*74*/ 0,
       0,
       0,
       0,
       0,
/*79*/ 0,
       0,
       0,
       0,
       MatLoad_ScaLAPACK,
/*84*/ 0,
       0,
       0,
       0,
       0,
/*89*/ 0,
       0,
       MatMatMultNumeric_ScaLAPACK,
       0,
       0,
/*94*/ 0,
       0,
       0,
       MatMatTransposeMultNumeric_ScaLAPACK,
       0,
/*99*/ MatProductSetFromOptions_ScaLAPACK,
       0,
       0,
       MatConjugate_ScaLAPACK,
       0,
/*104*/0,
       0,
       0,
       0,
       0,
/*109*/MatMatSolve_ScaLAPACK,
       0,
       0,
       0,
       MatMissingDiagonal_ScaLAPACK,
/*114*/0,
       0,
       0,
       0,
       0,
/*119*/0,
       MatHermitianTranspose_ScaLAPACK,
       0,
       0,
       0,
/*124*/0,
       0,
       0,
       0,
       0,
/*129*/0,
       0,
       0,
       0,
       0,
/*134*/0,
       0,
       0,
       0,
       0,
       0,
/*140*/0,
       0,
       0,
       0,
       0,
/*145*/0,
       0,
       0
};
1474 
1475 static PetscErrorCode MatStashScatterBegin_ScaLAPACK(Mat mat,MatStash *stash,PetscInt *owners)
1476 {
1477   PetscInt           *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2;
1478   PetscInt           size=stash->size,nsends;
1479   PetscErrorCode     ierr;
1480   PetscInt           count,*sindices,**rindices,i,j,l;
1481   PetscScalar        **rvalues,*svalues;
1482   MPI_Comm           comm = stash->comm;
1483   MPI_Request        *send_waits,*recv_waits,*recv_waits1,*recv_waits2;
1484   PetscMPIInt        *sizes,*nlengths,nreceives;
1485   PetscInt           *sp_idx,*sp_idy;
1486   PetscScalar        *sp_val;
1487   PetscMatStashSpace space,space_next;
1488   PetscBLASInt       gridx,gcidx,lridx,lcidx,rsrc,csrc;
1489   Mat_ScaLAPACK      *a = (Mat_ScaLAPACK*)mat->data;
1490 
1491   PetscFunctionBegin;
1492   {                             /* make sure all processors are either in INSERTMODE or ADDMODE */
1493     InsertMode addv;
1494     ierr = MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
1495     if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
1496     mat->insertmode = addv; /* in case this processor had no cache */
1497   }
1498 
1499   bs2 = stash->bs*stash->bs;
1500 
1501   /*  first count number of contributors to each processor */
1502   ierr = PetscCalloc1(size,&nlengths);CHKERRQ(ierr);
1503   ierr = PetscMalloc1(stash->n+1,&owner);CHKERRQ(ierr);
1504 
1505   i     = j    = 0;
1506   space = stash->space_head;
1507   while (space) {
1508     space_next = space->next;
1509     for (l=0; l<space->local_used; l++) {
1510       ierr = PetscBLASIntCast(space->idx[l]+1,&gridx);CHKERRQ(ierr);
1511       ierr = PetscBLASIntCast(space->idy[l]+1,&gcidx);CHKERRQ(ierr);
1512       PetscStackCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc));
1513       j = Cblacs_pnum(a->grid->ictxt,rsrc,csrc);
1514       nlengths[j]++; owner[i] = j;
1515       i++;
1516     }
1517     space = space_next;
1518   }
1519 
1520   /* Now check what procs get messages - and compute nsends. */
1521   ierr = PetscCalloc1(size,&sizes);CHKERRQ(ierr);
1522   for (i=0, nsends=0; i<size; i++) {
1523     if (nlengths[i]) {
1524       sizes[i] = 1; nsends++;
1525     }
1526   }
1527 
1528   {PetscMPIInt *onodes,*olengths;
1529    /* Determine the number of messages to expect, their lengths, from from-ids */
1530    ierr = PetscGatherNumberOfMessages(comm,sizes,nlengths,&nreceives);CHKERRQ(ierr);
1531    ierr = PetscGatherMessageLengths(comm,nsends,nreceives,nlengths,&onodes,&olengths);CHKERRQ(ierr);
1532    /* since clubbing row,col - lengths are multiplied by 2 */
1533    for (i=0; i<nreceives; i++) olengths[i] *=2;
1534    ierr = PetscPostIrecvInt(comm,tag1,nreceives,onodes,olengths,&rindices,&recv_waits1);CHKERRQ(ierr);
   /* values are size 'bs2' lengths (and remove earlier factor 2) */
1536    for (i=0; i<nreceives; i++) olengths[i] = olengths[i]*bs2/2;
1537    ierr = PetscPostIrecvScalar(comm,tag2,nreceives,onodes,olengths,&rvalues,&recv_waits2);CHKERRQ(ierr);
1538    ierr = PetscFree(onodes);CHKERRQ(ierr);
1539    ierr = PetscFree(olengths);CHKERRQ(ierr);}
1540 
1541   /* do sends:
1542       1) starts[i] gives the starting index in svalues for stuff going to
1543          the ith processor
1544   */
1545   ierr = PetscMalloc2(bs2*stash->n,&svalues,2*(stash->n+1),&sindices);CHKERRQ(ierr);
1546   ierr = PetscMalloc1(2*nsends,&send_waits);CHKERRQ(ierr);
1547   ierr = PetscMalloc2(size,&startv,size,&starti);CHKERRQ(ierr);
1548   /* use 2 sends the first with all_a, the next with all_i and all_j */
1549   startv[0] = 0; starti[0] = 0;
1550   for (i=1; i<size; i++) {
1551     startv[i] = startv[i-1] + nlengths[i-1];
1552     starti[i] = starti[i-1] + 2*nlengths[i-1];
1553   }
1554 
1555   i     = 0;
1556   space = stash->space_head;
1557   while (space) {
1558     space_next = space->next;
1559     sp_idx     = space->idx;
1560     sp_idy     = space->idy;
1561     sp_val     = space->val;
1562     for (l=0; l<space->local_used; l++) {
1563       j = owner[i];
1564       if (bs2 == 1) {
1565         svalues[startv[j]] = sp_val[l];
1566       } else {
1567         PetscInt    k;
1568         PetscScalar *buf1,*buf2;
1569         buf1 = svalues+bs2*startv[j];
1570         buf2 = space->val + bs2*l;
1571         for (k=0; k<bs2; k++) buf1[k] = buf2[k];
1572       }
1573       sindices[starti[j]]             = sp_idx[l];
1574       sindices[starti[j]+nlengths[j]] = sp_idy[l];
1575       startv[j]++;
1576       starti[j]++;
1577       i++;
1578     }
1579     space = space_next;
1580   }
1581   startv[0] = 0;
1582   for (i=1; i<size; i++) startv[i] = startv[i-1] + nlengths[i-1];
1583 
1584   for (i=0,count=0; i<size; i++) {
1585     if (sizes[i]) {
1586       ierr = MPI_Isend(sindices+2*startv[i],2*nlengths[i],MPIU_INT,i,tag1,comm,send_waits+count++);CHKERRMPI(ierr);
1587       ierr = MPI_Isend(svalues+bs2*startv[i],bs2*nlengths[i],MPIU_SCALAR,i,tag2,comm,send_waits+count++);CHKERRMPI(ierr);
1588     }
1589   }
1590 #if defined(PETSC_USE_INFO)
1591   ierr = PetscInfo1(NULL,"No of messages: %d \n",nsends);CHKERRQ(ierr);
1592   for (i=0; i<size; i++) {
1593     if (sizes[i]) {
1594       ierr = PetscInfo2(NULL,"Mesg_to: %d: size: %d bytes\n",i,nlengths[i]*(bs2*sizeof(PetscScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);
1595     }
1596   }
1597 #endif
1598   ierr = PetscFree(nlengths);CHKERRQ(ierr);
1599   ierr = PetscFree(owner);CHKERRQ(ierr);
1600   ierr = PetscFree2(startv,starti);CHKERRQ(ierr);
1601   ierr = PetscFree(sizes);CHKERRQ(ierr);
1602 
1603   /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */
1604   ierr = PetscMalloc1(2*nreceives,&recv_waits);CHKERRQ(ierr);
1605 
1606   for (i=0; i<nreceives; i++) {
1607     recv_waits[2*i]   = recv_waits1[i];
1608     recv_waits[2*i+1] = recv_waits2[i];
1609   }
1610   stash->recv_waits = recv_waits;
1611 
1612   ierr = PetscFree(recv_waits1);CHKERRQ(ierr);
1613   ierr = PetscFree(recv_waits2);CHKERRQ(ierr);
1614 
1615   stash->svalues         = svalues;
1616   stash->sindices        = sindices;
1617   stash->rvalues         = rvalues;
1618   stash->rindices        = rindices;
1619   stash->send_waits      = send_waits;
1620   stash->nsends          = nsends;
1621   stash->nrecvs          = nreceives;
1622   stash->reproduce_count = 0;
1623   PetscFunctionReturn(0);
1624 }
1625 
1626 static PetscErrorCode MatScaLAPACKSetBlockSizes_ScaLAPACK(Mat A,PetscInt mb,PetscInt nb)
1627 {
1628   PetscErrorCode ierr;
1629   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
1630 
1631   PetscFunctionBegin;
1632   if (A->preallocated) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Cannot change block sizes after MatSetUp");
1633   if (mb<1 && mb!=PETSC_DECIDE) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"mb %D must be at least 1",mb);
1634   if (nb<1 && nb!=PETSC_DECIDE) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"nb %D must be at least 1",nb);
1635   ierr = PetscBLASIntCast((mb==PETSC_DECIDE)?DEFAULT_BLOCKSIZE:mb,&a->mb);CHKERRQ(ierr);
1636   ierr = PetscBLASIntCast((nb==PETSC_DECIDE)?a->mb:nb,&a->nb);CHKERRQ(ierr);
1637   PetscFunctionReturn(0);
1638 }
1639 
1640 /*@
   MatScaLAPACKSetBlockSizes - Sets the block sizes to be used for the distribution of
1642    the ScaLAPACK matrix
1643 
1644    Logically Collective on A
1645 
1646    Input Parameters:
1647 +  A  - a MATSCALAPACK matrix
1648 .  mb - the row block size
1649 -  nb - the column block size
1650 
1651    Level: intermediate
1652 
1653 .seealso: MatCreateScaLAPACK(), MatScaLAPACKGetBlockSizes()
1654 @*/
1655 PetscErrorCode MatScaLAPACKSetBlockSizes(Mat A,PetscInt mb,PetscInt nb)
1656 {
1657   PetscErrorCode ierr;
1658 
1659   PetscFunctionBegin;
1660   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
1661   PetscValidLogicalCollectiveInt(A,mb,2);
1662   PetscValidLogicalCollectiveInt(A,nb,3);
1663   ierr = PetscTryMethod(A,"MatScaLAPACKSetBlockSizes_C",(Mat,PetscInt,PetscInt),(A,mb,nb));CHKERRQ(ierr);
1664   PetscFunctionReturn(0);
1665 }
1666 
1667 static PetscErrorCode MatScaLAPACKGetBlockSizes_ScaLAPACK(Mat A,PetscInt *mb,PetscInt *nb)
1668 {
1669   Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
1670 
1671   PetscFunctionBegin;
1672   if (mb) *mb = a->mb;
1673   if (nb) *nb = a->nb;
1674   PetscFunctionReturn(0);
1675 }
1676 
1677 /*@
   MatScaLAPACKGetBlockSizes - Gets the block sizes used in the distribution of
1679    the ScaLAPACK matrix
1680 
1681    Not collective
1682 
1683    Input Parameter:
1684 .  A  - a MATSCALAPACK matrix
1685 
1686    Output Parameters:
1687 +  mb - the row block size
1688 -  nb - the column block size
1689 
1690    Level: intermediate
1691 
1692 .seealso: MatCreateScaLAPACK(), MatScaLAPACKSetBlockSizes()
1693 @*/
1694 PetscErrorCode MatScaLAPACKGetBlockSizes(Mat A,PetscInt *mb,PetscInt *nb)
1695 {
1696   PetscErrorCode ierr;
1697 
1698   PetscFunctionBegin;
1699   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
1700   ierr = PetscUseMethod(A,"MatScaLAPACKGetBlockSizes_C",(Mat,PetscInt*,PetscInt*),(A,mb,nb));CHKERRQ(ierr);
1701   PetscFunctionReturn(0);
1702 }
1703 
1704 PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Ref(MatStash*,PetscMPIInt*,PetscInt**,PetscInt**,PetscScalar**,PetscInt*);
1705 PETSC_INTERN PetscErrorCode MatStashScatterEnd_Ref(MatStash*);
1706 
1707 /*MC
1708    MATSCALAPACK = "scalapack" - A matrix type for dense matrices using the ScaLAPACK package
1709 
1710    Use ./configure --download-scalapack to install PETSc to use ScaLAPACK
1711 
1712    Use -pc_type lu -pc_factor_mat_solver_type scalapack to use this direct solver
1713 
1714    Options Database Keys:
1715 +  -mat_type scalapack - sets the matrix type to "scalapack" during a call to MatSetFromOptions()
1716 .  -mat_scalapack_grid_height - sets Grid Height for 2D cyclic ordering of internal matrix
1717 -  -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma)
1718 
1719    Level: beginner
1720 
1721 .seealso: MATDENSE, MATELEMENTAL
1722 M*/
1723 
1724 PETSC_EXTERN PetscErrorCode MatCreate_ScaLAPACK(Mat A)
1725 {
1726   Mat_ScaLAPACK      *a;
1727   PetscErrorCode     ierr;
1728   PetscBool          flg,flg1;
1729   Mat_ScaLAPACK_Grid *grid;
1730   MPI_Comm           icomm;
1731   PetscBLASInt       nprow,npcol,myrow,mycol;
1732   PetscInt           optv1,k=2,array[2]={0,0};
1733   PetscMPIInt        size;
1734 
1735   PetscFunctionBegin;
1736   ierr = PetscMemcpy(A->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1737   A->insertmode = NOT_SET_VALUES;
1738 
1739   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)A),1,&A->stash);CHKERRQ(ierr);
1740   A->stash.ScatterBegin   = MatStashScatterBegin_ScaLAPACK;
1741   A->stash.ScatterGetMesg = MatStashScatterGetMesg_Ref;
1742   A->stash.ScatterEnd     = MatStashScatterEnd_Ref;
1743   A->stash.ScatterDestroy = NULL;
1744 
1745   ierr = PetscNewLog(A,&a);CHKERRQ(ierr);
1746   A->data = (void*)a;
1747 
1748   /* Grid needs to be shared between multiple Mats on the same communicator, implement by attribute caching on the MPI_Comm */
1749   if (Petsc_ScaLAPACK_keyval == MPI_KEYVAL_INVALID) {
1750     ierr = MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN,MPI_COMM_NULL_DELETE_FN,&Petsc_ScaLAPACK_keyval,(void*)0);CHKERRMPI(ierr);
1751     ierr = PetscRegisterFinalize(Petsc_ScaLAPACK_keyval_free);CHKERRQ(ierr);
1752     ierr = PetscCitationsRegister(ScaLAPACKCitation,&ScaLAPACKCite);CHKERRQ(ierr);
1753   }
1754   ierr = PetscCommDuplicate(PetscObjectComm((PetscObject)A),&icomm,NULL);CHKERRQ(ierr);
1755   ierr = MPI_Comm_get_attr(icomm,Petsc_ScaLAPACK_keyval,(void**)&grid,(int*)&flg);CHKERRMPI(ierr);
1756   if (!flg) {
1757     ierr = PetscNewLog(A,&grid);CHKERRQ(ierr);
1758 
1759     ierr = MPI_Comm_size(icomm,&size);CHKERRMPI(ierr);
1760     grid->nprow = (PetscInt) (PetscSqrtReal((PetscReal)size) + 0.001);
1761 
1762     ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"ScaLAPACK Grid Options","Mat");CHKERRQ(ierr);
1763     ierr = PetscOptionsInt("-mat_scalapack_grid_height","Grid Height","None",grid->nprow,&optv1,&flg1);CHKERRQ(ierr);
1764     if (flg1) {
1765       if (size % optv1) SETERRQ2(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_INCOMP,"Grid Height %D must evenly divide CommSize %D",optv1,size);
1766       grid->nprow = optv1;
1767     }
1768     ierr = PetscOptionsEnd();CHKERRQ(ierr);
1769 
1770     if (size % grid->nprow) grid->nprow = 1;  /* cannot use a squarish grid, use a 1d grid */
1771     grid->npcol = size/grid->nprow;
1772     ierr = PetscBLASIntCast(grid->nprow,&nprow);CHKERRQ(ierr);
1773     ierr = PetscBLASIntCast(grid->npcol,&npcol);CHKERRQ(ierr);
1774     grid->ictxt = Csys2blacs_handle(icomm);
1775     Cblacs_gridinit(&grid->ictxt,"R",nprow,npcol);
1776     Cblacs_gridinfo(grid->ictxt,&nprow,&npcol,&myrow,&mycol);
1777     grid->grid_refct = 1;
1778     grid->nprow      = nprow;
1779     grid->npcol      = npcol;
1780     grid->myrow      = myrow;
1781     grid->mycol      = mycol;
1782     /* auxiliary 1d BLACS contexts for 1xsize and sizex1 grids */
1783     grid->ictxrow = Csys2blacs_handle(icomm);
1784     Cblacs_gridinit(&grid->ictxrow,"R",1,size);
1785     grid->ictxcol = Csys2blacs_handle(icomm);
1786     Cblacs_gridinit(&grid->ictxcol,"R",size,1);
1787     ierr = MPI_Comm_set_attr(icomm,Petsc_ScaLAPACK_keyval,(void*)grid);CHKERRMPI(ierr);
1788 
1789   } else grid->grid_refct++;
1790   ierr = PetscCommDestroy(&icomm);CHKERRQ(ierr);
1791   a->grid = grid;
1792   a->mb   = DEFAULT_BLOCKSIZE;
1793   a->nb   = DEFAULT_BLOCKSIZE;
1794 
1795   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),NULL,"ScaLAPACK Options","Mat");CHKERRQ(ierr);
1796   ierr = PetscOptionsIntArray("-mat_scalapack_block_sizes","Size of the blocks to use (one or two comma-separated integers)","MatCreateScaLAPACK",array,&k,&flg);CHKERRQ(ierr);
1797   if (flg) {
1798     a->mb = array[0];
1799     a->nb = (k>1)? array[1]: a->mb;
1800   }
1801   ierr = PetscOptionsEnd();CHKERRQ(ierr);
1802 
1803   ierr = PetscObjectComposeFunction((PetscObject)A,"MatGetOwnershipIS_C",MatGetOwnershipIS_ScaLAPACK);CHKERRQ(ierr);
1804   ierr = PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKSetBlockSizes_C",MatScaLAPACKSetBlockSizes_ScaLAPACK);CHKERRQ(ierr);
1805   ierr = PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKGetBlockSizes_C",MatScaLAPACKGetBlockSizes_ScaLAPACK);CHKERRQ(ierr);
1806   ierr = PetscObjectChangeTypeName((PetscObject)A,MATSCALAPACK);CHKERRQ(ierr);
1807   PetscFunctionReturn(0);
1808 }
1809 
1810 /*@C
1811    MatCreateScaLAPACK - Creates a dense parallel matrix in ScaLAPACK format
1812    (2D block cyclic distribution).
1813 
1814    Collective
1815 
1816    Input Parameters:
1817 +  comm - MPI communicator
1818 .  mb   - row block size (or PETSC_DECIDE to have it set)
1819 .  nb   - column block size (or PETSC_DECIDE to have it set)
1820 .  M    - number of global rows
1821 .  N    - number of global columns
1822 .  rsrc - coordinate of process that owns the first row of the distributed matrix
1823 -  csrc - coordinate of process that owns the first column of the distributed matrix
1824 
1825    Output Parameter:
1826 .  A - the matrix
1827 
1828    Options Database Keys:
1829 .  -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma)
1830 
1831    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
1832    MatXXXXSetPreallocation() paradigm instead of this routine directly.
1833    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
1834 
1835    Notes:
1836    If PETSC_DECIDE is used for the block sizes, then an appropriate value
1837    is chosen.
1838 
1839    Storage Information:
   Storage is completely managed by ScaLAPACK, so this requires PETSc to be
1841    configured with ScaLAPACK. In particular, PETSc's local sizes lose
1842    significance and are thus ignored. The block sizes refer to the values
1843    used for the distributed matrix, not the same meaning as in BAIJ.
1844 
1845    Level: intermediate
1846 
1847 .seealso: MatCreate(), MatCreateDense(), MatSetValues()
1848 @*/
1849 PetscErrorCode MatCreateScaLAPACK(MPI_Comm comm,PetscInt mb,PetscInt nb,PetscInt M,PetscInt N,PetscInt rsrc,PetscInt csrc,Mat *A)
1850 {
1851   PetscErrorCode ierr;
1852   Mat_ScaLAPACK  *a;
1853   PetscInt       m,n;
1854 
1855   PetscFunctionBegin;
1856   ierr = MatCreate(comm,A);CHKERRQ(ierr);
1857   ierr = MatSetType(*A,MATSCALAPACK);CHKERRQ(ierr);
1858   if (M==PETSC_DECIDE || N==PETSC_DECIDE) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot use PETSC_DECIDE for matrix dimensions");
1859   /* rows and columns are NOT distributed according to PetscSplitOwnership */
1860   m = PETSC_DECIDE;
1861   ierr = PetscSplitOwnershipEqual(comm,&m,&M);CHKERRQ(ierr);
1862   n = PETSC_DECIDE;
1863   ierr = PetscSplitOwnershipEqual(comm,&n,&N);CHKERRQ(ierr);
1864   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
1865   a = (Mat_ScaLAPACK*)(*A)->data;
1866   ierr = PetscBLASIntCast(M,&a->M);CHKERRQ(ierr);
1867   ierr = PetscBLASIntCast(N,&a->N);CHKERRQ(ierr);
1868   ierr = PetscBLASIntCast((mb==PETSC_DECIDE)?DEFAULT_BLOCKSIZE:mb,&a->mb);CHKERRQ(ierr);
1869   ierr = PetscBLASIntCast((nb==PETSC_DECIDE)?a->mb:nb,&a->nb);CHKERRQ(ierr);
1870   ierr = PetscBLASIntCast(rsrc,&a->rsrc);CHKERRQ(ierr);
1871   ierr = PetscBLASIntCast(csrc,&a->csrc);CHKERRQ(ierr);
1872   ierr = MatSetUp(*A);CHKERRQ(ierr);
1873   PetscFunctionReturn(0);
1874 }
1875 
1876