xref: /petsc/src/mat/impls/scalapack/matscalapack.c (revision f5ff9c666c0d37e8a7ec3aa2f8e2aa9e44449bdb) !
1 #include <petsc/private/petscscalapack.h>  /*I "petscmat.h" I*/
2 
/* BibTeX entry for the ScaLAPACK Users' Guide; presumably passed to
   PetscCitationsRegister() together with the ScaLAPACKCite flag below
   (registration site not shown in this chunk) */
const char ScaLAPACKCitation[] = "@BOOK{scalapack-user-guide,\n"
"       AUTHOR = {L. S. Blackford and J. Choi and A. Cleary and E. D'Azevedo and\n"
"                 J. Demmel and I. Dhillon and J. Dongarra and S. Hammarling and\n"
"                 G. Henry and A. Petitet and K. Stanley and D. Walker and R. C. Whaley},\n"
"       TITLE = {Sca{LAPACK} Users' Guide},\n"
"       PUBLISHER = {SIAM},\n"
"       ADDRESS = {Philadelphia, PA},\n"
"       YEAR = 1997\n"
"}\n";
/* set once the citation has been registered, so it is only emitted once */
static PetscBool ScaLAPACKCite = PETSC_FALSE;

/* default ScaLAPACK block size (mb/nb) when the user does not specify one */
#define DEFAULT_BLOCKSIZE 64

/*
    The variable Petsc_ScaLAPACK_keyval is used to indicate an MPI attribute that
  is attached to a communicator, in this case the attribute is a Mat_ScaLAPACK_Grid
*/
static PetscMPIInt Petsc_ScaLAPACK_keyval = MPI_KEYVAL_INVALID;
21 
/*
   Petsc_ScaLAPACK_keyval_free - Releases the MPI keyval used to attach the
   ScaLAPACK grid to communicators; presumably registered as a finalizer so it
   runs at PetscFinalize() time (registration site not visible in this chunk).
*/
static PetscErrorCode Petsc_ScaLAPACK_keyval_free(void)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscInfo(NULL,"Freeing Petsc_ScaLAPACK_keyval\n");CHKERRQ(ierr);
  ierr = MPI_Comm_free_keyval(&Petsc_ScaLAPACK_keyval);CHKERRMPI(ierr);  /* resets it to MPI_KEYVAL_INVALID */
  PetscFunctionReturn(0);
}
31 
/*
   MatView_ScaLAPACK - Viewer for MATSCALAPACK matrices.

   For ASCII viewers in INFO/INFO_DETAIL format only the distribution metadata
   is printed (block sizes, process grid shape, source process coordinates,
   largest local block) and the function returns; FACTOR_INFO prints nothing.
   In every other case the matrix is converted to MATDENSE and viewing is
   delegated to MatView() on the dense copy.
*/
static PetscErrorCode MatView_ScaLAPACK(Mat A,PetscViewer viewer)
{
  PetscErrorCode    ierr;
  Mat_ScaLAPACK     *a = (Mat_ScaLAPACK*)A->data;
  PetscBool         iascii;
  PetscViewerFormat format;
  Mat               Adense;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* casts to int keep the %d format specifiers valid for PetscBLASInt fields */
      ierr = PetscViewerASCIIPrintf(viewer,"block sizes: %d,%d\n",(int)a->mb,(int)a->nb);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"grid height=%d, grid width=%d\n",(int)a->grid->nprow,(int)a->grid->npcol);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"coordinates of process owning first row and column: (%d,%d)\n",(int)a->rsrc,(int)a->csrc);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"dimension of largest local matrix: %d x %d\n",(int)a->locr,(int)a->locc);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }
  /* convert to dense format and call MatView() */
  ierr = MatConvert(A,MATDENSE,MAT_INITIAL_MATRIX,&Adense);CHKERRQ(ierr);
  ierr = MatView(Adense,viewer);CHKERRQ(ierr);
  ierr = MatDestroy(&Adense);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
60 
61 static PetscErrorCode MatGetInfo_ScaLAPACK(Mat A,MatInfoType flag,MatInfo *info)
62 {
63   PetscErrorCode ierr;
64   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
65   PetscLogDouble isend[2],irecv[2];
66 
67   PetscFunctionBegin;
68   info->block_size = 1.0;
69 
70   isend[0] = a->lld*a->locc;     /* locally allocated */
71   isend[1] = a->locr*a->locc;    /* used submatrix */
72   if (flag == MAT_LOCAL || flag == MAT_GLOBAL_MAX) {
73     info->nz_allocated   = isend[0];
74     info->nz_used        = isend[1];
75   } else if (flag == MAT_GLOBAL_MAX) {
76     ierr = MPIU_Allreduce(isend,irecv,2,MPIU_PETSCLOGDOUBLE,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
77     info->nz_allocated   = irecv[0];
78     info->nz_used        = irecv[1];
79   } else if (flag == MAT_GLOBAL_SUM) {
80     ierr = MPIU_Allreduce(isend,irecv,2,MPIU_PETSCLOGDOUBLE,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
81     info->nz_allocated   = irecv[0];
82     info->nz_used        = irecv[1];
83   }
84 
85   info->nz_unneeded       = 0;
86   info->assemblies        = A->num_ass;
87   info->mallocs           = 0;
88   info->memory            = ((PetscObject)A)->mem;
89   info->fill_ratio_given  = 0;
90   info->fill_ratio_needed = 0;
91   info->factor_mallocs    = 0;
92   PetscFunctionReturn(0);
93 }
94 
95 PetscErrorCode MatSetOption_ScaLAPACK(Mat A,MatOption op,PetscBool flg)
96 {
97   PetscFunctionBegin;
98   switch (op) {
99     case MAT_NEW_NONZERO_LOCATIONS:
100     case MAT_NEW_NONZERO_LOCATION_ERR:
101     case MAT_NEW_NONZERO_ALLOCATION_ERR:
102     case MAT_SYMMETRIC:
103     case MAT_SORTED_FULL:
104     case MAT_HERMITIAN:
105       break;
106     default:
107       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unsupported option %s",MatOptions[op]);
108   }
109   PetscFunctionReturn(0);
110 }
111 
/*
   MatSetValues_ScaLAPACK - Inserts/adds a logically dense block of values.

   For each (row,col) pair the 1-based global indices are mapped with
   infog2l to local indices plus owner coordinates in the process grid.
   Locally owned entries are written directly into the local array; entries
   owned by other processes are queued in the stash, to be communicated
   during MatAssemblyBegin/End (negative indices are skipped, as in other
   MatSetValues implementations).
*/
static PetscErrorCode MatSetValues_ScaLAPACK(Mat A,PetscInt nr,const PetscInt *rows,PetscInt nc,const PetscInt *cols,const PetscScalar *vals,InsertMode imode)
{
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,j;
  PetscBLASInt   gridx,gcidx,lridx,lcidx,rsrc,csrc;

  PetscFunctionBegin;
  for (i=0;i<nr;i++) {
    if (rows[i] < 0) continue;
    ierr = PetscBLASIntCast(rows[i]+1,&gridx);CHKERRQ(ierr);  /* +1: ScaLAPACK indices are 1-based */
    for (j=0;j<nc;j++) {
      if (cols[j] < 0) continue;
      ierr = PetscBLASIntCast(cols[j]+1,&gcidx);CHKERRQ(ierr);
      PetscStackCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc));
      if (rsrc==a->grid->myrow && csrc==a->grid->mycol) {
        /* entry is local: write into the column-major local array (1-based lridx/lcidx) */
        switch (imode) {
          case INSERT_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] = vals[i*nc+j]; break;
          case ADD_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] += vals[i*nc+j]; break;
          default: SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for InsertMode %d",(int)imode);
        }
      } else {
        /* off-process entry: stash it for assembly-time communication */
        if (A->nooffprocentries) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process entry even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set");
        A->assembled = PETSC_FALSE;
        ierr = MatStashValuesRow_Private(&A->stash,rows[i],1,cols+j,vals+i*nc+j,(PetscBool)(imode==ADD_VALUES));CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}
142 
/*
   MatMultXXXYYY_ScaLAPACK - Shared kernel behind MatMult, MatMultTranspose
   and their Add variants: computes y = op(A)*x + beta*y, where op(A) is A or
   its transpose depending on the transpose flag (beta is 0.0 for plain
   multiply, 1.0 for the Add variants).

   PETSc vectors use a 1d block-row distribution while the matrix lives on a
   2d process grid, so each vector is first redistributed (gemr2d) into a
   single column (or row, for the transpose case) of a 2d-distributed array,
   the PBLAS gemv is applied, and the result is redistributed back.  The 1d
   block size is taken as ranges[1], i.e. the local size of process 0.

   When beta==0 the initial redistribution of y is skipped since gemv then
   overwrites y entirely.
*/
static PetscErrorCode MatMultXXXYYY_ScaLAPACK(Mat A,PetscBool transpose,PetscScalar beta,const PetscScalar *x,PetscScalar *y)
{
  PetscErrorCode ierr;
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
  PetscScalar    *x2d,*y2d,alpha=1.0;
  const PetscInt *ranges;
  PetscBLASInt   xdesc[9],ydesc[9],x2desc[9],y2desc[9],mb,nb,lszx,lszy,zero=0,one=1,xlld,ylld,info;

  PetscFunctionBegin;
  if (transpose) {

    /* create ScaLAPACK descriptors for vectors (1d block distribution) */
    ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr);
    ierr = PetscBLASIntCast(ranges[1],&mb);CHKERRQ(ierr);  /* x block size */
    xlld = PetscMax(1,A->rmap->n);
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&xlld,&info));
    PetscCheckScaLapackInfo("descinit",info);
    ierr = PetscLayoutGetRanges(A->cmap,&ranges);CHKERRQ(ierr);
    ierr = PetscBLASIntCast(ranges[1],&nb);CHKERRQ(ierr);  /* y block size */
    ylld = 1;
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ydesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&ylld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* allocate 2d vectors */
    lszx = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
    lszy = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
    ierr = PetscMalloc2(lszx,&x2d,lszy,&y2d);CHKERRQ(ierr);
    xlld = PetscMax(1,lszx);

    /* create ScaLAPACK descriptors for vectors (2d block distribution) */
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&xlld,&info));
    PetscCheckScaLapackInfo("descinit",info);
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(y2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&ylld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* redistribute x as a column of a 2d matrix */
    PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,(PetscScalar*)x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxcol));

    /* redistribute y as a row of a 2d matrix */
    if (beta!=0.0) PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,y,&one,&one,ydesc,y2d,&one,&one,y2desc,&a->grid->ictxrow));

    /* call PBLAS subroutine */
    PetscStackCallBLAS("PBLASgemv",PBLASgemv_("T",&a->M,&a->N,&alpha,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&one,&beta,y2d,&one,&one,y2desc,&one));

    /* redistribute y from a row of a 2d matrix */
    PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,y2d,&one,&one,y2desc,y,&one,&one,ydesc,&a->grid->ictxrow));

  } else {   /* non-transpose */

    /* create ScaLAPACK descriptors for vectors (1d block distribution) */
    ierr = PetscLayoutGetRanges(A->cmap,&ranges);CHKERRQ(ierr);
    ierr = PetscBLASIntCast(ranges[1],&nb);CHKERRQ(ierr);  /* x block size */
    xlld = 1;
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&xlld,&info));
    PetscCheckScaLapackInfo("descinit",info);
    ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr);
    ierr = PetscBLASIntCast(ranges[1],&mb);CHKERRQ(ierr);  /* y block size */
    ylld = PetscMax(1,A->rmap->n);
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ydesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&ylld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* allocate 2d vectors */
    lszy = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
    lszx = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
    ierr = PetscMalloc2(lszx,&x2d,lszy,&y2d);CHKERRQ(ierr);
    ylld = PetscMax(1,lszy);

    /* create ScaLAPACK descriptors for vectors (2d block distribution) */
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&xlld,&info));
    PetscCheckScaLapackInfo("descinit",info);
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(y2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&ylld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* redistribute x as a row of a 2d matrix */
    PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,(PetscScalar*)x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxrow));

    /* redistribute y as a column of a 2d matrix */
    if (beta!=0.0) PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,y,&one,&one,ydesc,y2d,&one,&one,y2desc,&a->grid->ictxcol));

    /* call PBLAS subroutine */
    PetscStackCallBLAS("PBLASgemv",PBLASgemv_("N",&a->M,&a->N,&alpha,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&one,&beta,y2d,&one,&one,y2desc,&one));

    /* redistribute y from a column of a 2d matrix */
    PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,y2d,&one,&one,y2desc,y,&one,&one,ydesc,&a->grid->ictxcol));

  }
  ierr = PetscFree2(x2d,y2d);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
232 
233 static PetscErrorCode MatMult_ScaLAPACK(Mat A,Vec x,Vec y)
234 {
235   PetscErrorCode    ierr;
236   const PetscScalar *xarray;
237   PetscScalar       *yarray;
238 
239   PetscFunctionBegin;
240   ierr = VecGetArrayRead(x,&xarray);CHKERRQ(ierr);
241   ierr = VecGetArray(y,&yarray);CHKERRQ(ierr);
242   ierr = MatMultXXXYYY_ScaLAPACK(A,PETSC_FALSE,0.0,xarray,yarray);CHKERRQ(ierr);
243   ierr = VecRestoreArrayRead(x,&xarray);CHKERRQ(ierr);
244   ierr = VecRestoreArray(y,&yarray);CHKERRQ(ierr);
245   PetscFunctionReturn(0);
246 }
247 
248 static PetscErrorCode MatMultTranspose_ScaLAPACK(Mat A,Vec x,Vec y)
249 {
250   PetscErrorCode    ierr;
251   const PetscScalar *xarray;
252   PetscScalar       *yarray;
253 
254   PetscFunctionBegin;
255   ierr = VecGetArrayRead(x,&xarray);CHKERRQ(ierr);
256   ierr = VecGetArray(y,&yarray);CHKERRQ(ierr);
257   ierr = MatMultXXXYYY_ScaLAPACK(A,PETSC_TRUE,0.0,xarray,yarray);CHKERRQ(ierr);
258   ierr = VecRestoreArrayRead(x,&xarray);CHKERRQ(ierr);
259   ierr = VecRestoreArray(y,&yarray);CHKERRQ(ierr);
260   PetscFunctionReturn(0);
261 }
262 
263 static PetscErrorCode MatMultAdd_ScaLAPACK(Mat A,Vec x,Vec y,Vec z)
264 {
265   PetscErrorCode    ierr;
266   const PetscScalar *xarray;
267   PetscScalar       *zarray;
268 
269   PetscFunctionBegin;
270   if (y != z) { ierr = VecCopy(y,z);CHKERRQ(ierr); }
271   ierr = VecGetArrayRead(x,&xarray);CHKERRQ(ierr);
272   ierr = VecGetArray(z,&zarray);CHKERRQ(ierr);
273   ierr = MatMultXXXYYY_ScaLAPACK(A,PETSC_FALSE,1.0,xarray,zarray);CHKERRQ(ierr);
274   ierr = VecRestoreArrayRead(x,&xarray);CHKERRQ(ierr);
275   ierr = VecRestoreArray(z,&zarray);CHKERRQ(ierr);
276   PetscFunctionReturn(0);
277 }
278 
279 static PetscErrorCode MatMultTransposeAdd_ScaLAPACK(Mat A,Vec x,Vec y,Vec z)
280 {
281   PetscErrorCode    ierr;
282   const PetscScalar *xarray;
283   PetscScalar       *zarray;
284 
285   PetscFunctionBegin;
286   if (y != z) { ierr = VecCopy(y,z);CHKERRQ(ierr); }
287   ierr = VecGetArrayRead(x,&xarray);CHKERRQ(ierr);
288   ierr = VecGetArray(z,&zarray);CHKERRQ(ierr);
289   ierr = MatMultXXXYYY_ScaLAPACK(A,PETSC_TRUE,1.0,xarray,zarray);CHKERRQ(ierr);
290   ierr = VecRestoreArrayRead(x,&xarray);CHKERRQ(ierr);
291   ierr = VecRestoreArray(z,&zarray);CHKERRQ(ierr);
292   PetscFunctionReturn(0);
293 }
294 
295 PetscErrorCode MatMatMultNumeric_ScaLAPACK(Mat A,Mat B,Mat C)
296 {
297   Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
298   Mat_ScaLAPACK *b = (Mat_ScaLAPACK*)B->data;
299   Mat_ScaLAPACK *c = (Mat_ScaLAPACK*)C->data;
300   PetscScalar   sone=1.0,zero=0.0;
301   PetscBLASInt  one=1;
302 
303   PetscFunctionBegin;
304   PetscStackCallBLAS("PBLASgemm",PBLASgemm_("N","N",&a->M,&b->N,&a->N,&sone,a->loc,&one,&one,a->desc,b->loc,&one,&one,b->desc,&zero,c->loc,&one,&one,c->desc));
305   C->assembled = PETSC_TRUE;
306   PetscFunctionReturn(0);
307 }
308 
/*
   MatMatMultSymbolic_ScaLAPACK - Symbolic phase of C = A*B: sizes and sets up
   C as MATSCALAPACK and installs the numeric routine.  fill is ignored
   (dense storage).
*/
PetscErrorCode MatMatMultSymbolic_ScaLAPACK(Mat A,Mat B,PetscReal fill,Mat C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetSizes(C,A->rmap->n,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
  ierr = MatSetType(C,MATSCALAPACK);CHKERRQ(ierr);
  ierr = MatSetUp(C);CHKERRQ(ierr);
  /* set after MatSetType(), which installs the type's default ops table */
  C->ops->matmultnumeric = MatMatMultNumeric_ScaLAPACK;
  PetscFunctionReturn(0);
}
320 
321 static PetscErrorCode MatMatTransposeMultNumeric_ScaLAPACK(Mat A,Mat B,Mat C)
322 {
323   Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
324   Mat_ScaLAPACK *b = (Mat_ScaLAPACK*)B->data;
325   Mat_ScaLAPACK *c = (Mat_ScaLAPACK*)C->data;
326   PetscScalar   sone=1.0,zero=0.0;
327   PetscBLASInt  one=1;
328 
329   PetscFunctionBegin;
330   PetscStackCallBLAS("PBLASgemm",PBLASgemm_("N","T",&a->M,&b->M,&a->N,&sone,a->loc,&one,&one,a->desc,b->loc,&one,&one,b->desc,&zero,c->loc,&one,&one,c->desc));
331   C->assembled = PETSC_TRUE;
332   PetscFunctionReturn(0);
333 }
334 
335 static PetscErrorCode MatMatTransposeMultSymbolic_ScaLAPACK(Mat A,Mat B,PetscReal fill,Mat C)
336 {
337   PetscErrorCode ierr;
338 
339   PetscFunctionBegin;
340   ierr = MatSetSizes(C,A->rmap->n,B->rmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
341   ierr = MatSetType(C,MATSCALAPACK);CHKERRQ(ierr);
342   ierr = MatSetUp(C);CHKERRQ(ierr);
343   PetscFunctionReturn(0);
344 }
345 
346 /* --------------------------------------- */
/* Hook the AB (C = A*B) product: install the ScaLAPACK symbolic routine and
   the generic MatProduct AB symbolic driver. */
static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_AB(Mat C)
{
  PetscFunctionBegin;
  C->ops->matmultsymbolic = MatMatMultSymbolic_ScaLAPACK;
  C->ops->productsymbolic = MatProductSymbolic_AB;
  PetscFunctionReturn(0);
}
354 
/* Hook the ABt (C = A*B^T) product: install the ScaLAPACK symbolic routine
   and the generic MatProduct ABt symbolic driver. */
static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_ABt(Mat C)
{
  PetscFunctionBegin;
  C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_ScaLAPACK;
  C->ops->productsymbolic          = MatProductSymbolic_ABt;
  PetscFunctionReturn(0);
}
362 
363 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_ScaLAPACK(Mat C)
364 {
365   PetscErrorCode ierr;
366   Mat_Product    *product = C->product;
367 
368   PetscFunctionBegin;
369   switch (product->type) {
370     case MATPRODUCT_AB:
371       ierr = MatProductSetFromOptions_ScaLAPACK_AB(C);CHKERRQ(ierr);
372       break;
373     case MATPRODUCT_ABt:
374       ierr = MatProductSetFromOptions_ScaLAPACK_ABt(C);CHKERRQ(ierr);
375       break;
376     default: SETERRQ1(PetscObjectComm((PetscObject)C),PETSC_ERR_SUP,"MatProduct type %s is not supported for ScaLAPACK and ScaLAPACK matrices",MatProductTypes[product->type]);
377   }
378   PetscFunctionReturn(0);
379 }
380 /* --------------------------------------- */
381 
/*
   MatGetDiagonal_ScaLAPACK - Extracts the main diagonal into vector D.

   The diagonal is gathered entry-by-entry (elget broadcasts each value along
   a process row or column, elset stores it) into an auxiliary 2d-distributed
   column/row, which is then redistributed (gemr2d) to match D's 1d layout.
   The row-oriented path is used when M<=N (D has M entries), the
   column-oriented path otherwise (D has N entries).
*/
static PetscErrorCode MatGetDiagonal_ScaLAPACK(Mat A,Vec D)
{
  PetscErrorCode    ierr;
  Mat_ScaLAPACK     *a = (Mat_ScaLAPACK*)A->data;
  PetscScalar       *darray,*d2d,v;
  const PetscInt    *ranges;
  PetscBLASInt      j,ddesc[9],d2desc[9],mb,nb,lszd,zero=0,one=1,dlld,info;

  PetscFunctionBegin;
  ierr = VecGetArray(D,&darray);CHKERRQ(ierr);

  if (A->rmap->N<=A->cmap->N) {   /* row version */

    /* create ScaLAPACK descriptor for vector (1d block distribution) */
    ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr);
    ierr = PetscBLASIntCast(ranges[1],&mb);CHKERRQ(ierr);  /* D block size */
    dlld = PetscMax(1,A->rmap->n);
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&dlld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* allocate 2d vector */
    lszd = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
    ierr = PetscCalloc1(lszd,&d2d);CHKERRQ(ierr);
    dlld = PetscMax(1,lszd);

    /* create ScaLAPACK descriptor for vector (2d block distribution) */
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&dlld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* collect diagonal */
    for (j=1;j<=a->M;j++) {   /* 1-based global indices */
      PetscStackCallBLAS("SCALAPACKelget",SCALAPACKelget_("R"," ",&v,a->loc,&j,&j,a->desc));
      PetscStackCallBLAS("SCALAPACKelset",SCALAPACKelset_(d2d,&j,&one,d2desc,&v));
    }

    /* redistribute d from a column of a 2d matrix */
    PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,d2d,&one,&one,d2desc,darray,&one,&one,ddesc,&a->grid->ictxcol));
    ierr = PetscFree(d2d);CHKERRQ(ierr);

  } else {   /* column version */

    /* create ScaLAPACK descriptor for vector (1d block distribution) */
    ierr = PetscLayoutGetRanges(A->cmap,&ranges);CHKERRQ(ierr);
    ierr = PetscBLASIntCast(ranges[1],&nb);CHKERRQ(ierr);  /* D block size */
    dlld = 1;
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&dlld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* allocate 2d vector */
    lszd = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
    ierr = PetscCalloc1(lszd,&d2d);CHKERRQ(ierr);

    /* create ScaLAPACK descriptor for vector (2d block distribution) */
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&dlld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* collect diagonal */
    for (j=1;j<=a->N;j++) {   /* 1-based global indices */
      PetscStackCallBLAS("SCALAPACKelget",SCALAPACKelget_("C"," ",&v,a->loc,&j,&j,a->desc));
      PetscStackCallBLAS("SCALAPACKelset",SCALAPACKelset_(d2d,&one,&j,d2desc,&v));
    }

    /* redistribute d from a row of a 2d matrix */
    PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,d2d,&one,&one,d2desc,darray,&one,&one,ddesc,&a->grid->ictxrow));
    ierr = PetscFree(d2d);CHKERRQ(ierr);
  }

  ierr = VecRestoreArray(D,&darray);CHKERRQ(ierr);
  ierr = VecAssemblyBegin(D);CHKERRQ(ierr);
  ierr = VecAssemblyEnd(D);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
454 
/*
   MatDiagonalScale_ScaLAPACK - Computes A = diag(L)*A*diag(R); either vector
   may be NULL to skip that side.

   For each side the 1d-distributed vector is redistributed (gemr2d) into a
   single row (R) or column (L) of the 2d grid, then broadcast along the
   process columns (R) or rows (L) with BLACS gebs2d/gebr2d so every process
   holds the scale factors for its local block, and finally applied entrywise
   to the local array.
*/
static PetscErrorCode MatDiagonalScale_ScaLAPACK(Mat A,Vec L,Vec R)
{
  PetscErrorCode    ierr;
  Mat_ScaLAPACK     *a = (Mat_ScaLAPACK*)A->data;
  const PetscScalar *d;
  const PetscInt    *ranges;
  PetscScalar       *d2d;
  PetscBLASInt      i,j,ddesc[9],d2desc[9],mb,nb,lszd,zero=0,one=1,dlld,info;

  PetscFunctionBegin;
  if (R) {   /* right scaling: multiplies column j by R[j] */
    ierr = VecGetArrayRead(R,(const PetscScalar **)&d);CHKERRQ(ierr);
    /* create ScaLAPACK descriptor for vector (1d block distribution) */
    ierr = PetscLayoutGetRanges(A->cmap,&ranges);CHKERRQ(ierr);
    ierr = PetscBLASIntCast(ranges[1],&nb);CHKERRQ(ierr);  /* D block size */
    dlld = 1;
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&dlld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* allocate 2d vector */
    lszd = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
    ierr = PetscCalloc1(lszd,&d2d);CHKERRQ(ierr);

    /* create ScaLAPACK descriptor for vector (2d block distribution) */
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&dlld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* redistribute d to a row of a 2d matrix */
    PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,(PetscScalar*)d,&one,&one,ddesc,d2d,&one,&one,d2desc,&a->grid->ictxrow));

    /* broadcast along process columns: row 0 sends, the others receive */
    if (!a->grid->myrow) Cdgebs2d(a->grid->ictxt,"C"," ",1,lszd,d2d,dlld);
    else Cdgebr2d(a->grid->ictxt,"C"," ",1,lszd,d2d,dlld,0,a->grid->mycol);

    /* local scaling */
    for (j=0;j<a->locc;j++) for (i=0;i<a->locr;i++) a->loc[i+j*a->lld] *= d2d[j];

    ierr = PetscFree(d2d);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(R,(const PetscScalar **)&d);CHKERRQ(ierr);
  }
  if (L) {   /* left scaling: multiplies row i by L[i] */
    ierr = VecGetArrayRead(L,(const PetscScalar **)&d);CHKERRQ(ierr);
    /* create ScaLAPACK descriptor for vector (1d block distribution) */
    ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr);
    ierr = PetscBLASIntCast(ranges[1],&mb);CHKERRQ(ierr);  /* D block size */
    dlld = PetscMax(1,A->rmap->n);
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&dlld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* allocate 2d vector */
    lszd = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
    ierr = PetscCalloc1(lszd,&d2d);CHKERRQ(ierr);
    dlld = PetscMax(1,lszd);

    /* create ScaLAPACK descriptor for vector (2d block distribution) */
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&dlld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* redistribute d to a column of a 2d matrix */
    PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,(PetscScalar*)d,&one,&one,ddesc,d2d,&one,&one,d2desc,&a->grid->ictxcol));

    /* broadcast along process rows: column 0 sends, the others receive */
    if (!a->grid->mycol) Cdgebs2d(a->grid->ictxt,"R"," ",lszd,1,d2d,dlld);
    else Cdgebr2d(a->grid->ictxt,"R"," ",lszd,1,d2d,dlld,a->grid->myrow,0);

    /* local scaling */
    for (i=0;i<a->locr;i++) for (j=0;j<a->locc;j++) a->loc[i+j*a->lld] *= d2d[i];

    ierr = PetscFree(d2d);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(L,(const PetscScalar **)&d);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
528 
/* A dense ScaLAPACK matrix stores every entry, so the diagonal can never be
   missing; d is left untouched (it is only meaningful when *missing is true). */
static PetscErrorCode MatMissingDiagonal_ScaLAPACK(Mat A,PetscBool *missing,PetscInt *d)
{
  PetscFunctionBegin;
  *missing = PETSC_FALSE;
  PetscFunctionReturn(0);
}
535 
536 static PetscErrorCode MatScale_ScaLAPACK(Mat X,PetscScalar a)
537 {
538   Mat_ScaLAPACK *x = (Mat_ScaLAPACK*)X->data;
539   PetscBLASInt  n,one=1;
540 
541   PetscFunctionBegin;
542   n = x->lld*x->locc;
543   PetscStackCallBLAS("BLASscal",BLASscal_(&n,&a,x->loc,&one));
544   PetscFunctionReturn(0);
545 }
546 
547 static PetscErrorCode MatShift_ScaLAPACK(Mat X,PetscScalar alpha)
548 {
549   Mat_ScaLAPACK *x = (Mat_ScaLAPACK*)X->data;
550   PetscBLASInt  i,n;
551   PetscScalar   v;
552 
553   PetscFunctionBegin;
554   n = PetscMin(x->M,x->N);
555   for (i=1;i<=n;i++) {
556     PetscStackCallBLAS("SCALAPACKelget",SCALAPACKelget_("-"," ",&v,x->loc,&i,&i,x->desc));
557     v += alpha;
558     PetscStackCallBLAS("SCALAPACKelset",SCALAPACKelset_(x->loc,&i,&i,x->desc,&v));
559   }
560   PetscFunctionReturn(0);
561 }
562 
/*
   MatAXPY_ScaLAPACK - Computes Y = alpha*X + Y using ScaLAPACK's distributed
   matrix add (Y := alpha*X + beta*Y with beta=1).  Both matrices must share
   the same block-cyclic distribution, which is verified first; str is
   irrelevant for dense storage.
*/
static PetscErrorCode MatAXPY_ScaLAPACK(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_ScaLAPACK  *x = (Mat_ScaLAPACK*)X->data;
  Mat_ScaLAPACK  *y = (Mat_ScaLAPACK*)Y->data;
  PetscBLASInt   one=1;
  PetscScalar    beta=1.0;

  PetscFunctionBegin;
  MatScaLAPACKCheckDistribution(Y,1,X,3);
  PetscStackCallBLAS("SCALAPACKmatadd",SCALAPACKmatadd_(&x->M,&x->N,&alpha,x->loc,&one,&one,x->desc,&beta,y->loc,&one,&one,y->desc));
  ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
577 
578 static PetscErrorCode MatCopy_ScaLAPACK(Mat A,Mat B,MatStructure str)
579 {
580   PetscErrorCode ierr;
581   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
582   Mat_ScaLAPACK  *b = (Mat_ScaLAPACK*)B->data;
583 
584   PetscFunctionBegin;
585   ierr = PetscArraycpy(b->loc,a->loc,a->lld*a->locc);CHKERRQ(ierr);
586   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
587   PetscFunctionReturn(0);
588 }
589 
/*
   MatDuplicate_ScaLAPACK - Creates a new MATSCALAPACK matrix with the same
   sizes and block-cyclic layout as A, optionally copying the values.

   The distribution parameters (mb, nb, rsrc, csrc) are copied into the new
   matrix *between* MatSetType() and MatSetUp(), so that MatSetUp() builds
   the local storage with exactly the same layout as A; this is what makes
   the flat PetscArraycpy of the local array valid for MAT_COPY_VALUES.
*/
static PetscErrorCode MatDuplicate_ScaLAPACK(Mat A,MatDuplicateOption op,Mat *B)
{
  Mat            Bs;
  MPI_Comm       comm;
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data,*b;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MatCreate(comm,&Bs);CHKERRQ(ierr);
  ierr = MatSetSizes(Bs,A->rmap->n,A->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
  ierr = MatSetType(Bs,MATSCALAPACK);CHKERRQ(ierr);
  b = (Mat_ScaLAPACK*)Bs->data;
  b->M    = a->M;
  b->N    = a->N;
  b->mb   = a->mb;
  b->nb   = a->nb;
  b->rsrc = a->rsrc;
  b->csrc = a->csrc;
  ierr = MatSetUp(Bs);CHKERRQ(ierr);
  *B = Bs;
  if (op == MAT_COPY_VALUES) {
    ierr = PetscArraycpy(b->loc,a->loc,a->lld*a->locc);CHKERRQ(ierr);
  }
  Bs->assembled = PETSC_TRUE;
  PetscFunctionReturn(0);
}
617 
/*
   MatTranspose_ScaLAPACK - Computes B = A^T; only MAT_INITIAL_MATRIX is
   supported.  B is created with swapped dimensions, block sizes and source
   process coordinates so its distribution is the transpose of A's.

   In complex builds the PBLAStran_ call apparently produces the conjugate
   transpose (see the explicit un-conjugation loop below, and the lack of one
   in MatHermitianTranspose_ScaLAPACK), so the entries are conjugated back to
   obtain the plain transpose.
*/
static PetscErrorCode MatTranspose_ScaLAPACK(Mat A,MatReuse reuse,Mat *B)
{
  PetscErrorCode ierr;
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data, *b;
  Mat            Bs = *B;
  PetscBLASInt   one=1;
  PetscScalar    sone=1.0,zero=0.0;
#if defined(PETSC_USE_COMPLEX)
  PetscInt       i,j;
#endif

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    /* swapped block sizes (nb,mb), dimensions (N,M) and sources (csrc,rsrc) */
    ierr = MatCreateScaLAPACK(PetscObjectComm((PetscObject)A),a->nb,a->mb,a->N,a->M,a->csrc,a->rsrc,&Bs);CHKERRQ(ierr);
    *B = Bs;
  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Only MAT_INITIAL_MATRIX supported");
  b = (Mat_ScaLAPACK*)Bs->data;
  PetscStackCallBLAS("PBLAStran",PBLAStran_(&a->N,&a->M,&sone,a->loc,&one,&one,a->desc,&zero,b->loc,&one,&one,b->desc));
#if defined(PETSC_USE_COMPLEX)
  /* undo conjugation */
  for (i=0;i<b->locr;i++) for (j=0;j<b->locc;j++) b->loc[i+j*b->lld] = PetscConj(b->loc[i+j*b->lld]);
#endif
  Bs->assembled = PETSC_TRUE;
  PetscFunctionReturn(0);
}
643 
644 static PetscErrorCode MatConjugate_ScaLAPACK(Mat A)
645 {
646   Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
647   PetscInt      i,j;
648 
649   PetscFunctionBegin;
650   for (i=0;i<a->locr;i++) for (j=0;j<a->locc;j++) a->loc[i+j*a->lld] = PetscConj(a->loc[i+j*a->lld]);
651   PetscFunctionReturn(0);
652 }
653 
/*
   MatHermitianTranspose_ScaLAPACK - Computes B = A^H; only MAT_INITIAL_MATRIX
   is supported.  Same construction as MatTranspose_ScaLAPACK (swapped
   dimensions/block sizes/sources), but without the un-conjugation step:
   the PBLAStran_ result is taken as-is, which for the complex build yields
   the conjugate transpose (in real builds A^H == A^T anyway).
*/
static PetscErrorCode MatHermitianTranspose_ScaLAPACK(Mat A,MatReuse reuse,Mat *B)
{
  PetscErrorCode ierr;
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data, *b;
  Mat            Bs = *B;
  PetscBLASInt   one=1;
  PetscScalar    sone=1.0,zero=0.0;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreateScaLAPACK(PetscObjectComm((PetscObject)A),a->nb,a->mb,a->N,a->M,a->csrc,a->rsrc,&Bs);CHKERRQ(ierr);
    *B = Bs;
  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Only MAT_INITIAL_MATRIX supported");
  b = (Mat_ScaLAPACK*)Bs->data;
  PetscStackCallBLAS("PBLAStran",PBLAStran_(&a->N,&a->M,&sone,a->loc,&one,&one,a->desc,&zero,b->loc,&one,&one,b->desc));
  Bs->assembled = PETSC_TRUE;
  PetscFunctionReturn(0);
}
672 
/*
   MatSolve_ScaLAPACK - Solves A*X = B using the LU or Cholesky factors
   previously stored in A (A must be a factored matrix).

   B is copied into X, X's data is redistributed (gemr2d) from its 1d block
   layout into a single column of the 2d grid, the triangular solves are done
   in place by getrs (LU, with a->pivots) or potrs (Cholesky, lower factor),
   and the result is redistributed back into X.
*/
static PetscErrorCode MatSolve_ScaLAPACK(Mat A,Vec B,Vec X)
{
  PetscErrorCode ierr;
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
  PetscScalar    *x,*x2d;
  const PetscInt *ranges;
  PetscBLASInt   xdesc[9],x2desc[9],mb,lszx,zero=0,one=1,xlld,nrhs=1,info;

  PetscFunctionBegin;
  ierr = VecCopy(B,X);CHKERRQ(ierr);
  ierr = VecGetArray(X,&x);CHKERRQ(ierr);

  /* create ScaLAPACK descriptor for a vector (1d block distribution) */
  ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(ranges[1],&mb);CHKERRQ(ierr);  /* x block size */
  xlld = PetscMax(1,A->rmap->n);
  PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&xlld,&info));
  PetscCheckScaLapackInfo("descinit",info);

  /* allocate 2d vector */
  lszx = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
  ierr = PetscMalloc1(lszx,&x2d);CHKERRQ(ierr);
  xlld = PetscMax(1,lszx);

  /* create ScaLAPACK descriptor for a vector (2d block distribution) */
  PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&xlld,&info));
  PetscCheckScaLapackInfo("descinit",info);

  /* redistribute x as a column of a 2d matrix */
  PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxcol));

  /* call ScaLAPACK subroutine */
  switch (A->factortype) {
    case MAT_FACTOR_LU:
      PetscStackCallBLAS("SCALAPACKgetrs",SCALAPACKgetrs_("N",&a->M,&nrhs,a->loc,&one,&one,a->desc,a->pivots,x2d,&one,&one,x2desc,&info));
      PetscCheckScaLapackInfo("getrs",info);
      break;
    case MAT_FACTOR_CHOLESKY:
      PetscStackCallBLAS("SCALAPACKpotrs",SCALAPACKpotrs_("L",&a->M,&nrhs,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&info));
      PetscCheckScaLapackInfo("potrs",info);
      break;
    default:
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unfactored Matrix or Unsupported MatFactorType");
  }

  /* redistribute x from a column of a 2d matrix */
  PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,x2d,&one,&one,x2desc,x,&one,&one,xdesc,&a->grid->ictxcol));

  ierr = PetscFree(x2d);CHKERRQ(ierr);
  ierr = VecRestoreArray(X,&x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
725 
/*
   MatSolveAdd_ScaLAPACK - computes X = inv(A)*B + Y for a previously factored A.

   Delegates the triangular solves to MatSolve_ScaLAPACK() and then accumulates Y.
*/
static PetscErrorCode MatSolveAdd_ScaLAPACK(Mat A,Vec B,Vec Y,Vec X)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSolve_ScaLAPACK(A,B,X);CHKERRQ(ierr);  /* X = inv(A)*B */
  ierr = VecAXPY(X,1,Y);CHKERRQ(ierr);             /* X = X + Y */
  PetscFunctionReturn(0);
}
735 
736 static PetscErrorCode MatMatSolve_ScaLAPACK(Mat A,Mat B,Mat X)
737 {
738   PetscErrorCode ierr;
739   Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data,*b,*x;
740   PetscBool      flg1,flg2;
741   PetscBLASInt   one=1,info;
742 
743   PetscFunctionBegin;
744   ierr = PetscObjectTypeCompare((PetscObject)B,MATSCALAPACK,&flg1);CHKERRQ(ierr);
745   ierr = PetscObjectTypeCompare((PetscObject)X,MATSCALAPACK,&flg2);CHKERRQ(ierr);
746   if (!(flg1 && flg2)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Both B and X must be of type MATSCALAPACK");
747   MatScaLAPACKCheckDistribution(B,1,X,2);
748   b = (Mat_ScaLAPACK*)B->data;
749   x = (Mat_ScaLAPACK*)X->data;
750   ierr = PetscArraycpy(x->loc,b->loc,b->lld*b->locc);CHKERRQ(ierr);
751 
752   switch (A->factortype) {
753     case MAT_FACTOR_LU:
754       PetscStackCallBLAS("SCALAPACKgetrs",SCALAPACKgetrs_("N",&a->M,&x->N,a->loc,&one,&one,a->desc,a->pivots,x->loc,&one,&one,x->desc,&info));
755       PetscCheckScaLapackInfo("getrs",info);
756       break;
757     case MAT_FACTOR_CHOLESKY:
758       PetscStackCallBLAS("SCALAPACKpotrs",SCALAPACKpotrs_("L",&a->M,&x->N,a->loc,&one,&one,a->desc,x->loc,&one,&one,x->desc,&info));
759       PetscCheckScaLapackInfo("potrs",info);
760       break;
761     default:
762       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unfactored Matrix or Unsupported MatFactorType");
763   }
764   PetscFunctionReturn(0);
765 }
766 
767 static PetscErrorCode MatLUFactor_ScaLAPACK(Mat A,IS row,IS col,const MatFactorInfo *factorinfo)
768 {
769   PetscErrorCode ierr;
770   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
771   PetscBLASInt   one=1,info;
772 
773   PetscFunctionBegin;
774   if (!a->pivots) {
775     ierr = PetscMalloc1(a->locr+a->mb,&a->pivots);CHKERRQ(ierr);
776     ierr = PetscLogObjectMemory((PetscObject)A,a->locr*sizeof(PetscBLASInt));CHKERRQ(ierr);
777   }
778   PetscStackCallBLAS("SCALAPACKgetrf",SCALAPACKgetrf_(&a->M,&a->N,a->loc,&one,&one,a->desc,a->pivots,&info));
779   PetscCheckScaLapackInfo("getrf",info);
780   A->factortype = MAT_FACTOR_LU;
781   A->assembled  = PETSC_TRUE;
782 
783   ierr = PetscFree(A->solvertype);CHKERRQ(ierr);
784   ierr = PetscStrallocpy(MATSOLVERSCALAPACK,&A->solvertype);CHKERRQ(ierr);
785   PetscFunctionReturn(0);
786 }
787 
788 static PetscErrorCode MatLUFactorNumeric_ScaLAPACK(Mat F,Mat A,const MatFactorInfo *info)
789 {
790   PetscErrorCode ierr;
791 
792   PetscFunctionBegin;
793   ierr = MatCopy(A,F,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
794   ierr = MatLUFactor_ScaLAPACK(F,0,0,info);CHKERRQ(ierr);
795   PetscFunctionReturn(0);
796 }
797 
/*
   MatLUFactorSymbolic_ScaLAPACK - intentionally a no-op: dense factorizations
   need no symbolic analysis, and the factor matrix is already fully allocated
   by the MatGetFactor routine.
*/
static PetscErrorCode MatLUFactorSymbolic_ScaLAPACK(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
{
  PetscFunctionBegin;
  /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */
  PetscFunctionReturn(0);
}
804 
/*
   MatCholeskyFactor_ScaLAPACK - in-place Cholesky factorization of A using
   ScaLAPACK's ppotrf on the lower triangle. The permutation IS is ignored.
   On success the matrix is flagged as factored so MatSolve()/MatMatSolve()
   dispatch to the triangular-solve kernels.
*/
static PetscErrorCode MatCholeskyFactor_ScaLAPACK(Mat A,IS perm,const MatFactorInfo *factorinfo)
{
  PetscErrorCode ierr;
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
  PetscBLASInt   one=1,info;

  PetscFunctionBegin;
  /* factor the lower triangle; ppotrf fails (info>0) if A is not positive definite */
  PetscStackCallBLAS("SCALAPACKpotrf",SCALAPACKpotrf_("L",&a->M,a->loc,&one,&one,a->desc,&info));
  PetscCheckScaLapackInfo("potrf",info);
  A->factortype = MAT_FACTOR_CHOLESKY;
  A->assembled  = PETSC_TRUE;

  /* record which solver package produced this factorization */
  ierr = PetscFree(A->solvertype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(MATSOLVERSCALAPACK,&A->solvertype);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
821 
822 static PetscErrorCode MatCholeskyFactorNumeric_ScaLAPACK(Mat F,Mat A,const MatFactorInfo *info)
823 {
824   PetscErrorCode ierr;
825 
826   PetscFunctionBegin;
827   ierr = MatCopy(A,F,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
828   ierr = MatCholeskyFactor_ScaLAPACK(F,0,info);CHKERRQ(ierr);
829   PetscFunctionReturn(0);
830 }
831 
/*
   MatCholeskyFactorSymbolic_ScaLAPACK - intentionally a no-op: dense
   factorizations need no symbolic analysis, and the factor matrix is already
   fully allocated by the MatGetFactor routine.
*/
static PetscErrorCode MatCholeskyFactorSymbolic_ScaLAPACK(Mat F,Mat A,IS perm,const MatFactorInfo *info)
{
  PetscFunctionBegin;
  /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */
  PetscFunctionReturn(0);
}
838 
/*
   MatFactorGetSolverType_scalapack_scalapack - reports the solver package
   (MATSOLVERSCALAPACK) that produced the factorization of A; composed on the
   factor matrix under "MatFactorGetSolverType_C".
*/
PetscErrorCode MatFactorGetSolverType_scalapack_scalapack(Mat A,MatSolverType *type)
{
  PetscFunctionBegin;
  *type = MATSOLVERSCALAPACK;
  PetscFunctionReturn(0);
}
845 
/*
   MatGetFactor_scalapack_scalapack - creates the (fully allocated) factor
   matrix F for LU or Cholesky factorization, mirroring A's block sizes,
   global dimensions and process-grid origin.
*/
static PetscErrorCode MatGetFactor_scalapack_scalapack(Mat A,MatFactorType ftype,Mat *F)
{
  Mat            B;
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Create the factorization matrix */
  ierr = MatCreateScaLAPACK(PetscObjectComm((PetscObject)A),a->mb,a->nb,a->M,a->N,a->rsrc,a->csrc,&B);CHKERRQ(ierr);
  B->factortype = ftype;
  /* record the solver package on the factor */
  ierr = PetscFree(B->solvertype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(MATSOLVERSCALAPACK,&B->solvertype);CHKERRQ(ierr);

  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_scalapack_scalapack);CHKERRQ(ierr);
  *F = B;
  PetscFunctionReturn(0);
}
863 
/*
   MatSolverTypeRegister_ScaLAPACK - registers MATSOLVERSCALAPACK as the
   provider of LU and Cholesky factorizations for MATSCALAPACK matrices.
*/
PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_ScaLAPACK(void)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSolverTypeRegister(MATSOLVERSCALAPACK,MATSCALAPACK,MAT_FACTOR_LU,MatGetFactor_scalapack_scalapack);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERSCALAPACK,MATSCALAPACK,MAT_FACTOR_CHOLESKY,MatGetFactor_scalapack_scalapack);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
873 
874 static PetscErrorCode MatNorm_ScaLAPACK(Mat A,NormType type,PetscReal *nrm)
875 {
876   PetscErrorCode ierr;
877   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
878   PetscBLASInt   one=1,lwork=0;
879   const char     *ntype;
880   PetscScalar    *work=NULL,dummy;
881 
882   PetscFunctionBegin;
883   switch (type){
884     case NORM_1:
885       ntype = "1";
886       lwork = PetscMax(a->locr,a->locc);
887       break;
888     case NORM_FROBENIUS:
889       ntype = "F";
890       work  = &dummy;
891       break;
892     case NORM_INFINITY:
893       ntype = "I";
894       lwork = PetscMax(a->locr,a->locc);
895       break;
896     default:
897       SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Unsupported norm type");
898   }
899   if (lwork) { ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); }
900   *nrm = SCALAPACKlange_(ntype,&a->M,&a->N,a->loc,&one,&one,a->desc,work);
901   if (lwork) { ierr = PetscFree(work);CHKERRQ(ierr); }
902   PetscFunctionReturn(0);
903 }
904 
/*
   MatZeroEntries_ScaLAPACK - zeroes the locally owned portion of the matrix.
*/
static PetscErrorCode MatZeroEntries_ScaLAPACK(Mat A)
{
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* the local array is lld*locc scalars (column-major with leading dimension lld) */
  ierr = PetscArrayzero(a->loc,a->lld*a->locc);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
914 
915 static PetscErrorCode MatGetOwnershipIS_ScaLAPACK(Mat A,IS *rows,IS *cols)
916 {
917   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
918   PetscErrorCode ierr;
919   PetscInt       i,n,nb,isrc,nproc,iproc,*idx;
920 
921   PetscFunctionBegin;
922   if (rows) {
923     n     = a->locr;
924     nb    = a->mb;
925     isrc  = a->rsrc;
926     nproc = a->grid->nprow;
927     iproc = a->grid->myrow;
928     ierr  = PetscMalloc1(n,&idx);CHKERRQ(ierr);
929     for (i=0;i<n;i++) idx[i] = nproc*nb*(i/nb) + i%nb + ((nproc+iproc-isrc)%nproc)*nb;
930     ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx,PETSC_OWN_POINTER,rows);CHKERRQ(ierr);
931   }
932   if (cols) {
933     n     = a->locc;
934     nb    = a->nb;
935     isrc  = a->csrc;
936     nproc = a->grid->npcol;
937     iproc = a->grid->mycol;
938     ierr  = PetscMalloc1(n,&idx);CHKERRQ(ierr);
939     for (i=0;i<n;i++) idx[i] = nproc*nb*(i/nb) + i%nb + ((nproc+iproc-isrc)%nproc)*nb;
940     ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx,PETSC_OWN_POINTER,cols);CHKERRQ(ierr);
941   }
942   PetscFunctionReturn(0);
943 }
944 
/*
   MatConvert_ScaLAPACK_Dense - converts a MATSCALAPACK matrix (2d block-cyclic
   layout) to MATDENSE (1d block-row layout) using ScaLAPACK's pgemr2d.

   When reusing a target matrix whose local row sizes differ from A's layout,
   the data is first redistributed into an auxiliary dense matrix with the
   matching layout, then copied row by row into *B.
*/
static PetscErrorCode MatConvert_ScaLAPACK_Dense(Mat A,MatType newtype,MatReuse reuse,Mat *B)
{
  PetscErrorCode ierr;
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
  Mat            Bmpi;
  MPI_Comm       comm;
  PetscInt       i,M=A->rmap->N,N=A->cmap->N,m,n,rstart,rend,nz;
  const PetscInt *ranges,*branges,*cwork;
  const PetscScalar *vwork;
  PetscBLASInt   bdesc[9],bmb,zero=0,one=1,lld,info;
  PetscScalar    *barray;
  PetscBool      differ=PETSC_FALSE;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr);

  if (reuse == MAT_REUSE_MATRIX) { /* check if local sizes differ in A and B */
    ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
    ierr = PetscLayoutGetRanges((*B)->rmap,&branges);CHKERRQ(ierr);
    for (i=0;i<size;i++) if (ranges[i+1]!=branges[i+1]) { differ=PETSC_TRUE; break; }
  }

  if (reuse == MAT_REUSE_MATRIX && differ) { /* special case, use auxiliary dense matrix */
    ierr = MatCreate(comm,&Bmpi);CHKERRQ(ierr);
    m = PETSC_DECIDE;
    ierr = PetscSplitOwnershipEqual(comm,&m,&M);CHKERRQ(ierr);
    n = PETSC_DECIDE;
    ierr = PetscSplitOwnershipEqual(comm,&n,&N);CHKERRQ(ierr);
    ierr = MatSetSizes(Bmpi,m,n,M,N);CHKERRQ(ierr);
    ierr = MatSetType(Bmpi,MATDENSE);CHKERRQ(ierr);
    ierr = MatSetUp(Bmpi);CHKERRQ(ierr);

    /* create ScaLAPACK descriptor for B (1d block distribution) */
    ierr = PetscBLASIntCast(ranges[1],&bmb);CHKERRQ(ierr);  /* row block size */
    lld = PetscMax(A->rmap->n,1);  /* local leading dimension */
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(bdesc,&a->M,&a->N,&bmb,&a->N,&zero,&zero,&a->grid->ictxcol,&lld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* redistribute matrix */
    ierr = MatDenseGetArray(Bmpi,&barray);CHKERRQ(ierr);
    PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&a->N,a->loc,&one,&one,a->desc,barray,&one,&one,bdesc,&a->grid->ictxcol));
    ierr = MatDenseRestoreArray(Bmpi,&barray);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

    /* transfer rows of auxiliary matrix to the final matrix B */
    ierr = MatGetOwnershipRange(Bmpi,&rstart,&rend);CHKERRQ(ierr);
    for (i=rstart;i<rend;i++) {
      ierr = MatGetRow(Bmpi,i,&nz,&cwork,&vwork);CHKERRQ(ierr);
      ierr = MatSetValues(*B,1,&i,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow(Bmpi,i,&nz,&cwork,&vwork);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatDestroy(&Bmpi);CHKERRQ(ierr);

  } else {  /* normal cases */

    if (reuse == MAT_REUSE_MATRIX) Bmpi = *B;
    else {
      ierr = MatCreate(comm,&Bmpi);CHKERRQ(ierr);
      m = PETSC_DECIDE;
      ierr = PetscSplitOwnershipEqual(comm,&m,&M);CHKERRQ(ierr);
      n = PETSC_DECIDE;
      ierr = PetscSplitOwnershipEqual(comm,&n,&N);CHKERRQ(ierr);
      ierr = MatSetSizes(Bmpi,m,n,M,N);CHKERRQ(ierr);
      ierr = MatSetType(Bmpi,MATDENSE);CHKERRQ(ierr);
      ierr = MatSetUp(Bmpi);CHKERRQ(ierr);
    }

    /* create ScaLAPACK descriptor for B (1d block distribution) */
    ierr = PetscBLASIntCast(ranges[1],&bmb);CHKERRQ(ierr);  /* row block size */
    lld = PetscMax(A->rmap->n,1);  /* local leading dimension */
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(bdesc,&a->M,&a->N,&bmb,&a->N,&zero,&zero,&a->grid->ictxcol,&lld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* redistribute matrix */
    ierr = MatDenseGetArray(Bmpi,&barray);CHKERRQ(ierr);
    PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&a->N,a->loc,&one,&one,a->desc,barray,&one,&one,bdesc,&a->grid->ictxcol));
    ierr = MatDenseRestoreArray(Bmpi,&barray);CHKERRQ(ierr);

    ierr = MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    if (reuse == MAT_INPLACE_MATRIX) {
      ierr = MatHeaderReplace(A,&Bmpi);CHKERRQ(ierr);
    } else *B = Bmpi;
  }
  PetscFunctionReturn(0);
}
1036 
/*
   MatConvert_Dense_ScaLAPACK - converts a MATDENSE matrix (1d block-row
   layout) into a MATSCALAPACK matrix (2d block-cyclic layout) using
   ScaLAPACK's pgemr2d redistribution routine.
*/
PETSC_INTERN PetscErrorCode MatConvert_Dense_ScaLAPACK(Mat A,MatType newtype,MatReuse reuse,Mat *B)
{
  PetscErrorCode ierr;
  Mat_ScaLAPACK  *b;
  Mat            Bmpi;
  MPI_Comm       comm;
  PetscInt       M=A->rmap->N,N=A->cmap->N,m,n;
  const PetscInt *ranges;
  PetscBLASInt   adesc[9],amb,zero=0,one=1,lld,info;
  PetscScalar    *aarray;
  PetscInt       lda;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);

  if (reuse == MAT_REUSE_MATRIX) Bmpi = *B;
  else {
    ierr = MatCreate(comm,&Bmpi);CHKERRQ(ierr);
    m = PETSC_DECIDE;
    ierr = PetscSplitOwnershipEqual(comm,&m,&M);CHKERRQ(ierr);
    n = PETSC_DECIDE;
    ierr = PetscSplitOwnershipEqual(comm,&n,&N);CHKERRQ(ierr);
    ierr = MatSetSizes(Bmpi,m,n,M,N);CHKERRQ(ierr);
    ierr = MatSetType(Bmpi,MATSCALAPACK);CHKERRQ(ierr);
    ierr = MatSetUp(Bmpi);CHKERRQ(ierr);
  }
  b = (Mat_ScaLAPACK*)Bmpi->data;

  /* create ScaLAPACK descriptor for A (1d block distribution) */
  ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(ranges[1],&amb);CHKERRQ(ierr);  /* row block size */
  ierr = MatDenseGetLDA(A,&lda);CHKERRQ(ierr);
  lld = PetscMax(lda,1);  /* local leading dimension */
  PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(adesc,&b->M,&b->N,&amb,&b->N,&zero,&zero,&b->grid->ictxcol,&lld,&info));
  PetscCheckScaLapackInfo("descinit",info);

  /* redistribute matrix */
  ierr = MatDenseGetArray(A,&aarray);CHKERRQ(ierr);
  PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&b->M,&b->N,aarray,&one,&one,adesc,b->loc,&one,&one,b->desc,&b->grid->ictxcol));
  ierr = MatDenseRestoreArray(A,&aarray);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INPLACE_MATRIX) {
    ierr = MatHeaderReplace(A,&Bmpi);CHKERRQ(ierr);
  } else *B = Bmpi;
  PetscFunctionReturn(0);
}
1085 
/*
   MatConvert_AIJ_ScaLAPACK - converts a (sparse) AIJ matrix to MATSCALAPACK
   by inserting the locally owned rows one at a time; MatSetValues routes each
   entry to the process that owns it in the 2d block-cyclic layout.
*/
PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
{
  Mat               mat_scal;
  PetscErrorCode    ierr;
  PetscInt          M=A->rmap->N,N=A->cmap->N,rstart=A->rmap->rstart,rend=A->rmap->rend,m,n,row,ncols;
  const PetscInt    *cols;
  const PetscScalar *vals;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) {
    mat_scal = *newmat;
    ierr = MatZeroEntries(mat_scal);CHKERRQ(ierr);  /* clear stale values before re-insertion */
  } else {
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&mat_scal);CHKERRQ(ierr);
    m = PETSC_DECIDE;
    ierr = PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&m,&M);CHKERRQ(ierr);
    n = PETSC_DECIDE;
    ierr = PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&n,&N);CHKERRQ(ierr);
    ierr = MatSetSizes(mat_scal,m,n,M,N);CHKERRQ(ierr);
    ierr = MatSetType(mat_scal,MATSCALAPACK);CHKERRQ(ierr);
    ierr = MatSetUp(mat_scal);CHKERRQ(ierr);
  }
  for (row=rstart;row<rend;row++) {
    ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
    ierr = MatSetValues(mat_scal,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(mat_scal,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat_scal,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  if (reuse == MAT_INPLACE_MATRIX) { ierr = MatHeaderReplace(A,&mat_scal);CHKERRQ(ierr); }
  else *newmat = mat_scal;
  PetscFunctionReturn(0);
}
1120 
/*
   MatConvert_SBAIJ_ScaLAPACK - converts a symmetric (SBAIJ) matrix, which
   stores only the upper triangle, to a full MATSCALAPACK matrix. The lower
   triangle is generated explicitly by mirroring each off-diagonal entry
   (conjugated when the matrix is Hermitian). ADD_VALUES is used throughout;
   diagonal entries are skipped in the mirror loop so they are not doubled.
*/
PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
{
  Mat               mat_scal;
  PetscErrorCode    ierr;
  PetscInt          M=A->rmap->N,N=A->cmap->N,m,n,row,ncols,j,rstart=A->rmap->rstart,rend=A->rmap->rend;
  const PetscInt    *cols;
  const PetscScalar *vals;
  PetscScalar       v;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) {
    mat_scal = *newmat;
    ierr = MatZeroEntries(mat_scal);CHKERRQ(ierr);  /* required since values are added below */
  } else {
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&mat_scal);CHKERRQ(ierr);
    m = PETSC_DECIDE;
    ierr = PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&m,&M);CHKERRQ(ierr);
    n = PETSC_DECIDE;
    ierr = PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&n,&N);CHKERRQ(ierr);
    ierr = MatSetSizes(mat_scal,m,n,M,N);CHKERRQ(ierr);
    ierr = MatSetType(mat_scal,MATSCALAPACK);CHKERRQ(ierr);
    ierr = MatSetUp(mat_scal);CHKERRQ(ierr);
  }
  ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
  for (row=rstart;row<rend;row++) {
    ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
    ierr = MatSetValues(mat_scal,1,&row,ncols,cols,vals,ADD_VALUES);CHKERRQ(ierr);
    for (j=0;j<ncols;j++) { /* lower triangular part */
      if (cols[j] == row) continue;  /* do not duplicate the diagonal */
      v    = A->hermitian ? PetscConj(vals[j]) : vals[j];
      ierr = MatSetValues(mat_scal,1,&cols[j],1,&row,&v,ADD_VALUES);CHKERRQ(ierr);
    }
    ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
  }
  ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(mat_scal,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat_scal,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  if (reuse == MAT_INPLACE_MATRIX) { ierr = MatHeaderReplace(A,&mat_scal);CHKERRQ(ierr); }
  else *newmat = mat_scal;
  PetscFunctionReturn(0);
}
1163 
/*
   MatScaLAPACKSetPreallocation - allocates (or reallocates) the zeroed local
   dense array holding this process's portion of the 2d block-cyclic matrix,
   sized lld x locc (column-major).
*/
static PetscErrorCode MatScaLAPACKSetPreallocation(Mat A)
{
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
  PetscErrorCode ierr;
  PetscInt       sz=0;

  PetscFunctionBegin;
  ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
  if (!a->lld) a->lld = a->locr;  /* default leading dimension: number of local rows */

  ierr = PetscFree(a->loc);CHKERRQ(ierr);
  ierr = PetscIntMultError(a->lld,a->locc,&sz);CHKERRQ(ierr);  /* overflow-checked lld*locc */
  ierr = PetscCalloc1(sz,&a->loc);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)A,sz*sizeof(PetscScalar));CHKERRQ(ierr);

  A->preallocated = PETSC_TRUE;
  PetscFunctionReturn(0);
}
1183 
1184 static PetscErrorCode MatDestroy_ScaLAPACK(Mat A)
1185 {
1186   Mat_ScaLAPACK      *a = (Mat_ScaLAPACK*)A->data;
1187   PetscErrorCode     ierr;
1188   Mat_ScaLAPACK_Grid *grid;
1189   PetscBool          flg;
1190   MPI_Comm           icomm;
1191 
1192   PetscFunctionBegin;
1193   ierr = MatStashDestroy_Private(&A->stash);CHKERRQ(ierr);
1194   ierr = PetscFree(a->loc);CHKERRQ(ierr);
1195   ierr = PetscFree(a->pivots);CHKERRQ(ierr);
1196   ierr = PetscCommDuplicate(PetscObjectComm((PetscObject)A),&icomm,NULL);CHKERRQ(ierr);
1197   ierr = MPI_Comm_get_attr(icomm,Petsc_ScaLAPACK_keyval,(void**)&grid,(int*)&flg);CHKERRMPI(ierr);
1198   if (--grid->grid_refct == 0) {
1199     Cblacs_gridexit(grid->ictxt);
1200     Cblacs_gridexit(grid->ictxrow);
1201     Cblacs_gridexit(grid->ictxcol);
1202     ierr = PetscFree(grid);CHKERRQ(ierr);
1203     ierr = MPI_Comm_delete_attr(icomm,Petsc_ScaLAPACK_keyval);CHKERRMPI(ierr);
1204   }
1205   ierr = PetscCommDestroy(&icomm);CHKERRQ(ierr);
1206   ierr = PetscObjectComposeFunction((PetscObject)A,"MatGetOwnershipIS_C",NULL);CHKERRQ(ierr);
1207   ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL);CHKERRQ(ierr);
1208   ierr = PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKSetBlockSizes_C",NULL);CHKERRQ(ierr);
1209   ierr = PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKGetBlockSizes_C",NULL);CHKERRQ(ierr);
1210   ierr = PetscFree(A->data);CHKERRQ(ierr);
1211   PetscFunctionReturn(0);
1212 }
1213 
/*
   MatScaLAPACKCheckLayout - verifies that the layout has equal local sizes on
   all processes except possibly trailing ones, as produced by
   MatCreateScaLAPACK(); the 1d descriptors built elsewhere in this file rely
   on that property.
*/
PETSC_STATIC_INLINE PetscErrorCode MatScaLAPACKCheckLayout(PetscLayout map)
{
  PetscErrorCode ierr;
  const PetscInt *ranges;
  PetscMPIInt    size;
  PetscInt       i,n;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(map->comm,&size);CHKERRMPI(ierr);
  if (size>2) {
    ierr = PetscLayoutGetRanges(map,&ranges);CHKERRQ(ierr);
    n = ranges[1]-ranges[0];  /* local size of the first process */
    for (i=1;i<size-1;i++) if (ranges[i+1]-ranges[i]!=n) break;
    /* a mismatch is tolerated only where the remaining local sizes are zero */
    if (i<size-1 && ranges[i+1]-ranges[i]!=0 && ranges[i+2]-ranges[i+1]!=0) SETERRQ(map->comm,PETSC_ERR_SUP,"MATSCALAPACK must have equal local sizes in all processes (except possibly the last one), consider using MatCreateScaLAPACK");
  }
  PetscFunctionReturn(0);
}
1231 
/*
   MatSetUp_ScaLAPACK - finalizes layouts, computes the local sizes of the 2d
   block-cyclic distribution with numroc, allocates the local array and builds
   the ScaLAPACK array descriptor.
*/
PetscErrorCode MatSetUp_ScaLAPACK(Mat A)
{
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
  PetscErrorCode ierr;
  PetscBLASInt   info=0;

  PetscFunctionBegin;
  ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);

  /* check that the layout is as enforced by MatCreateScaLAPACK */
  ierr = MatScaLAPACKCheckLayout(A->rmap);CHKERRQ(ierr);
  ierr = MatScaLAPACKCheckLayout(A->cmap);CHKERRQ(ierr);

  /* compute local sizes */
  ierr = PetscBLASIntCast(A->rmap->N,&a->M);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(A->cmap->N,&a->N);CHKERRQ(ierr);
  a->locr = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
  a->locc = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
  a->lld  = PetscMax(1,a->locr);  /* descinit requires lld >= 1 even with no local rows */

  /* allocate local array */
  ierr = MatScaLAPACKSetPreallocation(A);CHKERRQ(ierr);

  /* set up ScaLAPACK descriptor */
  PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(a->desc,&a->M,&a->N,&a->mb,&a->nb,&a->rsrc,&a->csrc,&a->grid->ictxt,&a->lld,&info));
  PetscCheckScaLapackInfo("descinit",info);
  PetscFunctionReturn(0);
}
1261 
/*
   MatAssemblyBegin_ScaLAPACK - starts communication of stashed entries that
   belong to other processes; MatAssemblyEnd_ScaLAPACK() receives and stores them.
*/
PetscErrorCode MatAssemblyBegin_ScaLAPACK(Mat A,MatAssemblyType type)
{
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;

  PetscFunctionBegin;
  if (A->nooffprocentries) PetscFunctionReturn(0);  /* nothing stashed, skip communication */
  ierr = MatStashScatterBegin_Private(A,&A->stash,NULL);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&A->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1274 
1275 PetscErrorCode MatAssemblyEnd_ScaLAPACK(Mat A,MatAssemblyType type)
1276 {
1277   PetscErrorCode ierr;
1278   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
1279   PetscMPIInt    n;
1280   PetscInt       i,flg,*row,*col;
1281   PetscScalar    *val;
1282   PetscBLASInt   gridx,gcidx,lridx,lcidx,rsrc,csrc;
1283 
1284   PetscFunctionBegin;
1285   if (A->nooffprocentries) PetscFunctionReturn(0);
1286   while (1) {
1287     ierr = MatStashScatterGetMesg_Private(&A->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
1288     if (!flg) break;
1289     for (i=0;i<n;i++) {
1290       ierr = PetscBLASIntCast(row[i]+1,&gridx);CHKERRQ(ierr);
1291       ierr = PetscBLASIntCast(col[i]+1,&gcidx);CHKERRQ(ierr);
1292       PetscStackCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc));
1293       if (rsrc!=a->grid->myrow || csrc!=a->grid->mycol) SETERRQ(PetscObjectComm((PetscObject)A),1,"Something went wrong, received value does not belong to this process");
1294       switch (A->insertmode) {
1295         case INSERT_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] = val[i]; break;
1296         case ADD_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] += val[i]; break;
1297         default: SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for InsertMode %d",(int)A->insertmode);
1298       }
1299     }
1300   }
1301   ierr = MatStashScatterEnd_Private(&A->stash);CHKERRQ(ierr);
1302   PetscFunctionReturn(0);
1303 }
1304 
/*
   MatLoad_ScaLAPACK - loads a matrix from a viewer by first loading it as
   MATDENSE and then converting to MATSCALAPACK; the result replaces the
   header of newMat.
*/
PetscErrorCode MatLoad_ScaLAPACK(Mat newMat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  Mat            Adense,As;
  MPI_Comm       comm;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)newMat,&comm);CHKERRQ(ierr);
  ierr = MatCreate(comm,&Adense);CHKERRQ(ierr);
  ierr = MatSetType(Adense,MATDENSE);CHKERRQ(ierr);
  ierr = MatLoad(Adense,viewer);CHKERRQ(ierr);
  ierr = MatConvert(Adense,MATSCALAPACK,MAT_INITIAL_MATRIX,&As);CHKERRQ(ierr);
  ierr = MatDestroy(&Adense);CHKERRQ(ierr);
  ierr = MatHeaderReplace(newMat,&As);CHKERRQ(ierr);  /* newMat takes over As's contents */
  PetscFunctionReturn(0);
}
1321 
1322 /* -------------------------------------------------------------------*/
/*
   Virtual function table for MATSCALAPACK. The slot-number comments refer to
   the field order of struct _MatOps (petsc/private/matimpl.h); zero entries
   are operations this matrix type does not implement.
*/
static struct _MatOps MatOps_Values = {
       MatSetValues_ScaLAPACK,
       0,
       0,
       MatMult_ScaLAPACK,
/* 4*/ MatMultAdd_ScaLAPACK,
       MatMultTranspose_ScaLAPACK,
       MatMultTransposeAdd_ScaLAPACK,
       MatSolve_ScaLAPACK,
       MatSolveAdd_ScaLAPACK,
       0,
/*10*/ 0,
       MatLUFactor_ScaLAPACK,
       MatCholeskyFactor_ScaLAPACK,
       0,
       MatTranspose_ScaLAPACK,
/*15*/ MatGetInfo_ScaLAPACK,
       0,
       MatGetDiagonal_ScaLAPACK,
       MatDiagonalScale_ScaLAPACK,
       MatNorm_ScaLAPACK,
/*20*/ MatAssemblyBegin_ScaLAPACK,
       MatAssemblyEnd_ScaLAPACK,
       MatSetOption_ScaLAPACK,
       MatZeroEntries_ScaLAPACK,
/*24*/ 0,
       MatLUFactorSymbolic_ScaLAPACK,
       MatLUFactorNumeric_ScaLAPACK,
       MatCholeskyFactorSymbolic_ScaLAPACK,
       MatCholeskyFactorNumeric_ScaLAPACK,
/*29*/ MatSetUp_ScaLAPACK,
       0,
       0,
       0,
       0,
/*34*/ MatDuplicate_ScaLAPACK,
       0,
       0,
       0,
       0,
/*39*/ MatAXPY_ScaLAPACK,
       0,
       0,
       0,
       MatCopy_ScaLAPACK,
/*44*/ 0,
       MatScale_ScaLAPACK,
       MatShift_ScaLAPACK,
       0,
       0,
/*49*/ 0,
       0,
       0,
       0,
       0,
/*54*/ 0,
       0,
       0,
       0,
       0,
/*59*/ 0,
       MatDestroy_ScaLAPACK,
       MatView_ScaLAPACK,
       0,
       0,
/*64*/ 0,
       0,
       0,
       0,
       0,
/*69*/ 0,
       0,
       MatConvert_ScaLAPACK_Dense,
       0,
       0,
/*74*/ 0,
       0,
       0,
       0,
       0,
/*79*/ 0,
       0,
       0,
       0,
       MatLoad_ScaLAPACK,
/*84*/ 0,
       0,
       0,
       0,
       0,
/*89*/ 0,
       0,
       MatMatMultNumeric_ScaLAPACK,
       0,
       0,
/*94*/ 0,
       0,
       0,
       MatMatTransposeMultNumeric_ScaLAPACK,
       0,
/*99*/ MatProductSetFromOptions_ScaLAPACK,
       0,
       0,
       MatConjugate_ScaLAPACK,
       0,
/*104*/0,
       0,
       0,
       0,
       0,
/*109*/MatMatSolve_ScaLAPACK,
       0,
       0,
       0,
       MatMissingDiagonal_ScaLAPACK,
/*114*/0,
       0,
       0,
       0,
       0,
/*119*/0,
       MatHermitianTranspose_ScaLAPACK,
       0,
       0,
       0,
/*124*/0,
       0,
       0,
       0,
       0,
/*129*/0,
       0,
       0,
       0,
       0,
/*134*/0,
       0,
       0,
       0,
       0,
       0,
/*140*/0,
       0,
       0,
       0,
       0,
/*145*/0,
       0,
       0
};
1473 
1474 static PetscErrorCode MatStashScatterBegin_ScaLAPACK(Mat mat,MatStash *stash,PetscInt *owners)
1475 {
1476   PetscInt           *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2;
1477   PetscInt           size=stash->size,nsends;
1478   PetscErrorCode     ierr;
1479   PetscInt           count,*sindices,**rindices,i,j,l;
1480   PetscScalar        **rvalues,*svalues;
1481   MPI_Comm           comm = stash->comm;
1482   MPI_Request        *send_waits,*recv_waits,*recv_waits1,*recv_waits2;
1483   PetscMPIInt        *sizes,*nlengths,nreceives;
1484   PetscInt           *sp_idx,*sp_idy;
1485   PetscScalar        *sp_val;
1486   PetscMatStashSpace space,space_next;
1487   PetscBLASInt       gridx,gcidx,lridx,lcidx,rsrc,csrc;
1488   Mat_ScaLAPACK      *a = (Mat_ScaLAPACK*)mat->data;
1489 
1490   PetscFunctionBegin;
1491   {                             /* make sure all processors are either in INSERTMODE or ADDMODE */
1492     InsertMode addv;
1493     ierr = MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1494     if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
1495     mat->insertmode = addv; /* in case this processor had no cache */
1496   }
1497 
1498   bs2 = stash->bs*stash->bs;
1499 
1500   /*  first count number of contributors to each processor */
1501   ierr = PetscCalloc1(size,&nlengths);CHKERRQ(ierr);
1502   ierr = PetscMalloc1(stash->n+1,&owner);CHKERRQ(ierr);
1503 
1504   i     = j    = 0;
1505   space = stash->space_head;
1506   while (space) {
1507     space_next = space->next;
1508     for (l=0; l<space->local_used; l++) {
1509       ierr = PetscBLASIntCast(space->idx[l]+1,&gridx);CHKERRQ(ierr);
1510       ierr = PetscBLASIntCast(space->idy[l]+1,&gcidx);CHKERRQ(ierr);
1511       PetscStackCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc));
1512       j = Cblacs_pnum(a->grid->ictxt,rsrc,csrc);
1513       nlengths[j]++; owner[i] = j;
1514       i++;
1515     }
1516     space = space_next;
1517   }
1518 
1519   /* Now check what procs get messages - and compute nsends. */
1520   ierr = PetscCalloc1(size,&sizes);CHKERRQ(ierr);
1521   for (i=0, nsends=0; i<size; i++) {
1522     if (nlengths[i]) {
1523       sizes[i] = 1; nsends++;
1524     }
1525   }
1526 
1527   {PetscMPIInt *onodes,*olengths;
1528    /* Determine the number of messages to expect, their lengths, from from-ids */
1529    ierr = PetscGatherNumberOfMessages(comm,sizes,nlengths,&nreceives);CHKERRQ(ierr);
1530    ierr = PetscGatherMessageLengths(comm,nsends,nreceives,nlengths,&onodes,&olengths);CHKERRQ(ierr);
1531    /* since clubbing row,col - lengths are multiplied by 2 */
1532    for (i=0; i<nreceives; i++) olengths[i] *=2;
1533    ierr = PetscPostIrecvInt(comm,tag1,nreceives,onodes,olengths,&rindices,&recv_waits1);CHKERRQ(ierr);
1534    /* values are size 'bs2' lengths (and remove earlier factor 2 */
1535    for (i=0; i<nreceives; i++) olengths[i] = olengths[i]*bs2/2;
1536    ierr = PetscPostIrecvScalar(comm,tag2,nreceives,onodes,olengths,&rvalues,&recv_waits2);CHKERRQ(ierr);
1537    ierr = PetscFree(onodes);CHKERRQ(ierr);
1538    ierr = PetscFree(olengths);CHKERRQ(ierr);}
1539 
1540   /* do sends:
1541       1) starts[i] gives the starting index in svalues for stuff going to
1542          the ith processor
1543   */
1544   ierr = PetscMalloc2(bs2*stash->n,&svalues,2*(stash->n+1),&sindices);CHKERRQ(ierr);
1545   ierr = PetscMalloc1(2*nsends,&send_waits);CHKERRQ(ierr);
1546   ierr = PetscMalloc2(size,&startv,size,&starti);CHKERRQ(ierr);
1547   /* use 2 sends the first with all_a, the next with all_i and all_j */
1548   startv[0] = 0; starti[0] = 0;
1549   for (i=1; i<size; i++) {
1550     startv[i] = startv[i-1] + nlengths[i-1];
1551     starti[i] = starti[i-1] + 2*nlengths[i-1];
1552   }
1553 
1554   i     = 0;
1555   space = stash->space_head;
1556   while (space) {
1557     space_next = space->next;
1558     sp_idx     = space->idx;
1559     sp_idy     = space->idy;
1560     sp_val     = space->val;
1561     for (l=0; l<space->local_used; l++) {
1562       j = owner[i];
1563       if (bs2 == 1) {
1564         svalues[startv[j]] = sp_val[l];
1565       } else {
1566         PetscInt    k;
1567         PetscScalar *buf1,*buf2;
1568         buf1 = svalues+bs2*startv[j];
1569         buf2 = space->val + bs2*l;
1570         for (k=0; k<bs2; k++) buf1[k] = buf2[k];
1571       }
1572       sindices[starti[j]]             = sp_idx[l];
1573       sindices[starti[j]+nlengths[j]] = sp_idy[l];
1574       startv[j]++;
1575       starti[j]++;
1576       i++;
1577     }
1578     space = space_next;
1579   }
1580   startv[0] = 0;
1581   for (i=1; i<size; i++) startv[i] = startv[i-1] + nlengths[i-1];
1582 
1583   for (i=0,count=0; i<size; i++) {
1584     if (sizes[i]) {
1585       ierr = MPI_Isend(sindices+2*startv[i],2*nlengths[i],MPIU_INT,i,tag1,comm,send_waits+count++);CHKERRMPI(ierr);
1586       ierr = MPI_Isend(svalues+bs2*startv[i],bs2*nlengths[i],MPIU_SCALAR,i,tag2,comm,send_waits+count++);CHKERRMPI(ierr);
1587     }
1588   }
1589 #if defined(PETSC_USE_INFO)
1590   ierr = PetscInfo1(NULL,"No of messages: %d \n",nsends);CHKERRQ(ierr);
1591   for (i=0; i<size; i++) {
1592     if (sizes[i]) {
1593       ierr = PetscInfo2(NULL,"Mesg_to: %d: size: %d bytes\n",i,nlengths[i]*(bs2*sizeof(PetscScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);
1594     }
1595   }
1596 #endif
1597   ierr = PetscFree(nlengths);CHKERRQ(ierr);
1598   ierr = PetscFree(owner);CHKERRQ(ierr);
1599   ierr = PetscFree2(startv,starti);CHKERRQ(ierr);
1600   ierr = PetscFree(sizes);CHKERRQ(ierr);
1601 
1602   /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */
1603   ierr = PetscMalloc1(2*nreceives,&recv_waits);CHKERRQ(ierr);
1604 
1605   for (i=0; i<nreceives; i++) {
1606     recv_waits[2*i]   = recv_waits1[i];
1607     recv_waits[2*i+1] = recv_waits2[i];
1608   }
1609   stash->recv_waits = recv_waits;
1610 
1611   ierr = PetscFree(recv_waits1);CHKERRQ(ierr);
1612   ierr = PetscFree(recv_waits2);CHKERRQ(ierr);
1613 
1614   stash->svalues         = svalues;
1615   stash->sindices        = sindices;
1616   stash->rvalues         = rvalues;
1617   stash->rindices        = rindices;
1618   stash->send_waits      = send_waits;
1619   stash->nsends          = nsends;
1620   stash->nrecvs          = nreceives;
1621   stash->reproduce_count = 0;
1622   PetscFunctionReturn(0);
1623 }
1624 
1625 static PetscErrorCode MatScaLAPACKSetBlockSizes_ScaLAPACK(Mat A,PetscInt mb,PetscInt nb)
1626 {
1627   PetscErrorCode ierr;
1628   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
1629 
1630   PetscFunctionBegin;
1631   if (A->preallocated) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Cannot change block sizes after MatSetUp");
1632   if (mb<1 && mb!=PETSC_DECIDE) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"mb %D must be at least 1",mb);
1633   if (nb<1 && nb!=PETSC_DECIDE) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"nb %D must be at least 1",nb);
1634   ierr = PetscBLASIntCast((mb==PETSC_DECIDE)?DEFAULT_BLOCKSIZE:mb,&a->mb);CHKERRQ(ierr);
1635   ierr = PetscBLASIntCast((nb==PETSC_DECIDE)?a->mb:nb,&a->nb);CHKERRQ(ierr);
1636   PetscFunctionReturn(0);
1637 }
1638 
/*@
   MatScaLAPACKSetBlockSizes - Sets the block sizes to be used for the distribution of
   the ScaLAPACK matrix

   Logically Collective on A

   Input Parameters:
+  A  - a MATSCALAPACK matrix
.  mb - the row block size
-  nb - the column block size

   Level: intermediate

.seealso: MatCreateScaLAPACK(), MatScaLAPACKGetBlockSizes()
@*/
PetscErrorCode MatScaLAPACKSetBlockSizes(Mat A,PetscInt mb,PetscInt nb)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  /* Block sizes must agree across all ranks: the 2D block-cyclic layout is a collective property */
  PetscValidLogicalCollectiveInt(A,mb,2);
  PetscValidLogicalCollectiveInt(A,nb,3);
  /* Dispatch to the implementation composed on A; PetscTryMethod is a no-op for types that do not provide it */
  ierr = PetscTryMethod(A,"MatScaLAPACKSetBlockSizes_C",(Mat,PetscInt,PetscInt),(A,mb,nb));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1665 
1666 static PetscErrorCode MatScaLAPACKGetBlockSizes_ScaLAPACK(Mat A,PetscInt *mb,PetscInt *nb)
1667 {
1668   Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
1669 
1670   PetscFunctionBegin;
1671   if (mb) *mb = a->mb;
1672   if (nb) *nb = a->nb;
1673   PetscFunctionReturn(0);
1674 }
1675 
/*@
   MatScaLAPACKGetBlockSizes - Gets the block sizes used in the distribution of
   the ScaLAPACK matrix

   Not collective

   Input Parameter:
.  A  - a MATSCALAPACK matrix

   Output Parameters:
+  mb - the row block size
-  nb - the column block size

   Level: intermediate

.seealso: MatCreateScaLAPACK(), MatScaLAPACKSetBlockSizes()
@*/
PetscErrorCode MatScaLAPACKGetBlockSizes(Mat A,PetscInt *mb,PetscInt *nb)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  /* PetscUseMethod (unlike PetscTryMethod) raises an error if A's type did not compose this query */
  ierr = PetscUseMethod(A,"MatScaLAPACKGetBlockSizes_C",(Mat,PetscInt*,PetscInt*),(A,mb,nb));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1702 
1703 PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Ref(MatStash*,PetscMPIInt*,PetscInt**,PetscInt**,PetscScalar**,PetscInt*);
1704 PETSC_INTERN PetscErrorCode MatStashScatterEnd_Ref(MatStash*);
1705 
1706 /*MC
1707    MATSCALAPACK = "scalapack" - A matrix type for dense matrices using the ScaLAPACK package
1708 
1709    Use ./configure --download-scalapack to install PETSc to use ScaLAPACK
1710 
1711    Use -pc_type lu -pc_factor_mat_solver_type scalapack to use this direct solver
1712 
1713    Options Database Keys:
1714 +  -mat_type scalapack - sets the matrix type to "scalapack" during a call to MatSetFromOptions()
1715 .  -mat_scalapack_grid_height - sets Grid Height for 2D cyclic ordering of internal matrix
1716 -  -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma)
1717 
1718    Level: beginner
1719 
1720 .seealso: MATDENSE, MATELEMENTAL
1721 M*/
1722 
/* Type constructor for MATSCALAPACK: installs the function table, creates the value
   stash, and attaches (or reuses) the shared BLACS process grid for the communicator. */
PETSC_EXTERN PetscErrorCode MatCreate_ScaLAPACK(Mat A)
{
  Mat_ScaLAPACK      *a;
  PetscErrorCode     ierr;
  PetscBool          flg,flg1;
  Mat_ScaLAPACK_Grid *grid;
  MPI_Comm           icomm;
  PetscBLASInt       nprow,npcol,myrow,mycol;
  PetscInt           optv1,k=2,array[2]={0,0};
  PetscMPIInt        size;

  PetscFunctionBegin;
  ierr = PetscMemcpy(A->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  A->insertmode = NOT_SET_VALUES;

  /* Stash for off-process MatSetValues(); ScatterBegin is ScaLAPACK-specific while
     GetMesg/End reuse the reference (_Ref) implementations declared above */
  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)A),1,&A->stash);CHKERRQ(ierr);
  A->stash.ScatterBegin   = MatStashScatterBegin_ScaLAPACK;
  A->stash.ScatterGetMesg = MatStashScatterGetMesg_Ref;
  A->stash.ScatterEnd     = MatStashScatterEnd_Ref;
  A->stash.ScatterDestroy = NULL;

  ierr = PetscNewLog(A,&a);CHKERRQ(ierr);
  A->data = (void*)a;

  /* Grid needs to be shared between multiple Mats on the same communicator, implement by attribute caching on the MPI_Comm */
  if (Petsc_ScaLAPACK_keyval == MPI_KEYVAL_INVALID) {
    /* First MATSCALAPACK ever created: register the attribute key and its finalizer */
    ierr = MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN,MPI_COMM_NULL_DELETE_FN,&Petsc_ScaLAPACK_keyval,(void*)0);CHKERRMPI(ierr);
    ierr = PetscRegisterFinalize(Petsc_ScaLAPACK_keyval_free);CHKERRQ(ierr);
    ierr = PetscCitationsRegister(ScaLAPACKCitation,&ScaLAPACKCite);CHKERRQ(ierr);
  }
  /* Use PETSc's cached inner communicator so the attribute survives across user comms */
  ierr = PetscCommDuplicate(PetscObjectComm((PetscObject)A),&icomm,NULL);CHKERRQ(ierr);
  ierr = MPI_Comm_get_attr(icomm,Petsc_ScaLAPACK_keyval,(void**)&grid,(int*)&flg);CHKERRMPI(ierr);
  if (!flg) {
    /* No grid cached on this communicator yet: build one */
    ierr = PetscNewLog(A,&grid);CHKERRQ(ierr);

    ierr = MPI_Comm_size(icomm,&size);CHKERRMPI(ierr);
    /* Start from the most square grid: floor(sqrt(size)); +0.001 guards against sqrt returning just below an integer */
    grid->nprow = (PetscInt) (PetscSqrtReal((PetscReal)size) + 0.001);

    ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"ScaLAPACK Grid Options","Mat");CHKERRQ(ierr);
    ierr = PetscOptionsInt("-mat_scalapack_grid_height","Grid Height","None",grid->nprow,&optv1,&flg1);CHKERRQ(ierr);
    if (flg1) {
      if (size % optv1) SETERRQ2(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_INCOMP,"Grid Height %D must evenly divide CommSize %D",optv1,size);
      grid->nprow = optv1;
    }
    ierr = PetscOptionsEnd();CHKERRQ(ierr);

    if (size % grid->nprow) grid->nprow = 1;  /* cannot use a squarish grid, use a 1d grid */
    grid->npcol = size/grid->nprow;
    ierr = PetscBLASIntCast(grid->nprow,&nprow);CHKERRQ(ierr);
    ierr = PetscBLASIntCast(grid->npcol,&npcol);CHKERRQ(ierr);
    /* Create the main BLACS context (row-major process ordering) and query this rank's coordinates */
    grid->ictxt = Csys2blacs_handle(icomm);
    Cblacs_gridinit(&grid->ictxt,"R",nprow,npcol);
    Cblacs_gridinfo(grid->ictxt,&nprow,&npcol,&myrow,&mycol);
    grid->grid_refct = 1;
    grid->nprow      = nprow;
    grid->npcol      = npcol;
    grid->myrow      = myrow;
    grid->mycol      = mycol;
    /* auxiliary 1d BLACS contexts for 1xsize and sizex1 grids */
    grid->ictxrow = Csys2blacs_handle(icomm);
    Cblacs_gridinit(&grid->ictxrow,"R",1,size);
    grid->ictxcol = Csys2blacs_handle(icomm);
    Cblacs_gridinit(&grid->ictxcol,"R",size,1);
    /* Cache the grid on the communicator for reuse by later MATSCALAPACK matrices */
    ierr = MPI_Comm_set_attr(icomm,Petsc_ScaLAPACK_keyval,(void*)grid);CHKERRMPI(ierr);

  } else grid->grid_refct++;  /* grid already cached: just bump its reference count */
  ierr = PetscCommDestroy(&icomm);CHKERRQ(ierr);
  a->grid = grid;
  a->mb   = DEFAULT_BLOCKSIZE;
  a->nb   = DEFAULT_BLOCKSIZE;

  ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),NULL,"ScaLAPACK Options","Mat");CHKERRQ(ierr);
  ierr = PetscOptionsIntArray("-mat_scalapack_block_sizes","Size of the blocks to use (one or two comma-separated integers)","MatCreateScaLAPACK",array,&k,&flg);CHKERRQ(ierr);
  if (flg) {
    /* A single value sets both block sizes; a second value overrides nb */
    a->mb = array[0];
    a->nb = (k>1)? array[1]: a->mb;
  }
  ierr = PetscOptionsEnd();CHKERRQ(ierr);

  ierr = PetscObjectComposeFunction((PetscObject)A,"MatGetOwnershipIS_C",MatGetOwnershipIS_ScaLAPACK);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKSetBlockSizes_C",MatScaLAPACKSetBlockSizes_ScaLAPACK);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKGetBlockSizes_C",MatScaLAPACKGetBlockSizes_ScaLAPACK);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)A,MATSCALAPACK);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1808 
1809 /*@C
1810    MatCreateScaLAPACK - Creates a dense parallel matrix in ScaLAPACK format
1811    (2D block cyclic distribution).
1812 
1813    Collective
1814 
1815    Input Parameters:
1816 +  comm - MPI communicator
1817 .  mb   - row block size (or PETSC_DECIDE to have it set)
1818 .  nb   - column block size (or PETSC_DECIDE to have it set)
1819 .  M    - number of global rows
1820 .  N    - number of global columns
1821 .  rsrc - coordinate of process that owns the first row of the distributed matrix
1822 -  csrc - coordinate of process that owns the first column of the distributed matrix
1823 
1824    Output Parameter:
1825 .  A - the matrix
1826 
1827    Options Database Keys:
1828 .  -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma)
1829 
1830    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
1831    MatXXXXSetPreallocation() paradigm instead of this routine directly.
1832    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
1833 
1834    Notes:
1835    If PETSC_DECIDE is used for the block sizes, then an appropriate value
1836    is chosen.
1837 
   Storage Information:
   Storage is completely managed by ScaLAPACK, so this requires PETSc to be
   configured with ScaLAPACK. In particular, PETSc's local sizes lose
   significance and are thus ignored. The block sizes refer to the values
   used for the distributed matrix, not the same meaning as in BAIJ.
1843 
1844    Level: intermediate
1845 
1846 .seealso: MatCreate(), MatCreateDense(), MatSetValues()
1847 @*/
PetscErrorCode MatCreateScaLAPACK(MPI_Comm comm,PetscInt mb,PetscInt nb,PetscInt M,PetscInt N,PetscInt rsrc,PetscInt csrc,Mat *A)
{
  PetscErrorCode ierr;
  Mat_ScaLAPACK  *a;
  PetscInt       m,n;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetType(*A,MATSCALAPACK);CHKERRQ(ierr);
  /* Global sizes are mandatory: the 2D block-cyclic layout cannot be inferred from local sizes */
  if (M==PETSC_DECIDE || N==PETSC_DECIDE) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot use PETSC_DECIDE for matrix dimensions");
  /* rows and columns are NOT distributed according to PetscSplitOwnership */
  m = PETSC_DECIDE;
  ierr = PetscSplitOwnershipEqual(comm,&m,&M);CHKERRQ(ierr);
  n = PETSC_DECIDE;
  ierr = PetscSplitOwnershipEqual(comm,&n,&N);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  a = (Mat_ScaLAPACK*)(*A)->data;
  /* Record descriptor parameters as BLAS ints: global sizes, block sizes, and the
     process coordinates owning the first row/column */
  ierr = PetscBLASIntCast(M,&a->M);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(N,&a->N);CHKERRQ(ierr);
  /* PETSC_DECIDE block sizes fall back to DEFAULT_BLOCKSIZE; nb defaults to mb */
  ierr = PetscBLASIntCast((mb==PETSC_DECIDE)?DEFAULT_BLOCKSIZE:mb,&a->mb);CHKERRQ(ierr);
  ierr = PetscBLASIntCast((nb==PETSC_DECIDE)?a->mb:nb,&a->nb);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(rsrc,&a->rsrc);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(csrc,&a->csrc);CHKERRQ(ierr);
  ierr = MatSetUp(*A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1874 
1875