xref: /petsc/src/mat/impls/scalapack/matscalapack.c (revision efa12513287cff49a2b9648ae83199dcbfaad71a)
1 #include <petsc/private/petscscalapack.h>  /*I "petscmat.h" I*/
2 
3 #define DEFAULT_BLOCKSIZE 64
4 
5 /*
6     The variable Petsc_ScaLAPACK_keyval is used to indicate an MPI attribute that
7   is attached to a communicator, in this case the attribute is a Mat_ScaLAPACK_Grid
8 */
9 static PetscMPIInt Petsc_ScaLAPACK_keyval = MPI_KEYVAL_INVALID;
10 
/*
   Petsc_ScaLAPACK_keyval_free - Releases the MPI keyval used to attach the
   Mat_ScaLAPACK_Grid attribute to communicators.

   NOTE(review): presumably registered as a finalization callback when the
   keyval is created; the registration site is not visible in this file chunk.
*/
static PetscErrorCode Petsc_ScaLAPACK_keyval_free(void)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscInfo(NULL,"Freeing Petsc_ScaLAPACK_keyval\n");CHKERRQ(ierr);
  /* MPI resets the keyval to MPI_KEYVAL_INVALID on success */
  ierr = MPI_Comm_free_keyval(&Petsc_ScaLAPACK_keyval);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
20 
21 static PetscErrorCode MatView_ScaLAPACK(Mat A,PetscViewer viewer)
22 {
23   PetscErrorCode    ierr;
24   Mat_ScaLAPACK     *a = (Mat_ScaLAPACK*)A->data;
25   PetscBool         iascii;
26   PetscViewerFormat format;
27   Mat               Adense;
28 
29   PetscFunctionBegin;
30   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
31   if (iascii) {
32     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
33     if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
34       ierr = PetscViewerASCIIPrintf(viewer,"block sizes: %d,%d\n",(int)a->mb,(int)a->nb);CHKERRQ(ierr);
35       ierr = PetscViewerASCIIPrintf(viewer,"grid height=%d, grid width=%d\n",(int)a->grid->nprow,(int)a->grid->npcol);CHKERRQ(ierr);
36       ierr = PetscViewerASCIIPrintf(viewer,"coordinates of process owning first row and column: (%d,%d)\n",(int)a->rsrc,(int)a->csrc);CHKERRQ(ierr);
37       ierr = PetscViewerASCIIPrintf(viewer,"dimension of largest local matrix: %d x %d\n",(int)a->locr,(int)a->locc);CHKERRQ(ierr);
38       PetscFunctionReturn(0);
39     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
40       PetscFunctionReturn(0);
41     }
42   }
43   /* convert to dense format and call MatView() */
44   ierr = MatConvert(A,MATDENSE,MAT_INITIAL_MATRIX,&Adense);CHKERRQ(ierr);
45   ierr = MatView(Adense,viewer);CHKERRQ(ierr);
46   ierr = MatDestroy(&Adense);CHKERRQ(ierr);
47   PetscFunctionReturn(0);
48 }
49 
50 static PetscErrorCode MatGetInfo_ScaLAPACK(Mat A,MatInfoType flag,MatInfo *info)
51 {
52   PetscErrorCode ierr;
53   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
54   PetscLogDouble isend[2],irecv[2];
55 
56   PetscFunctionBegin;
57   info->block_size = 1.0;
58 
59   isend[0] = a->lld*a->locc;     /* locally allocated */
60   isend[1] = a->locr*a->locc;    /* used submatrix */
61   if (flag == MAT_LOCAL || flag == MAT_GLOBAL_MAX) {
62     info->nz_allocated   = isend[0];
63     info->nz_used        = isend[1];
64   } else if (flag == MAT_GLOBAL_MAX) {
65     ierr = MPIU_Allreduce(isend,irecv,2,MPIU_PETSCLOGDOUBLE,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
66     info->nz_allocated   = irecv[0];
67     info->nz_used        = irecv[1];
68   } else if (flag == MAT_GLOBAL_SUM) {
69     ierr = MPIU_Allreduce(isend,irecv,2,MPIU_PETSCLOGDOUBLE,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
70     info->nz_allocated   = irecv[0];
71     info->nz_used        = irecv[1];
72   }
73 
74   info->nz_unneeded       = 0;
75   info->assemblies        = A->num_ass;
76   info->mallocs           = 0;
77   info->memory            = ((PetscObject)A)->mem;
78   info->fill_ratio_given  = 0;
79   info->fill_ratio_needed = 0;
80   info->factor_mallocs    = 0;
81   PetscFunctionReturn(0);
82 }
83 
84 PetscErrorCode MatSetOption_ScaLAPACK(Mat A,MatOption op,PetscBool flg)
85 {
86   PetscFunctionBegin;
87   switch (op) {
88     case MAT_NEW_NONZERO_LOCATIONS:
89     case MAT_NEW_NONZERO_LOCATION_ERR:
90     case MAT_NEW_NONZERO_ALLOCATION_ERR:
91     case MAT_SYMMETRIC:
92     case MAT_SORTED_FULL:
93     case MAT_HERMITIAN:
94       break;
95     default:
96       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unsupported option %s",MatOptions[op]);
97   }
98   PetscFunctionReturn(0);
99 }
100 
/*
   MatSetValues_ScaLAPACK - Inserts/adds a logically dense block of values.

   Global indices are mapped to the 2d block-cyclic layout with ScaLAPACK's
   infog2l; entries owned by this process are updated in place in the
   column-major local array, all others are stashed and communicated during
   MatAssemblyBegin/End().
*/
static PetscErrorCode MatSetValues_ScaLAPACK(Mat A,PetscInt nr,const PetscInt *rows,PetscInt nc,const PetscInt *cols,const PetscScalar *vals,InsertMode imode)
{
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,j;
  PetscBLASInt   gridx,gcidx,lridx,lcidx,rsrc,csrc;

  PetscFunctionBegin;
  for (i=0;i<nr;i++) {
    if (rows[i] < 0) continue;                             /* negative indices are skipped (standard PETSc convention) */
    ierr = PetscBLASIntCast(rows[i]+1,&gridx);CHKERRQ(ierr);  /* ScaLAPACK indices are 1-based */
    for (j=0;j<nc;j++) {
      if (cols[j] < 0) continue;
      ierr = PetscBLASIntCast(cols[j]+1,&gcidx);CHKERRQ(ierr);
      /* map global (row,col) to local (lridx,lcidx) and owner process (rsrc,csrc) */
      PetscStackCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc));
      if (rsrc==a->grid->myrow && csrc==a->grid->mycol) {
        /* locally owned entry: update the column-major local array directly */
        switch (imode) {
          case INSERT_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] = vals[i*nc+j]; break;
          case ADD_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] += vals[i*nc+j]; break;
          default: SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for InsertMode %d",(int)imode);
        }
      } else {
        /* off-process entry: stash it (one value at a time) for assembly-time communication */
        if (A->nooffprocentries) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process entry even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set");
        A->assembled = PETSC_FALSE;
        ierr = MatStashValuesRow_Private(&A->stash,rows[i],1,cols+j,vals+i*nc+j,(PetscBool)(imode==ADD_VALUES));CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}
131 
/*
   MatMultXXXYYY_ScaLAPACK - Shared kernel for MatMult/MatMultTranspose and
   their Add variants: computes y = op(A)*x + beta*y with op(A) = A or A^T
   (alpha is fixed at 1.0; the Add variants pass beta = 1.0, plain multiply
   passes beta = 0.0).

   PETSc vectors use a 1d block row distribution, so each vector is first
   redistributed with p?gemr2d into the grid's 2d block-cyclic layout (as a
   single column or single row of a 2d matrix), p?gemv is applied, and the
   result is redistributed back.

   NOTE(review): the transpose case passes "T" to pgemv even in complex
   builds, i.e. this is a plain transpose, not a conjugate transpose.
*/
static PetscErrorCode MatMultXXXYYY_ScaLAPACK(Mat A,PetscBool transpose,PetscScalar beta,const PetscScalar *x,PetscScalar *y)
{
  PetscErrorCode ierr;
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
  PetscScalar    *x2d,*y2d,alpha=1.0;
  const PetscInt *ranges;
  PetscBLASInt   xdesc[9],ydesc[9],x2desc[9],y2desc[9],mb,nb,lszx,lszy,zero=0,one=1,xlld,ylld,info;

  PetscFunctionBegin;
  if (transpose) {

    /* create ScaLAPACK descriptors for vectors (1d block distribution) */
    ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr);
    ierr = PetscBLASIntCast(ranges[1],&mb);CHKERRQ(ierr);  /* x block size = size of rank 0's chunk */
    xlld = PetscMax(1,A->rmap->n);
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&xlld,&info));
    PetscCheckScaLapackInfo("descinit",info);
    ierr = PetscLayoutGetRanges(A->cmap,&ranges);CHKERRQ(ierr);
    ierr = PetscBLASIntCast(ranges[1],&nb);CHKERRQ(ierr);  /* y block size */
    ylld = 1;                                              /* y viewed as a 1-row matrix */
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ydesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&ylld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* allocate 2d vectors */
    lszx = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
    lszy = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
    ierr = PetscMalloc2(lszx,&x2d,lszy,&y2d);CHKERRQ(ierr);
    xlld = PetscMax(1,lszx);

    /* create ScaLAPACK descriptors for vectors (2d block distribution) */
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&xlld,&info));
    PetscCheckScaLapackInfo("descinit",info);
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(y2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&ylld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* redistribute x as a column of a 2d matrix */
    PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,(PetscScalar*)x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxcol));

    /* redistribute y as a row of a 2d matrix (only needed when y contributes, i.e. beta != 0) */
    if (beta!=0.0) PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,y,&one,&one,ydesc,y2d,&one,&one,y2desc,&a->grid->ictxrow));

    /* call PBLAS subroutine */
    PetscStackCallBLAS("PBLASgemv",PBLASgemv_("T",&a->M,&a->N,&alpha,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&one,&beta,y2d,&one,&one,y2desc,&one));

    /* redistribute y from a row of a 2d matrix */
    PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,y2d,&one,&one,y2desc,y,&one,&one,ydesc,&a->grid->ictxrow));

  } else {   /* non-transpose: roles of row/column layouts are swapped */

    /* create ScaLAPACK descriptors for vectors (1d block distribution) */
    ierr = PetscLayoutGetRanges(A->cmap,&ranges);CHKERRQ(ierr);
    ierr = PetscBLASIntCast(ranges[1],&nb);CHKERRQ(ierr);  /* x block size */
    xlld = 1;                                              /* x viewed as a 1-row matrix */
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&xlld,&info));
    PetscCheckScaLapackInfo("descinit",info);
    ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr);
    ierr = PetscBLASIntCast(ranges[1],&mb);CHKERRQ(ierr);  /* y block size */
    ylld = PetscMax(1,A->rmap->n);
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ydesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&ylld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* allocate 2d vectors */
    lszy = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
    lszx = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
    ierr = PetscMalloc2(lszx,&x2d,lszy,&y2d);CHKERRQ(ierr);
    ylld = PetscMax(1,lszy);

    /* create ScaLAPACK descriptors for vectors (2d block distribution) */
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&xlld,&info));
    PetscCheckScaLapackInfo("descinit",info);
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(y2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&ylld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* redistribute x as a row of a 2d matrix */
    PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,(PetscScalar*)x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxrow));

    /* redistribute y as a column of a 2d matrix (only when beta != 0) */
    if (beta!=0.0) PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,y,&one,&one,ydesc,y2d,&one,&one,y2desc,&a->grid->ictxcol));

    /* call PBLAS subroutine */
    PetscStackCallBLAS("PBLASgemv",PBLASgemv_("N",&a->M,&a->N,&alpha,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&one,&beta,y2d,&one,&one,y2desc,&one));

    /* redistribute y from a column of a 2d matrix */
    PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,y2d,&one,&one,y2desc,y,&one,&one,ydesc,&a->grid->ictxcol));

  }
  ierr = PetscFree2(x2d,y2d);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
221 
222 static PetscErrorCode MatMult_ScaLAPACK(Mat A,Vec x,Vec y)
223 {
224   PetscErrorCode    ierr;
225   const PetscScalar *xarray;
226   PetscScalar       *yarray;
227 
228   PetscFunctionBegin;
229   ierr = VecGetArrayRead(x,&xarray);CHKERRQ(ierr);
230   ierr = VecGetArray(y,&yarray);CHKERRQ(ierr);
231   ierr = MatMultXXXYYY_ScaLAPACK(A,PETSC_FALSE,0.0,xarray,yarray);CHKERRQ(ierr);
232   ierr = VecRestoreArrayRead(x,&xarray);CHKERRQ(ierr);
233   ierr = VecRestoreArray(y,&yarray);CHKERRQ(ierr);
234   PetscFunctionReturn(0);
235 }
236 
237 static PetscErrorCode MatMultTranspose_ScaLAPACK(Mat A,Vec x,Vec y)
238 {
239   PetscErrorCode    ierr;
240   const PetscScalar *xarray;
241   PetscScalar       *yarray;
242 
243   PetscFunctionBegin;
244   ierr = VecGetArrayRead(x,&xarray);CHKERRQ(ierr);
245   ierr = VecGetArray(y,&yarray);CHKERRQ(ierr);
246   ierr = MatMultXXXYYY_ScaLAPACK(A,PETSC_TRUE,0.0,xarray,yarray);CHKERRQ(ierr);
247   ierr = VecRestoreArrayRead(x,&xarray);CHKERRQ(ierr);
248   ierr = VecRestoreArray(y,&yarray);CHKERRQ(ierr);
249   PetscFunctionReturn(0);
250 }
251 
252 static PetscErrorCode MatMultAdd_ScaLAPACK(Mat A,Vec x,Vec y,Vec z)
253 {
254   PetscErrorCode    ierr;
255   const PetscScalar *xarray;
256   PetscScalar       *zarray;
257 
258   PetscFunctionBegin;
259   if (y != z) { ierr = VecCopy(y,z);CHKERRQ(ierr); }
260   ierr = VecGetArrayRead(x,&xarray);CHKERRQ(ierr);
261   ierr = VecGetArray(z,&zarray);CHKERRQ(ierr);
262   ierr = MatMultXXXYYY_ScaLAPACK(A,PETSC_FALSE,1.0,xarray,zarray);CHKERRQ(ierr);
263   ierr = VecRestoreArrayRead(x,&xarray);CHKERRQ(ierr);
264   ierr = VecRestoreArray(z,&zarray);CHKERRQ(ierr);
265   PetscFunctionReturn(0);
266 }
267 
268 static PetscErrorCode MatMultTransposeAdd_ScaLAPACK(Mat A,Vec x,Vec y,Vec z)
269 {
270   PetscErrorCode    ierr;
271   const PetscScalar *xarray;
272   PetscScalar       *zarray;
273 
274   PetscFunctionBegin;
275   if (y != z) { ierr = VecCopy(y,z);CHKERRQ(ierr); }
276   ierr = VecGetArrayRead(x,&xarray);CHKERRQ(ierr);
277   ierr = VecGetArray(z,&zarray);CHKERRQ(ierr);
278   ierr = MatMultXXXYYY_ScaLAPACK(A,PETSC_TRUE,1.0,xarray,zarray);CHKERRQ(ierr);
279   ierr = VecRestoreArrayRead(x,&xarray);CHKERRQ(ierr);
280   ierr = VecRestoreArray(z,&zarray);CHKERRQ(ierr);
281   PetscFunctionReturn(0);
282 }
283 
/*
   MatMatMultNumeric_ScaLAPACK - Numeric phase of C = A*B, a single p?gemm
   call ("N","N") with alpha=1, beta=0; C is fully overwritten.
   All three matrices are assumed to live on the same process grid.
*/
PetscErrorCode MatMatMultNumeric_ScaLAPACK(Mat A,Mat B,Mat C)
{
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
  Mat_ScaLAPACK *b = (Mat_ScaLAPACK*)B->data;
  Mat_ScaLAPACK *c = (Mat_ScaLAPACK*)C->data;
  PetscScalar   sone=1.0,zero=0.0;
  PetscBLASInt  one=1;

  PetscFunctionBegin;
  PetscStackCallBLAS("PBLASgemm",PBLASgemm_("N","N",&a->M,&b->N,&a->N,&sone,a->loc,&one,&one,a->desc,b->loc,&one,&one,b->desc,&zero,c->loc,&one,&one,c->desc));
  C->assembled = PETSC_TRUE;
  PetscFunctionReturn(0);
}
297 
/*
   MatMatMultSymbolic_ScaLAPACK - Symbolic phase of C = A*B: sizes C from
   A's rows and B's columns, sets it up as MATSCALAPACK, and registers the
   numeric routine.  fill is ignored (dense storage).
*/
PetscErrorCode MatMatMultSymbolic_ScaLAPACK(Mat A,Mat B,PetscReal fill,Mat C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetSizes(C,A->rmap->n,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
  ierr = MatSetType(C,MATSCALAPACK);CHKERRQ(ierr);
  ierr = MatSetUp(C);CHKERRQ(ierr);
  C->ops->matmultnumeric = MatMatMultNumeric_ScaLAPACK;
  PetscFunctionReturn(0);
}
309 
/*
   MatMatTransposeMultNumeric_ScaLAPACK - Numeric phase of C = A*B^T via a
   single p?gemm call ("N","T") with alpha=1, beta=0.
   NOTE(review): "T" is a plain transpose even in complex builds, i.e. this
   is not A*B^H.
*/
static PetscErrorCode MatMatTransposeMultNumeric_ScaLAPACK(Mat A,Mat B,Mat C)
{
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
  Mat_ScaLAPACK *b = (Mat_ScaLAPACK*)B->data;
  Mat_ScaLAPACK *c = (Mat_ScaLAPACK*)C->data;
  PetscScalar   sone=1.0,zero=0.0;
  PetscBLASInt  one=1;

  PetscFunctionBegin;
  PetscStackCallBLAS("PBLASgemm",PBLASgemm_("N","T",&a->M,&b->M,&a->N,&sone,a->loc,&one,&one,a->desc,b->loc,&one,&one,b->desc,&zero,c->loc,&one,&one,c->desc));
  C->assembled = PETSC_TRUE;
  PetscFunctionReturn(0);
}
323 
324 static PetscErrorCode MatMatTransposeMultSymbolic_ScaLAPACK(Mat A,Mat B,PetscReal fill,Mat C)
325 {
326   PetscErrorCode ierr;
327 
328   PetscFunctionBegin;
329   ierr = MatSetSizes(C,A->rmap->n,B->rmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
330   ierr = MatSetType(C,MATSCALAPACK);CHKERRQ(ierr);
331   ierr = MatSetUp(C);CHKERRQ(ierr);
332   PetscFunctionReturn(0);
333 }
334 
335 /* --------------------------------------- */
/* Wire up the AB (C = A*B) product: the generic MatProductSymbolic_AB driver
   will invoke the type-specific matmultsymbolic registered here. */
static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_AB(Mat C)
{
  PetscFunctionBegin;
  C->ops->matmultsymbolic = MatMatMultSymbolic_ScaLAPACK;
  C->ops->productsymbolic = MatProductSymbolic_AB;
  PetscFunctionReturn(0);
}
343 
/* Wire up the ABt (C = A*B^T) product: the generic MatProductSymbolic_ABt
   driver will invoke the type-specific mattransposemultsymbolic registered here. */
static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_ABt(Mat C)
{
  PetscFunctionBegin;
  C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_ScaLAPACK;
  C->ops->productsymbolic          = MatProductSymbolic_ABt;
  PetscFunctionReturn(0);
}
351 
352 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_ScaLAPACK(Mat C)
353 {
354   PetscErrorCode ierr;
355   Mat_Product    *product = C->product;
356 
357   PetscFunctionBegin;
358   switch (product->type) {
359     case MATPRODUCT_AB:
360       ierr = MatProductSetFromOptions_ScaLAPACK_AB(C);CHKERRQ(ierr);
361       break;
362     case MATPRODUCT_ABt:
363       ierr = MatProductSetFromOptions_ScaLAPACK_ABt(C);CHKERRQ(ierr);
364       break;
365     default: SETERRQ1(PetscObjectComm((PetscObject)C),PETSC_ERR_SUP,"MatProduct type %s is not supported for ScaLAPACK and ScaLAPACK matrices",MatProductTypes[product->type]);
366   }
367   PetscFunctionReturn(0);
368 }
369 /* --------------------------------------- */
370 
/*
   MatGetDiagonal_ScaLAPACK - Extracts the diagonal into the 1d-distributed
   vector D.

   The diagonal has min(M,N) entries.  If M <= N it is gathered as a column
   vector distributed like the rows ("row version"); otherwise as a row
   vector distributed like the columns ("column version").  Each diagonal
   element is read with p?elget (broadcast within a process row "R" or
   column "C") and written into a 2d-distributed work vector, which is then
   redistributed to the 1d layout of D with p?gemr2d.  The per-element loop
   costs O(min(M,N)) collective calls.
*/
static PetscErrorCode MatGetDiagonal_ScaLAPACK(Mat A,Vec D)
{
  PetscErrorCode    ierr;
  Mat_ScaLAPACK     *a = (Mat_ScaLAPACK*)A->data;
  PetscScalar       *darray,*d2d,v;
  const PetscInt    *ranges;
  PetscBLASInt      j,ddesc[9],d2desc[9],mb,nb,lszd,zero=0,one=1,dlld,info;

  PetscFunctionBegin;
  ierr = VecGetArray(D,&darray);CHKERRQ(ierr);

  if (A->rmap->N<=A->cmap->N) {   /* row version */

    /* create ScaLAPACK descriptor for vector (1d block distribution) */
    ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr);
    ierr = PetscBLASIntCast(ranges[1],&mb);CHKERRQ(ierr);  /* D block size */
    dlld = PetscMax(1,A->rmap->n);
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&dlld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* allocate 2d vector (zeroed: only the diagonal entries get written) */
    lszd = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
    ierr = PetscCalloc1(lszd,&d2d);CHKERRQ(ierr);
    dlld = PetscMax(1,lszd);

    /* create ScaLAPACK descriptor for vector (2d block distribution) */
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&dlld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* collect diagonal: element (j,j) broadcast in its process row, stored at row j of d2d */
    for (j=1;j<=a->M;j++) {
      PetscStackCallBLAS("SCALAPACKelget",SCALAPACKelget_("R"," ",&v,a->loc,&j,&j,a->desc));
      PetscStackCallBLAS("SCALAPACKelset",SCALAPACKelset_(d2d,&j,&one,d2desc,&v));
    }

    /* redistribute d from a column of a 2d matrix */
    PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,d2d,&one,&one,d2desc,darray,&one,&one,ddesc,&a->grid->ictxcol));
    ierr = PetscFree(d2d);CHKERRQ(ierr);

  } else {   /* column version */

    /* create ScaLAPACK descriptor for vector (1d block distribution) */
    ierr = PetscLayoutGetRanges(A->cmap,&ranges);CHKERRQ(ierr);
    ierr = PetscBLASIntCast(ranges[1],&nb);CHKERRQ(ierr);  /* D block size */
    dlld = 1;                                              /* D viewed as a 1-row matrix */
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&dlld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* allocate 2d vector */
    lszd = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
    ierr = PetscCalloc1(lszd,&d2d);CHKERRQ(ierr);

    /* create ScaLAPACK descriptor for vector (2d block distribution) */
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&dlld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* collect diagonal: element (j,j) broadcast in its process column, stored at column j of d2d */
    for (j=1;j<=a->N;j++) {
      PetscStackCallBLAS("SCALAPACKelget",SCALAPACKelget_("C"," ",&v,a->loc,&j,&j,a->desc));
      PetscStackCallBLAS("SCALAPACKelset",SCALAPACKelset_(d2d,&one,&j,d2desc,&v));
    }

    /* redistribute d from a row of a 2d matrix */
    PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,d2d,&one,&one,d2desc,darray,&one,&one,ddesc,&a->grid->ictxrow));
    ierr = PetscFree(d2d);CHKERRQ(ierr);
  }

  ierr = VecRestoreArray(D,&darray);CHKERRQ(ierr);
  ierr = VecAssemblyBegin(D);CHKERRQ(ierr);
  ierr = VecAssemblyEnd(D);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
443 
/*
   MatDiagonalScale_ScaLAPACK - Computes A = diag(L)*A*diag(R); either vector
   may be NULL.  R scales the columns, L scales the rows.

   Each scaling vector is redistributed from its 1d PETSc layout into the
   matching slice of the 2d block-cyclic layout, broadcast with BLACS across
   the orthogonal grid dimension so every process holds the factors for its
   local columns (resp. rows), and then applied entrywise to the local array.

   NOTE(review): the broadcasts use Cdgebs2d/Cdgebr2d, the double-precision
   real BLACS routines, while d2d is PetscScalar — confirm the element count/
   type is correct in complex or single-precision builds.
*/
static PetscErrorCode MatDiagonalScale_ScaLAPACK(Mat A,Vec L,Vec R)
{
  PetscErrorCode    ierr;
  Mat_ScaLAPACK     *a = (Mat_ScaLAPACK*)A->data;
  const PetscScalar *d;
  const PetscInt    *ranges;
  PetscScalar       *d2d;
  PetscBLASInt      i,j,ddesc[9],d2desc[9],mb,nb,lszd,zero=0,one=1,dlld,info;

  PetscFunctionBegin;
  if (R) {   /* column scaling */
    ierr = VecGetArrayRead(R,(const PetscScalar **)&d);CHKERRQ(ierr);
    /* create ScaLAPACK descriptor for vector (1d block distribution) */
    ierr = PetscLayoutGetRanges(A->cmap,&ranges);CHKERRQ(ierr);
    ierr = PetscBLASIntCast(ranges[1],&nb);CHKERRQ(ierr);  /* D block size */
    dlld = 1;
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&dlld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* allocate 2d vector */
    lszd = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
    ierr = PetscCalloc1(lszd,&d2d);CHKERRQ(ierr);

    /* create ScaLAPACK descriptor for vector (2d block distribution) */
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&dlld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* redistribute d to a row of a 2d matrix (lands on process row 0 only) */
    PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,(PetscScalar*)d,&one,&one,ddesc,d2d,&one,&one,d2desc,&a->grid->ictxrow));

    /* broadcast along process columns: row 0 sends, all other rows receive */
    if (!a->grid->myrow) Cdgebs2d(a->grid->ictxt,"C"," ",1,lszd,d2d,dlld);
    else Cdgebr2d(a->grid->ictxt,"C"," ",1,lszd,d2d,dlld,0,a->grid->mycol);

    /* local scaling: multiply each local column j by its factor d2d[j] */
    for (j=0;j<a->locc;j++) for (i=0;i<a->locr;i++) a->loc[i+j*a->lld] *= d2d[j];

    ierr = PetscFree(d2d);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(R,(const PetscScalar **)&d);CHKERRQ(ierr);
  }
  if (L) {   /* row scaling */
    ierr = VecGetArrayRead(L,(const PetscScalar **)&d);CHKERRQ(ierr);
    /* create ScaLAPACK descriptor for vector (1d block distribution) */
    ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr);
    ierr = PetscBLASIntCast(ranges[1],&mb);CHKERRQ(ierr);  /* D block size */
    dlld = PetscMax(1,A->rmap->n);
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&dlld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* allocate 2d vector */
    lszd = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
    ierr = PetscCalloc1(lszd,&d2d);CHKERRQ(ierr);
    dlld = PetscMax(1,lszd);

    /* create ScaLAPACK descriptor for vector (2d block distribution) */
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&dlld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* redistribute d to a column of a 2d matrix (lands on process column 0 only) */
    PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,(PetscScalar*)d,&one,&one,ddesc,d2d,&one,&one,d2desc,&a->grid->ictxcol));

    /* broadcast along process rows: column 0 sends, all other columns receive */
    if (!a->grid->mycol) Cdgebs2d(a->grid->ictxt,"R"," ",lszd,1,d2d,dlld);
    else Cdgebr2d(a->grid->ictxt,"R"," ",lszd,1,d2d,dlld,a->grid->myrow,0);

    /* local scaling: multiply each local row i by its factor d2d[i] */
    for (i=0;i<a->locr;i++) for (j=0;j<a->locc;j++) a->loc[i+j*a->lld] *= d2d[i];

    ierr = PetscFree(d2d);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(L,(const PetscScalar **)&d);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
517 
/*
   MatMissingDiagonal_ScaLAPACK - A ScaLAPACK matrix is stored as a dense 2d
   block-cyclic array, so every diagonal entry exists by construction.
   d is left untouched; callers must test *missing before reading it.
*/
static PetscErrorCode MatMissingDiagonal_ScaLAPACK(Mat A,PetscBool *missing,PetscInt *d)
{
  PetscFunctionBegin;
  *missing = PETSC_FALSE;
  PetscFunctionReturn(0);
}
524 
/*
   MatScale_ScaLAPACK - X = a*X with a single BLAS scal call over the whole
   local array of length lld*locc.  This also scales the padding rows between
   locr and lld (cf. MatGetInfo: lld*locc allocated vs locr*locc used), which
   is harmless since those entries are never read.
*/
static PetscErrorCode MatScale_ScaLAPACK(Mat X,PetscScalar a)
{
  Mat_ScaLAPACK *x = (Mat_ScaLAPACK*)X->data;
  PetscBLASInt  n,one=1;

  PetscFunctionBegin;
  n = x->lld*x->locc;
  PetscStackCallBLAS("BLASscal",BLASscal_(&n,&a,x->loc,&one));
  PetscFunctionReturn(0);
}
535 
/*
   MatShift_ScaLAPACK - X = X + alpha*I, adding alpha to each of the
   min(M,N) diagonal entries via p?elget/p?elset ("-" scope: the value is
   not broadcast, only the owning process updates it).  Collective loop,
   O(min(M,N)) ScaLAPACK calls.
*/
static PetscErrorCode MatShift_ScaLAPACK(Mat X,PetscScalar alpha)
{
  Mat_ScaLAPACK *x = (Mat_ScaLAPACK*)X->data;
  PetscBLASInt  i,n;
  PetscScalar   v;

  PetscFunctionBegin;
  n = PetscMin(x->M,x->N);
  for (i=1;i<=n;i++) {   /* 1-based global indices for ScaLAPACK */
    PetscStackCallBLAS("SCALAPACKelget",SCALAPACKelget_("-"," ",&v,x->loc,&i,&i,x->desc));
    v += alpha;
    PetscStackCallBLAS("SCALAPACKelset",SCALAPACKelset_(x->loc,&i,&i,x->desc,&v));
  }
  PetscFunctionReturn(0);
}
551 
/*
   MatAXPY_ScaLAPACK - Y = alpha*X + Y via ScaLAPACK's matrix-add routine
   (called with beta = 1.0 on Y).  str is ignored: both operands are dense.
   X and Y must share the same 2d block-cyclic distribution (checked).
*/
static PetscErrorCode MatAXPY_ScaLAPACK(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_ScaLAPACK  *x = (Mat_ScaLAPACK*)X->data;
  Mat_ScaLAPACK  *y = (Mat_ScaLAPACK*)Y->data;
  PetscBLASInt   one=1;
  PetscScalar    beta=1.0;

  PetscFunctionBegin;
  MatScaLAPACKCheckDistribution(Y,1,X,3);
  PetscStackCallBLAS("SCALAPACKmatadd",SCALAPACKmatadd_(&x->M,&x->N,&alpha,x->loc,&one,&one,x->desc,&beta,y->loc,&one,&one,y->desc));
  ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
566 
567 static PetscErrorCode MatCopy_ScaLAPACK(Mat A,Mat B,MatStructure str)
568 {
569   PetscErrorCode ierr;
570   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
571   Mat_ScaLAPACK  *b = (Mat_ScaLAPACK*)B->data;
572 
573   PetscFunctionBegin;
574   ierr = PetscArraycpy(b->loc,a->loc,a->lld*a->locc);CHKERRQ(ierr);
575   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
576   PetscFunctionReturn(0);
577 }
578 
/*
   MatDuplicate_ScaLAPACK - Creates a new MATSCALAPACK matrix with the same
   layout as A, optionally copying the values.

   The distribution parameters (global sizes, block sizes, source process)
   are copied into the new Mat_ScaLAPACK *before* MatSetUp() so that setup
   allocates an identical local array; that makes the raw PetscArraycpy of
   lld*locc entries valid for MAT_COPY_VALUES.
*/
static PetscErrorCode MatDuplicate_ScaLAPACK(Mat A,MatDuplicateOption op,Mat *B)
{
  Mat            Bs;
  MPI_Comm       comm;
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data,*b;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MatCreate(comm,&Bs);CHKERRQ(ierr);
  ierr = MatSetSizes(Bs,A->rmap->n,A->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
  ierr = MatSetType(Bs,MATSCALAPACK);CHKERRQ(ierr);
  b = (Mat_ScaLAPACK*)Bs->data;
  /* replicate A's distribution before setup so the local arrays match */
  b->M    = a->M;
  b->N    = a->N;
  b->mb   = a->mb;
  b->nb   = a->nb;
  b->rsrc = a->rsrc;
  b->csrc = a->csrc;
  ierr = MatSetUp(Bs);CHKERRQ(ierr);
  *B = Bs;
  if (op == MAT_COPY_VALUES) {
    ierr = PetscArraycpy(b->loc,a->loc,a->lld*a->locc);CHKERRQ(ierr);
  }
  Bs->assembled = PETSC_TRUE;
  PetscFunctionReturn(0);
}
606 
/*
   MatTranspose_ScaLAPACK - B = A^T via the PBLAS transpose routine.
   Only MAT_INITIAL_MATRIX is supported.

   In complex builds the PBLAS routine used here conjugates as well (see the
   "undo conjugation" step below), so PetscConj is applied entrywise to the
   result to obtain a plain transpose.
*/
static PetscErrorCode MatTranspose_ScaLAPACK(Mat A,MatReuse reuse,Mat *B)
{
  PetscErrorCode ierr;
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data, *b;
  Mat            Bs = *B;
  PetscBLASInt   one=1;
  PetscScalar    sone=1.0,zero=0.0;
#if defined(PETSC_USE_COMPLEX)
  PetscInt       i,j;
#endif

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    /* B has A's column layout as rows and vice versa */
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&Bs);CHKERRQ(ierr);
    ierr = MatSetSizes(Bs,A->cmap->n,A->rmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
    ierr = MatSetType(Bs,MATSCALAPACK);CHKERRQ(ierr);
    ierr = MatSetUp(Bs);CHKERRQ(ierr);
    *B = Bs;
  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Only MAT_INITIAL_MATRIX supported");
  b = (Mat_ScaLAPACK*)Bs->data;
  PetscStackCallBLAS("PBLAStran",PBLAStran_(&a->N,&a->M,&sone,a->loc,&one,&one,a->desc,&zero,b->loc,&one,&one,b->desc));
#if defined(PETSC_USE_COMPLEX)
  /* undo conjugation */
  for (i=0;i<b->locr;i++) for (j=0;j<b->locc;j++) b->loc[i+j*b->lld] = PetscConj(b->loc[i+j*b->lld]);
#endif
  Bs->assembled = PETSC_TRUE;
  PetscFunctionReturn(0);
}
635 
636 static PetscErrorCode MatConjugate_ScaLAPACK(Mat A)
637 {
638   Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
639   PetscInt      i,j;
640 
641   PetscFunctionBegin;
642   for (i=0;i<a->locr;i++) for (j=0;j<a->locc;j++) a->loc[i+j*a->lld] = PetscConj(a->loc[i+j*a->lld]);
643   PetscFunctionReturn(0);
644 }
645 
/*
   MatHermitianTranspose_ScaLAPACK - B = A^H via the PBLAS transpose routine.
   Only MAT_INITIAL_MATRIX is supported.

   Unlike MatTranspose_ScaLAPACK(), no "undo conjugation" pass follows the
   PBLAS call: in complex builds the routine's conjugation is exactly what a
   Hermitian transpose needs, and in real builds A^H == A^T.
*/
static PetscErrorCode MatHermitianTranspose_ScaLAPACK(Mat A,MatReuse reuse,Mat *B)
{
  PetscErrorCode ierr;
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data, *b;
  Mat            Bs = *B;
  PetscBLASInt   one=1;
  PetscScalar    sone=1.0,zero=0.0;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&Bs);CHKERRQ(ierr);
    ierr = MatSetSizes(Bs,A->cmap->n,A->rmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
    ierr = MatSetType(Bs,MATSCALAPACK);CHKERRQ(ierr);
    ierr = MatSetUp(Bs);CHKERRQ(ierr);
    *B = Bs;
  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Only MAT_INITIAL_MATRIX supported");
  b = (Mat_ScaLAPACK*)Bs->data;
  PetscStackCallBLAS("PBLAStran",PBLAStran_(&a->N,&a->M,&sone,a->loc,&one,&one,a->desc,&zero,b->loc,&one,&one,b->desc));
  Bs->assembled = PETSC_TRUE;
  PetscFunctionReturn(0);
}
667 
/*
   MatSolve_ScaLAPACK - Solves A*X = B using the in-place factors of A
   (p?getrs after LU, p?potrs with "L" after Cholesky).

   B is first copied into X; X is then redistributed from PETSc's 1d block
   layout to a single column in the 2d block-cyclic layout, the triangular
   solves are applied, and the result is redistributed back.
*/
static PetscErrorCode MatSolve_ScaLAPACK(Mat A,Vec B,Vec X)
{
  PetscErrorCode ierr;
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
  PetscScalar    *x,*x2d;
  const PetscInt *ranges;
  PetscBLASInt   xdesc[9],x2desc[9],mb,lszx,zero=0,one=1,xlld,nrhs=1,info;

  PetscFunctionBegin;
  ierr = VecCopy(B,X);CHKERRQ(ierr);   /* solve happens in place on X's array */
  ierr = VecGetArray(X,&x);CHKERRQ(ierr);

  /* create ScaLAPACK descriptor for a vector (1d block distribution) */
  ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(ranges[1],&mb);CHKERRQ(ierr);  /* x block size */
  xlld = PetscMax(1,A->rmap->n);
  PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&xlld,&info));
  PetscCheckScaLapackInfo("descinit",info);

  /* allocate 2d vector */
  lszx = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
  ierr = PetscMalloc1(lszx,&x2d);CHKERRQ(ierr);
  xlld = PetscMax(1,lszx);

  /* create ScaLAPACK descriptor for a vector (2d block distribution) */
  PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&xlld,&info));
  PetscCheckScaLapackInfo("descinit",info);

  /* redistribute x as a column of a 2d matrix */
  PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxcol));

  /* call ScaLAPACK subroutine appropriate for the stored factorization */
  switch (A->factortype) {
    case MAT_FACTOR_LU:
      PetscStackCallBLAS("SCALAPACKgetrs",SCALAPACKgetrs_("N",&a->M,&nrhs,a->loc,&one,&one,a->desc,a->pivots,x2d,&one,&one,x2desc,&info));
      PetscCheckScaLapackInfo("getrs",info);
      break;
    case MAT_FACTOR_CHOLESKY:
      PetscStackCallBLAS("SCALAPACKpotrs",SCALAPACKpotrs_("L",&a->M,&nrhs,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&info));
      PetscCheckScaLapackInfo("potrs",info);
      break;
    default:
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unfactored Matrix or Unsupported MatFactorType");
  }

  /* redistribute x from a column of a 2d matrix */
  PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,x2d,&one,&one,x2desc,x,&one,&one,xdesc,&a->grid->ictxcol));

  ierr = PetscFree(x2d);CHKERRQ(ierr);
  ierr = VecRestoreArray(X,&x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
720 
/*
   MatSolveAdd_ScaLAPACK - Computes X = A^{-1} B + Y for a factored ScaLAPACK matrix,
   by solving into X first and then accumulating Y with an AXPY.
*/
static PetscErrorCode MatSolveAdd_ScaLAPACK(Mat A,Vec B,Vec Y,Vec X)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSolve_ScaLAPACK(A,B,X);CHKERRQ(ierr);
  ierr = VecAXPY(X,1,Y);CHKERRQ(ierr);   /* X += Y */
  PetscFunctionReturn(0);
}
730 
/*
   MatMatSolve_ScaLAPACK - Solves A X = B with multiple right-hand sides,
   where A holds an LU or Cholesky factorization computed by ScaLAPACK.

   Both B and X must be MATSCALAPACK with matching 2d block-cyclic
   distributions; B's local block is copied into X and the triangular
   solves overwrite X in place.
*/
static PetscErrorCode MatMatSolve_ScaLAPACK(Mat A,Mat B,Mat X)
{
  PetscErrorCode ierr;
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data,*b,*x;
  PetscBool      flg1,flg2;
  PetscBLASInt   one=1,info;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)B,MATSCALAPACK,&flg1);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)X,MATSCALAPACK,&flg2);CHKERRQ(ierr);
  if (!(flg1 && flg2)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Both B and X must be of type MATSCALAPACK");
  MatScaLAPACKCheckDistribution(B,1,X,2);
  b = (Mat_ScaLAPACK*)B->data;
  x = (Mat_ScaLAPACK*)X->data;
  /* copy the local block of B into X; the solve below overwrites it with the solution */
  ierr = PetscArraycpy(x->loc,b->loc,b->lld*b->locc);CHKERRQ(ierr);

  switch (A->factortype) {
    case MAT_FACTOR_LU:
      PetscStackCallBLAS("SCALAPACKgetrs",SCALAPACKgetrs_("N",&a->M,&x->N,a->loc,&one,&one,a->desc,a->pivots,x->loc,&one,&one,x->desc,&info));
      PetscCheckScaLapackInfo("getrs",info);
      break;
    case MAT_FACTOR_CHOLESKY:
      PetscStackCallBLAS("SCALAPACKpotrs",SCALAPACKpotrs_("L",&a->M,&x->N,a->loc,&one,&one,a->desc,x->loc,&one,&one,x->desc,&info));
      PetscCheckScaLapackInfo("potrs",info);
      break;
    default:
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unfactored Matrix or Unsupported MatFactorType");
  }
  PetscFunctionReturn(0);
}
761 
762 static PetscErrorCode MatLUFactor_ScaLAPACK(Mat A,IS row,IS col,const MatFactorInfo *factorinfo)
763 {
764   PetscErrorCode ierr;
765   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
766   PetscBLASInt   one=1,info;
767 
768   PetscFunctionBegin;
769   if (!a->pivots) {
770     ierr = PetscMalloc1(a->locr+a->mb,&a->pivots);CHKERRQ(ierr);
771     ierr = PetscLogObjectMemory((PetscObject)A,a->locr*sizeof(PetscBLASInt));CHKERRQ(ierr);
772   }
773   PetscStackCallBLAS("SCALAPACKgetrf",SCALAPACKgetrf_(&a->M,&a->N,a->loc,&one,&one,a->desc,a->pivots,&info));
774   PetscCheckScaLapackInfo("getrf",info);
775   A->factortype = MAT_FACTOR_LU;
776   A->assembled  = PETSC_TRUE;
777 
778   ierr = PetscFree(A->solvertype);CHKERRQ(ierr);
779   ierr = PetscStrallocpy(MATSOLVERSCALAPACK,&A->solvertype);CHKERRQ(ierr);
780   PetscFunctionReturn(0);
781 }
782 
/*
   MatLUFactorNumeric_ScaLAPACK - Numeric LU stage: copies A into the factor
   matrix F and factors F in place.
*/
static PetscErrorCode MatLUFactorNumeric_ScaLAPACK(Mat F,Mat A,const MatFactorInfo *info)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatCopy(A,F,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  ierr = MatLUFactor_ScaLAPACK(F,0,0,info);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
792 
/*
   MatLUFactorSymbolic_ScaLAPACK - No symbolic phase is needed for dense
   ScaLAPACK factorizations; intentionally a no-op.
*/
static PetscErrorCode MatLUFactorSymbolic_ScaLAPACK(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
{
  PetscFunctionBegin;
  /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */
  PetscFunctionReturn(0);
}
799 
/*
   MatCholeskyFactor_ScaLAPACK - In-place Cholesky factorization of the
   lower triangle via ScaLAPACK p*potrf. The permutation argument is ignored.
*/
static PetscErrorCode MatCholeskyFactor_ScaLAPACK(Mat A,IS perm,const MatFactorInfo *factorinfo)
{
  PetscErrorCode ierr;
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
  PetscBLASInt   one=1,info;

  PetscFunctionBegin;
  PetscStackCallBLAS("SCALAPACKpotrf",SCALAPACKpotrf_("L",&a->M,a->loc,&one,&one,a->desc,&info));
  PetscCheckScaLapackInfo("potrf",info);
  A->factortype = MAT_FACTOR_CHOLESKY;
  A->assembled  = PETSC_TRUE;

  ierr = PetscFree(A->solvertype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(MATSOLVERSCALAPACK,&A->solvertype);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
816 
/*
   MatCholeskyFactorNumeric_ScaLAPACK - Numeric Cholesky stage: copies A into
   the factor matrix F and factors F in place.
*/
static PetscErrorCode MatCholeskyFactorNumeric_ScaLAPACK(Mat F,Mat A,const MatFactorInfo *info)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatCopy(A,F,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  ierr = MatCholeskyFactor_ScaLAPACK(F,0,info);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
826 
/*
   MatCholeskyFactorSymbolic_ScaLAPACK - No symbolic phase is needed for dense
   ScaLAPACK factorizations; intentionally a no-op.
*/
static PetscErrorCode MatCholeskyFactorSymbolic_ScaLAPACK(Mat F,Mat A,IS perm,const MatFactorInfo *info)
{
  PetscFunctionBegin;
  /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */
  PetscFunctionReturn(0);
}
833 
/*
   MatFactorGetSolverType_scalapack_scalapack - Reports MATSOLVERSCALAPACK as
   the solver package backing this factored matrix.
*/
PetscErrorCode MatFactorGetSolverType_scalapack_scalapack(Mat A,MatSolverType *type)
{
  PetscFunctionBegin;
  *type = MATSOLVERSCALAPACK;
  PetscFunctionReturn(0);
}
840 
/*
   MatGetFactor_scalapack_scalapack - Creates the factor matrix F for the
   ScaLAPACK solver package: a fully allocated MATSCALAPACK with the same
   local layout as A, tagged with the requested factor type.
*/
static PetscErrorCode MatGetFactor_scalapack_scalapack(Mat A,MatFactorType ftype,Mat *F)
{
  Mat            B;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Create the factorization matrix */
  ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
  ierr = MatSetType(B,MATSCALAPACK);CHKERRQ(ierr);
  ierr = MatSetUp(B);CHKERRQ(ierr);
  B->factortype = ftype;
  ierr = PetscFree(B->solvertype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(MATSOLVERSCALAPACK,&B->solvertype);CHKERRQ(ierr);

  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_scalapack_scalapack);CHKERRQ(ierr);
  *F = B;
  PetscFunctionReturn(0);
}
860 
/*
   MatSolverTypeRegister_ScaLAPACK - Registers the ScaLAPACK solver package
   for LU and Cholesky factorizations of MATSCALAPACK matrices.
*/
PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_ScaLAPACK(void)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSolverTypeRegister(MATSOLVERSCALAPACK,MATSCALAPACK,MAT_FACTOR_LU,MatGetFactor_scalapack_scalapack);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERSCALAPACK,MATSCALAPACK,MAT_FACTOR_CHOLESKY,MatGetFactor_scalapack_scalapack);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
870 
871 static PetscErrorCode MatNorm_ScaLAPACK(Mat A,NormType type,PetscReal *nrm)
872 {
873   PetscErrorCode ierr;
874   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
875   PetscBLASInt   one=1,lwork=0;
876   const char     *ntype;
877   PetscScalar    *work=NULL,dummy;
878 
879   PetscFunctionBegin;
880   switch (type){
881     case NORM_1:
882       ntype = "1";
883       lwork = PetscMax(a->locr,a->locc);
884       break;
885     case NORM_FROBENIUS:
886       ntype = "F";
887       work  = &dummy;
888       break;
889     case NORM_INFINITY:
890       ntype = "I";
891       lwork = PetscMax(a->locr,a->locc);
892       break;
893     default:
894       SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Unsupported norm type");
895   }
896   if (lwork) { ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); }
897   *nrm = SCALAPACKlange_(ntype,&a->M,&a->N,a->loc,&one,&one,a->desc,work);
898   if (lwork) { ierr = PetscFree(work);CHKERRQ(ierr); }
899   PetscFunctionReturn(0);
900 }
901 
902 static PetscErrorCode MatZeroEntries_ScaLAPACK(Mat A)
903 {
904   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
905   PetscErrorCode ierr;
906 
907   PetscFunctionBegin;
908   ierr = PetscArrayzero(a->loc,a->lld*a->locc);CHKERRQ(ierr);
909   PetscFunctionReturn(0);
910 }
911 
912 static PetscErrorCode MatGetOwnershipIS_ScaLAPACK(Mat A,IS *rows,IS *cols)
913 {
914   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
915   PetscErrorCode ierr;
916   PetscInt       i,n,nb,isrc,nproc,iproc,*idx;
917 
918   PetscFunctionBegin;
919   if (rows) {
920     n     = a->locr;
921     nb    = a->mb;
922     isrc  = a->rsrc;
923     nproc = a->grid->nprow;
924     iproc = a->grid->myrow;
925     ierr  = PetscMalloc1(n,&idx);CHKERRQ(ierr);
926     for (i=0;i<n;i++) idx[i] = nproc*nb*(i/nb) + i%nb + ((nproc+iproc-isrc)%nproc)*nb;
927     ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx,PETSC_OWN_POINTER,rows);CHKERRQ(ierr);
928   }
929   if (cols) {
930     n     = a->locc;
931     nb    = a->nb;
932     isrc  = a->csrc;
933     nproc = a->grid->npcol;
934     iproc = a->grid->mycol;
935     ierr  = PetscMalloc1(n,&idx);CHKERRQ(ierr);
936     for (i=0;i<n;i++) idx[i] = nproc*nb*(i/nb) + i%nb + ((nproc+iproc-isrc)%nproc)*nb;
937     ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx,PETSC_OWN_POINTER,cols);CHKERRQ(ierr);
938   }
939   PetscFunctionReturn(0);
940 }
941 
/*
   MatConvert_ScaLAPACK_Dense - Converts a MATSCALAPACK matrix (2d block-cyclic)
   to MATDENSE (1d block row distribution) using ScaLAPACK's p*gemr2d
   redistribution routine.

   Normal case: a dense matrix with an equal row split is filled directly.
   Special case: when reusing an existing B whose local row sizes differ from
   A's layout, an auxiliary dense matrix with A's layout is filled first and
   its rows are then transferred to B via MatSetValues.
*/
static PetscErrorCode MatConvert_ScaLAPACK_Dense(Mat A,MatType newtype,MatReuse reuse,Mat *B)
{
  PetscErrorCode ierr;
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
  Mat            Bmpi;
  MPI_Comm       comm;
  PetscInt       i,M=A->rmap->N,N=A->cmap->N,m,n,rstart,rend,nz;
  const PetscInt *ranges,*branges,*cwork;
  const PetscScalar *vwork;
  PetscBLASInt   bdesc[9],bmb,zero=0,one=1,lld,info;
  PetscScalar    *barray;
  PetscBool      differ=PETSC_FALSE;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr);

  if (reuse == MAT_REUSE_MATRIX) { /* check if local sizes differ in A and B */
    ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
    ierr = PetscLayoutGetRanges((*B)->rmap,&branges);CHKERRQ(ierr);
    for (i=0;i<size;i++) if (ranges[i+1]!=branges[i+1]) { differ=PETSC_TRUE; break; }
  }

  if (reuse == MAT_REUSE_MATRIX && differ) { /* special case, use auxiliary dense matrix */
    ierr = MatCreate(comm,&Bmpi);CHKERRQ(ierr);
    m = PETSC_DECIDE;
    ierr = PetscSplitOwnershipEqual(comm,&m,&M);CHKERRQ(ierr);
    n = PETSC_DECIDE;
    ierr = PetscSplitOwnershipEqual(comm,&n,&N);CHKERRQ(ierr);
    ierr = MatSetSizes(Bmpi,m,n,M,N);CHKERRQ(ierr);
    ierr = MatSetType(Bmpi,MATDENSE);CHKERRQ(ierr);
    ierr = MatSetUp(Bmpi);CHKERRQ(ierr);

    /* create ScaLAPACK descriptor for B (1d block distribution) */
    ierr = PetscBLASIntCast(ranges[1],&bmb);CHKERRQ(ierr);  /* row block size */
    lld = PetscMax(A->rmap->n,1);  /* local leading dimension */
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(bdesc,&a->M,&a->N,&bmb,&a->N,&zero,&zero,&a->grid->ictxcol,&lld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* redistribute matrix */
    ierr = MatDenseGetArray(Bmpi,&barray);CHKERRQ(ierr);
    PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&a->N,a->loc,&one,&one,a->desc,barray,&one,&one,bdesc,&a->grid->ictxcol));
    ierr = MatDenseRestoreArray(Bmpi,&barray);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

    /* transfer rows of auxiliary matrix to the final matrix B */
    ierr = MatGetOwnershipRange(Bmpi,&rstart,&rend);CHKERRQ(ierr);
    for (i=rstart;i<rend;i++) {
      ierr = MatGetRow(Bmpi,i,&nz,&cwork,&vwork);CHKERRQ(ierr);
      ierr = MatSetValues(*B,1,&i,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow(Bmpi,i,&nz,&cwork,&vwork);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatDestroy(&Bmpi);CHKERRQ(ierr);

  } else {  /* normal cases */

    if (reuse == MAT_REUSE_MATRIX) Bmpi = *B;
    else {
      ierr = MatCreate(comm,&Bmpi);CHKERRQ(ierr);
      m = PETSC_DECIDE;
      ierr = PetscSplitOwnershipEqual(comm,&m,&M);CHKERRQ(ierr);
      n = PETSC_DECIDE;
      ierr = PetscSplitOwnershipEqual(comm,&n,&N);CHKERRQ(ierr);
      ierr = MatSetSizes(Bmpi,m,n,M,N);CHKERRQ(ierr);
      ierr = MatSetType(Bmpi,MATDENSE);CHKERRQ(ierr);
      ierr = MatSetUp(Bmpi);CHKERRQ(ierr);
    }

    /* create ScaLAPACK descriptor for B (1d block distribution) */
    ierr = PetscBLASIntCast(ranges[1],&bmb);CHKERRQ(ierr);  /* row block size */
    lld = PetscMax(A->rmap->n,1);  /* local leading dimension */
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(bdesc,&a->M,&a->N,&bmb,&a->N,&zero,&zero,&a->grid->ictxcol,&lld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* redistribute matrix */
    ierr = MatDenseGetArray(Bmpi,&barray);CHKERRQ(ierr);
    PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&a->N,a->loc,&one,&one,a->desc,barray,&one,&one,bdesc,&a->grid->ictxcol));
    ierr = MatDenseRestoreArray(Bmpi,&barray);CHKERRQ(ierr);

    ierr = MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    if (reuse == MAT_INPLACE_MATRIX) {
      ierr = MatHeaderReplace(A,&Bmpi);CHKERRQ(ierr);
    } else *B = Bmpi;
  }
  PetscFunctionReturn(0);
}
1033 
/*
   MatConvert_Dense_ScaLAPACK - Converts a MATDENSE matrix (1d block row
   distribution) to MATSCALAPACK (2d block-cyclic) using p*gemr2d.

   A 1d descriptor is built to describe the dense input (row block size taken
   from the first range of A's row layout) and its data is redistributed into
   the target matrix's local array.
*/
PETSC_INTERN PetscErrorCode MatConvert_Dense_ScaLAPACK(Mat A,MatType newtype,MatReuse reuse,Mat *B)
{
  PetscErrorCode ierr;
  Mat_ScaLAPACK  *b;
  Mat            Bmpi;
  MPI_Comm       comm;
  PetscInt       M=A->rmap->N,N=A->cmap->N,m,n;
  const PetscInt *ranges;
  PetscBLASInt   adesc[9],amb,zero=0,one=1,lld,info;
  PetscScalar    *aarray;
  PetscInt       lda;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);

  if (reuse == MAT_REUSE_MATRIX) Bmpi = *B;
  else {
    ierr = MatCreate(comm,&Bmpi);CHKERRQ(ierr);
    m = PETSC_DECIDE;
    ierr = PetscSplitOwnershipEqual(comm,&m,&M);CHKERRQ(ierr);
    n = PETSC_DECIDE;
    ierr = PetscSplitOwnershipEqual(comm,&n,&N);CHKERRQ(ierr);
    ierr = MatSetSizes(Bmpi,m,n,M,N);CHKERRQ(ierr);
    ierr = MatSetType(Bmpi,MATSCALAPACK);CHKERRQ(ierr);
    ierr = MatSetUp(Bmpi);CHKERRQ(ierr);
  }
  b = (Mat_ScaLAPACK*)Bmpi->data;

  /* create ScaLAPACK descriptor for A (1d block distribution) */
  ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(ranges[1],&amb);CHKERRQ(ierr);  /* row block size */
  ierr = MatDenseGetLDA(A,&lda);CHKERRQ(ierr);
  lld = PetscMax(lda,1);  /* local leading dimension */
  PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(adesc,&b->M,&b->N,&amb,&b->N,&zero,&zero,&b->grid->ictxcol,&lld,&info));
  PetscCheckScaLapackInfo("descinit",info);

  /* redistribute matrix */
  ierr = MatDenseGetArray(A,&aarray);CHKERRQ(ierr);
  PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&b->M,&b->N,aarray,&one,&one,adesc,b->loc,&one,&one,b->desc,&b->grid->ictxcol));
  ierr = MatDenseRestoreArray(A,&aarray);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INPLACE_MATRIX) {
    ierr = MatHeaderReplace(A,&Bmpi);CHKERRQ(ierr);
  } else *B = Bmpi;
  PetscFunctionReturn(0);
}
1082 
1083 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
1084 {
1085   Mat               mat_scal;
1086   PetscErrorCode    ierr;
1087   PetscInt          M=A->rmap->N,N=A->cmap->N,rstart=A->rmap->rstart,rend=A->rmap->rend,m,n,row,ncols;
1088   const PetscInt    *cols;
1089   const PetscScalar *vals;
1090 
1091   PetscFunctionBegin;
1092   if (reuse == MAT_REUSE_MATRIX) {
1093     mat_scal = *newmat;
1094     ierr = MatZeroEntries(mat_scal);CHKERRQ(ierr);
1095   } else {
1096     ierr = MatCreate(PetscObjectComm((PetscObject)A),&mat_scal);CHKERRQ(ierr);
1097     m = PETSC_DECIDE;
1098     ierr = PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&m,&M);CHKERRQ(ierr);
1099     n = PETSC_DECIDE;
1100     ierr = PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&n,&N);CHKERRQ(ierr);
1101     ierr = MatSetSizes(mat_scal,m,n,M,N);CHKERRQ(ierr);
1102     ierr = MatSetType(mat_scal,MATSCALAPACK);CHKERRQ(ierr);
1103     ierr = MatSetUp(mat_scal);CHKERRQ(ierr);
1104   }
1105   for (row=rstart;row<rend;row++) {
1106     ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
1107     ierr = MatSetValues(mat_scal,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
1108     ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
1109   }
1110   ierr = MatAssemblyBegin(mat_scal,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1111   ierr = MatAssemblyEnd(mat_scal,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1112 
1113   if (reuse == MAT_INPLACE_MATRIX) { ierr = MatHeaderReplace(A,&mat_scal);CHKERRQ(ierr); }
1114   else *newmat = mat_scal;
1115   PetscFunctionReturn(0);
1116 }
1117 
/*
   MatConvert_SBAIJ_ScaLAPACK - Converts a symmetric SBAIJ matrix (which stores
   only the upper triangle) to a full MATSCALAPACK matrix: each upper-triangular
   entry is also mirrored into the lower triangle (conjugated for Hermitian
   matrices), skipping the diagonal so it is not inserted twice.
*/
PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
{
  Mat               mat_scal;
  PetscErrorCode    ierr;
  PetscInt          M=A->rmap->N,N=A->cmap->N,m,n,row,ncols,j,rstart=A->rmap->rstart,rend=A->rmap->rend;
  const PetscInt    *cols;
  const PetscScalar *vals;
  PetscScalar       v;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) {
    mat_scal = *newmat;
    ierr = MatZeroEntries(mat_scal);CHKERRQ(ierr);
  } else {
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&mat_scal);CHKERRQ(ierr);
    m = PETSC_DECIDE;
    ierr = PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&m,&M);CHKERRQ(ierr);
    n = PETSC_DECIDE;
    ierr = PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&n,&N);CHKERRQ(ierr);
    ierr = MatSetSizes(mat_scal,m,n,M,N);CHKERRQ(ierr);
    ierr = MatSetType(mat_scal,MATSCALAPACK);CHKERRQ(ierr);
    ierr = MatSetUp(mat_scal);CHKERRQ(ierr);
  }
  ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
  for (row=rstart;row<rend;row++) {
    ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
    ierr = MatSetValues(mat_scal,1,&row,ncols,cols,vals,ADD_VALUES);CHKERRQ(ierr);
    for (j=0;j<ncols;j++) { /* lower triangular part */
      /* mirror off-diagonal entries; diagonal entries were already inserted above */
      if (cols[j] == row) continue;
      v    = A->hermitian ? PetscConj(vals[j]) : vals[j];
      ierr = MatSetValues(mat_scal,1,&cols[j],1,&row,&v,ADD_VALUES);CHKERRQ(ierr);
    }
    ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
  }
  ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(mat_scal,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat_scal,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  if (reuse == MAT_INPLACE_MATRIX) { ierr = MatHeaderReplace(A,&mat_scal);CHKERRQ(ierr); }
  else *newmat = mat_scal;
  PetscFunctionReturn(0);
}
1160 
/*
   MatScaLAPACKSetPreallocation - Allocates (and zeroes) the local array that
   stores this process's block of the 2d block-cyclic distribution, of size
   lld*locc scalars. Any previously allocated array is freed first.
*/
static PetscErrorCode MatScaLAPACKSetPreallocation(Mat A)
{
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
  PetscErrorCode ierr;
  PetscInt       sz=0;

  PetscFunctionBegin;
  ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
  if (!a->lld) a->lld = a->locr;

  ierr = PetscFree(a->loc);CHKERRQ(ierr);
  /* overflow-checked product lld*locc */
  ierr = PetscIntMultError(a->lld,a->locc,&sz);CHKERRQ(ierr);
  ierr = PetscCalloc1(sz,&a->loc);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)A,sz*sizeof(PetscScalar));CHKERRQ(ierr);

  A->preallocated = PETSC_TRUE;
  PetscFunctionReturn(0);
}
1180 
/*
   MatDestroy_ScaLAPACK - Releases all resources held by the matrix.

   The BLACS process grid is shared among matrices on the same communicator via
   the Petsc_ScaLAPACK_keyval MPI attribute and is reference-counted; the grid
   contexts are only torn down (and the attribute deleted) when the last matrix
   using them is destroyed.
*/
static PetscErrorCode MatDestroy_ScaLAPACK(Mat A)
{
  Mat_ScaLAPACK      *a = (Mat_ScaLAPACK*)A->data;
  PetscErrorCode     ierr;
  Mat_ScaLAPACK_Grid *grid;
  PetscBool          flg;
  MPI_Comm           icomm;

  PetscFunctionBegin;
  ierr = MatStashDestroy_Private(&A->stash);CHKERRQ(ierr);
  ierr = PetscFree(a->loc);CHKERRQ(ierr);
  ierr = PetscFree(a->pivots);CHKERRQ(ierr);
  ierr = PetscCommDuplicate(PetscObjectComm((PetscObject)A),&icomm,NULL);CHKERRQ(ierr);
  ierr = MPI_Comm_get_attr(icomm,Petsc_ScaLAPACK_keyval,(void**)&grid,(int*)&flg);CHKERRMPI(ierr);
  if (--grid->grid_refct == 0) {
    /* last user of the shared grid: release the three BLACS contexts */
    Cblacs_gridexit(grid->ictxt);
    Cblacs_gridexit(grid->ictxrow);
    Cblacs_gridexit(grid->ictxcol);
    ierr = PetscFree(grid);CHKERRQ(ierr);
    ierr = MPI_Comm_delete_attr(icomm,Petsc_ScaLAPACK_keyval);CHKERRMPI(ierr);
  }
  ierr = PetscCommDestroy(&icomm);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatGetOwnershipIS_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKSetBlockSizes_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKGetBlockSizes_C",NULL);CHKERRQ(ierr);
  ierr = PetscFree(A->data);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1210 
/*
   MatScaLAPACKCheckLayout - Verifies that a PetscLayout has equal local sizes
   on all processes (except that trailing processes may hold fewer or zero
   entries), as required by the 1d descriptors used for redistribution.
   Errors out with a suggestion to use MatCreateScaLAPACK otherwise.
*/
PETSC_STATIC_INLINE PetscErrorCode MatScaLAPACKCheckLayout(PetscLayout map)
{
  PetscErrorCode ierr;
  const PetscInt *ranges;
  PetscMPIInt    size;
  PetscInt       i,n;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(map->comm,&size);CHKERRMPI(ierr);
  if (size>2) {
    ierr = PetscLayoutGetRanges(map,&ranges);CHKERRQ(ierr);
    n = ranges[1]-ranges[0];   /* local size of the first process */
    /* find the first process (other than the last) whose local size differs */
    for (i=1;i<size-1;i++) if (ranges[i+1]-ranges[i]!=n) break;
    if (i<size-1 && ranges[i+1]-ranges[i]!=0 && ranges[i+2]-ranges[i+1]!=0) SETERRQ(map->comm,PETSC_ERR_SUP,"MATSCALAPACK must have equal local sizes in all processes (except possibly the last one), consider using MatCreateScaLAPACK");
  }
  PetscFunctionReturn(0);
}
1228 
/*
   MatSetUp_ScaLAPACK - Finalizes the layouts, computes the local dimensions of
   the 2d block-cyclic distribution with numroc, allocates the local array, and
   initializes the ScaLAPACK array descriptor.
*/
PetscErrorCode MatSetUp_ScaLAPACK(Mat A)
{
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
  PetscErrorCode ierr;
  PetscBLASInt   info=0;

  PetscFunctionBegin;
  ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);

  /* check that the layout is as enforced by MatCreateScaLAPACK */
  ierr = MatScaLAPACKCheckLayout(A->rmap);CHKERRQ(ierr);
  ierr = MatScaLAPACKCheckLayout(A->cmap);CHKERRQ(ierr);

  /* compute local sizes */
  ierr = PetscBLASIntCast(A->rmap->N,&a->M);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(A->cmap->N,&a->N);CHKERRQ(ierr);
  a->locr = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
  a->locc = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
  a->lld  = PetscMax(1,a->locr);   /* descinit requires lld >= 1 */

  /* allocate local array */
  ierr = MatScaLAPACKSetPreallocation(A);CHKERRQ(ierr);

  /* set up ScaLAPACK descriptor */
  PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(a->desc,&a->M,&a->N,&a->mb,&a->nb,&a->rsrc,&a->csrc,&a->grid->ictxt,&a->lld,&info));
  PetscCheckScaLapackInfo("descinit",info);
  PetscFunctionReturn(0);
}
1258 
/*
   MatAssemblyBegin_ScaLAPACK - Starts communication of stashed off-process
   entries; skipped entirely when the user guaranteed no off-process entries.
*/
PetscErrorCode MatAssemblyBegin_ScaLAPACK(Mat A,MatAssemblyType type)
{
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;

  PetscFunctionBegin;
  if (A->nooffprocentries) PetscFunctionReturn(0);
  ierr = MatStashScatterBegin_Private(A,&A->stash,NULL);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&A->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1271 
1272 PetscErrorCode MatAssemblyEnd_ScaLAPACK(Mat A,MatAssemblyType type)
1273 {
1274   PetscErrorCode ierr;
1275   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
1276   PetscMPIInt    n;
1277   PetscInt       i,flg,*row,*col;
1278   PetscScalar    *val;
1279   PetscBLASInt   gridx,gcidx,lridx,lcidx,rsrc,csrc;
1280 
1281   PetscFunctionBegin;
1282   if (A->nooffprocentries) PetscFunctionReturn(0);
1283   while (1) {
1284     ierr = MatStashScatterGetMesg_Private(&A->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
1285     if (!flg) break;
1286     for (i=0;i<n;i++) {
1287       ierr = PetscBLASIntCast(row[i]+1,&gridx);CHKERRQ(ierr);
1288       ierr = PetscBLASIntCast(col[i]+1,&gcidx);CHKERRQ(ierr);
1289       PetscStackCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc));
1290       if (rsrc!=a->grid->myrow || csrc!=a->grid->mycol) SETERRQ(PetscObjectComm((PetscObject)A),1,"Something went wrong, received value does not belong to this process");
1291       switch (A->insertmode) {
1292         case INSERT_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] = val[i]; break;
1293         case ADD_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] += val[i]; break;
1294         default: SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for InsertMode %d",(int)A->insertmode);
1295       }
1296     }
1297   }
1298   ierr = MatStashScatterEnd_Private(&A->stash);CHKERRQ(ierr);
1299   PetscFunctionReturn(0);
1300 }
1301 
1302 PetscErrorCode MatLoad_ScaLAPACK(Mat newMat,PetscViewer viewer)
1303 {
1304   PetscErrorCode ierr;
1305   Mat            Adense,As;
1306   MPI_Comm       comm;
1307 
1308   PetscFunctionBegin;
1309   ierr = PetscObjectGetComm((PetscObject)newMat,&comm);CHKERRQ(ierr);
1310   ierr = MatCreate(comm,&Adense);CHKERRQ(ierr);
1311   ierr = MatSetType(Adense,MATDENSE);CHKERRQ(ierr);
1312   ierr = MatLoad(Adense,viewer);CHKERRQ(ierr);
1313   ierr = MatConvert(Adense,MATSCALAPACK,MAT_INITIAL_MATRIX,&As);CHKERRQ(ierr);
1314   ierr = MatDestroy(&Adense);CHKERRQ(ierr);
1315   ierr = MatHeaderReplace(newMat,&As);CHKERRQ(ierr);
1316   PetscFunctionReturn(0);
1317 }
1318 
/* -------------------------------------------------------------------*/
/*
   Function table for MATSCALAPACK: slot indices follow the _MatOps layout
   (the numbered comments mark every fifth slot); zero entries are operations
   not implemented for this type.
*/
static struct _MatOps MatOps_Values = {
       MatSetValues_ScaLAPACK,
       0,
       0,
       MatMult_ScaLAPACK,
/* 4*/ MatMultAdd_ScaLAPACK,
       MatMultTranspose_ScaLAPACK,
       MatMultTransposeAdd_ScaLAPACK,
       MatSolve_ScaLAPACK,
       MatSolveAdd_ScaLAPACK,
       0,
/*10*/ 0,
       MatLUFactor_ScaLAPACK,
       MatCholeskyFactor_ScaLAPACK,
       0,
       MatTranspose_ScaLAPACK,
/*15*/ MatGetInfo_ScaLAPACK,
       0,
       MatGetDiagonal_ScaLAPACK,
       MatDiagonalScale_ScaLAPACK,
       MatNorm_ScaLAPACK,
/*20*/ MatAssemblyBegin_ScaLAPACK,
       MatAssemblyEnd_ScaLAPACK,
       MatSetOption_ScaLAPACK,
       MatZeroEntries_ScaLAPACK,
/*24*/ 0,
       MatLUFactorSymbolic_ScaLAPACK,
       MatLUFactorNumeric_ScaLAPACK,
       MatCholeskyFactorSymbolic_ScaLAPACK,
       MatCholeskyFactorNumeric_ScaLAPACK,
/*29*/ MatSetUp_ScaLAPACK,
       0,
       0,
       0,
       0,
/*34*/ MatDuplicate_ScaLAPACK,
       0,
       0,
       0,
       0,
/*39*/ MatAXPY_ScaLAPACK,
       0,
       0,
       0,
       MatCopy_ScaLAPACK,
/*44*/ 0,
       MatScale_ScaLAPACK,
       MatShift_ScaLAPACK,
       0,
       0,
/*49*/ 0,
       0,
       0,
       0,
       0,
/*54*/ 0,
       0,
       0,
       0,
       0,
/*59*/ 0,
       MatDestroy_ScaLAPACK,
       MatView_ScaLAPACK,
       0,
       0,
/*64*/ 0,
       0,
       0,
       0,
       0,
/*69*/ 0,
       0,
       MatConvert_ScaLAPACK_Dense,
       0,
       0,
/*74*/ 0,
       0,
       0,
       0,
       0,
/*79*/ 0,
       0,
       0,
       0,
       MatLoad_ScaLAPACK,
/*84*/ 0,
       0,
       0,
       0,
       0,
/*89*/ 0,
       0,
       MatMatMultNumeric_ScaLAPACK,
       0,
       0,
/*94*/ 0,
       0,
       0,
       MatMatTransposeMultNumeric_ScaLAPACK,
       0,
/*99*/ MatProductSetFromOptions_ScaLAPACK,
       0,
       0,
       MatConjugate_ScaLAPACK,
       0,
/*104*/0,
       0,
       0,
       0,
       0,
/*109*/MatMatSolve_ScaLAPACK,
       0,
       0,
       0,
       MatMissingDiagonal_ScaLAPACK,
/*114*/0,
       0,
       0,
       0,
       0,
/*119*/0,
       MatHermitianTranspose_ScaLAPACK,
       0,
       0,
       0,
/*124*/0,
       0,
       0,
       0,
       0,
/*129*/0,
       0,
       0,
       0,
       0,
/*134*/0,
       0,
       0,
       0,
       0,
       0,
/*140*/0,
       0,
       0,
       0,
       0,
/*145*/0,
       0,
       0
};
1470 
1471 static PetscErrorCode MatStashScatterBegin_ScaLAPACK(Mat mat,MatStash *stash,PetscInt *owners)
1472 {
1473   PetscInt           *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2;
1474   PetscInt           size=stash->size,nsends;
1475   PetscErrorCode     ierr;
1476   PetscInt           count,*sindices,**rindices,i,j,l;
1477   PetscScalar        **rvalues,*svalues;
1478   MPI_Comm           comm = stash->comm;
1479   MPI_Request        *send_waits,*recv_waits,*recv_waits1,*recv_waits2;
1480   PetscMPIInt        *sizes,*nlengths,nreceives;
1481   PetscInt           *sp_idx,*sp_idy;
1482   PetscScalar        *sp_val;
1483   PetscMatStashSpace space,space_next;
1484   PetscBLASInt       gridx,gcidx,lridx,lcidx,rsrc,csrc;
1485   Mat_ScaLAPACK      *a = (Mat_ScaLAPACK*)mat->data;
1486 
1487   PetscFunctionBegin;
1488   {                             /* make sure all processors are either in INSERTMODE or ADDMODE */
1489     InsertMode addv;
1490     ierr = MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1491     if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
1492     mat->insertmode = addv; /* in case this processor had no cache */
1493   }
1494 
1495   bs2 = stash->bs*stash->bs;
1496 
1497   /*  first count number of contributors to each processor */
1498   ierr = PetscCalloc1(size,&nlengths);CHKERRQ(ierr);
1499   ierr = PetscMalloc1(stash->n+1,&owner);CHKERRQ(ierr);
1500 
1501   i     = j    = 0;
1502   space = stash->space_head;
1503   while (space) {
1504     space_next = space->next;
1505     for (l=0; l<space->local_used; l++) {
1506       ierr = PetscBLASIntCast(space->idx[l]+1,&gridx);CHKERRQ(ierr);
1507       ierr = PetscBLASIntCast(space->idy[l]+1,&gcidx);CHKERRQ(ierr);
1508       PetscStackCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc));
1509       j = Cblacs_pnum(a->grid->ictxt,rsrc,csrc);
1510       nlengths[j]++; owner[i] = j;
1511       i++;
1512     }
1513     space = space_next;
1514   }
1515 
1516   /* Now check what procs get messages - and compute nsends. */
1517   ierr = PetscCalloc1(size,&sizes);CHKERRQ(ierr);
1518   for (i=0, nsends=0; i<size; i++) {
1519     if (nlengths[i]) {
1520       sizes[i] = 1; nsends++;
1521     }
1522   }
1523 
1524   {PetscMPIInt *onodes,*olengths;
1525    /* Determine the number of messages to expect, their lengths, from from-ids */
1526    ierr = PetscGatherNumberOfMessages(comm,sizes,nlengths,&nreceives);CHKERRQ(ierr);
1527    ierr = PetscGatherMessageLengths(comm,nsends,nreceives,nlengths,&onodes,&olengths);CHKERRQ(ierr);
1528    /* since clubbing row,col - lengths are multiplied by 2 */
1529    for (i=0; i<nreceives; i++) olengths[i] *=2;
1530    ierr = PetscPostIrecvInt(comm,tag1,nreceives,onodes,olengths,&rindices,&recv_waits1);CHKERRQ(ierr);
1531    /* values are size 'bs2' lengths (and remove earlier factor 2 */
1532    for (i=0; i<nreceives; i++) olengths[i] = olengths[i]*bs2/2;
1533    ierr = PetscPostIrecvScalar(comm,tag2,nreceives,onodes,olengths,&rvalues,&recv_waits2);CHKERRQ(ierr);
1534    ierr = PetscFree(onodes);CHKERRQ(ierr);
1535    ierr = PetscFree(olengths);CHKERRQ(ierr);}
1536 
1537   /* do sends:
1538       1) starts[i] gives the starting index in svalues for stuff going to
1539          the ith processor
1540   */
1541   ierr = PetscMalloc2(bs2*stash->n,&svalues,2*(stash->n+1),&sindices);CHKERRQ(ierr);
1542   ierr = PetscMalloc1(2*nsends,&send_waits);CHKERRQ(ierr);
1543   ierr = PetscMalloc2(size,&startv,size,&starti);CHKERRQ(ierr);
1544   /* use 2 sends the first with all_a, the next with all_i and all_j */
1545   startv[0] = 0; starti[0] = 0;
1546   for (i=1; i<size; i++) {
1547     startv[i] = startv[i-1] + nlengths[i-1];
1548     starti[i] = starti[i-1] + 2*nlengths[i-1];
1549   }
1550 
1551   i     = 0;
1552   space = stash->space_head;
1553   while (space) {
1554     space_next = space->next;
1555     sp_idx     = space->idx;
1556     sp_idy     = space->idy;
1557     sp_val     = space->val;
1558     for (l=0; l<space->local_used; l++) {
1559       j = owner[i];
1560       if (bs2 == 1) {
1561         svalues[startv[j]] = sp_val[l];
1562       } else {
1563         PetscInt    k;
1564         PetscScalar *buf1,*buf2;
1565         buf1 = svalues+bs2*startv[j];
1566         buf2 = space->val + bs2*l;
1567         for (k=0; k<bs2; k++) buf1[k] = buf2[k];
1568       }
1569       sindices[starti[j]]             = sp_idx[l];
1570       sindices[starti[j]+nlengths[j]] = sp_idy[l];
1571       startv[j]++;
1572       starti[j]++;
1573       i++;
1574     }
1575     space = space_next;
1576   }
1577   startv[0] = 0;
1578   for (i=1; i<size; i++) startv[i] = startv[i-1] + nlengths[i-1];
1579 
1580   for (i=0,count=0; i<size; i++) {
1581     if (sizes[i]) {
1582       ierr = MPI_Isend(sindices+2*startv[i],2*nlengths[i],MPIU_INT,i,tag1,comm,send_waits+count++);CHKERRMPI(ierr);
1583       ierr = MPI_Isend(svalues+bs2*startv[i],bs2*nlengths[i],MPIU_SCALAR,i,tag2,comm,send_waits+count++);CHKERRMPI(ierr);
1584     }
1585   }
1586 #if defined(PETSC_USE_INFO)
1587   ierr = PetscInfo1(NULL,"No of messages: %d \n",nsends);CHKERRQ(ierr);
1588   for (i=0; i<size; i++) {
1589     if (sizes[i]) {
1590       ierr = PetscInfo2(NULL,"Mesg_to: %d: size: %d bytes\n",i,nlengths[i]*(bs2*sizeof(PetscScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);
1591     }
1592   }
1593 #endif
1594   ierr = PetscFree(nlengths);CHKERRQ(ierr);
1595   ierr = PetscFree(owner);CHKERRQ(ierr);
1596   ierr = PetscFree2(startv,starti);CHKERRQ(ierr);
1597   ierr = PetscFree(sizes);CHKERRQ(ierr);
1598 
1599   /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */
1600   ierr = PetscMalloc1(2*nreceives,&recv_waits);CHKERRQ(ierr);
1601 
1602   for (i=0; i<nreceives; i++) {
1603     recv_waits[2*i]   = recv_waits1[i];
1604     recv_waits[2*i+1] = recv_waits2[i];
1605   }
1606   stash->recv_waits = recv_waits;
1607 
1608   ierr = PetscFree(recv_waits1);CHKERRQ(ierr);
1609   ierr = PetscFree(recv_waits2);CHKERRQ(ierr);
1610 
1611   stash->svalues         = svalues;
1612   stash->sindices        = sindices;
1613   stash->rvalues         = rvalues;
1614   stash->rindices        = rindices;
1615   stash->send_waits      = send_waits;
1616   stash->nsends          = nsends;
1617   stash->nrecvs          = nreceives;
1618   stash->reproduce_count = 0;
1619   PetscFunctionReturn(0);
1620 }
1621 
1622 static PetscErrorCode MatScaLAPACKSetBlockSizes_ScaLAPACK(Mat A,PetscInt mb,PetscInt nb)
1623 {
1624   PetscErrorCode ierr;
1625   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
1626 
1627   PetscFunctionBegin;
1628   if (A->preallocated) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Cannot change block sizes after MatSetUp");
1629   if (mb<1 && mb!=PETSC_DECIDE) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"mb %D must be at least 1",mb);
1630   if (nb<1 && nb!=PETSC_DECIDE) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"nb %D must be at least 1",nb);
1631   ierr = PetscBLASIntCast((mb==PETSC_DECIDE)?DEFAULT_BLOCKSIZE:mb,&a->mb);CHKERRQ(ierr);
1632   ierr = PetscBLASIntCast((nb==PETSC_DECIDE)?a->mb:nb,&a->nb);CHKERRQ(ierr);
1633   PetscFunctionReturn(0);
1634 }
1635 
1636 /*@
   MatScaLAPACKSetBlockSizes - Sets the block sizes to be used for the distribution of
1638    the ScaLAPACK matrix
1639 
1640    Logically Collective on A
1641 
   Input Parameters:
1643 +  A  - a MATSCALAPACK matrix
1644 .  mb - the row block size
1645 -  nb - the column block size
1646 
1647    Level: intermediate
1648 
1649 .seealso: MatCreateScaLAPACK(), MatScaLAPACKGetBlockSizes()
1650 @*/
PetscErrorCode MatScaLAPACKSetBlockSizes(Mat A,PetscInt mb,PetscInt nb)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  /* block sizes must be the same on every process of A's communicator */
  PetscValidLogicalCollectiveInt(A,mb,2);
  PetscValidLogicalCollectiveInt(A,nb,3);
  /* dispatch to the type-specific implementation; silently a no-op if A is not MATSCALAPACK */
  ierr = PetscTryMethod(A,"MatScaLAPACKSetBlockSizes_C",(Mat,PetscInt,PetscInt),(A,mb,nb));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1662 
1663 static PetscErrorCode MatScaLAPACKGetBlockSizes_ScaLAPACK(Mat A,PetscInt *mb,PetscInt *nb)
1664 {
1665   Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
1666 
1667   PetscFunctionBegin;
1668   if (mb) *mb = a->mb;
1669   if (nb) *nb = a->nb;
1670   PetscFunctionReturn(0);
1671 }
1672 
1673 /*@
   MatScaLAPACKGetBlockSizes - Gets the block sizes used in the distribution of
1675    the ScaLAPACK matrix
1676 
1677    Not collective
1678 
1679    Input Parameter:
1680 .  A  - a MATSCALAPACK matrix
1681 
1682    Output Parameters:
1683 +  mb - the row block size
1684 -  nb - the column block size
1685 
1686    Level: intermediate
1687 
1688 .seealso: MatCreateScaLAPACK(), MatScaLAPACKSetBlockSizes()
1689 @*/
PetscErrorCode MatScaLAPACKGetBlockSizes(Mat A,PetscInt *mb,PetscInt *nb)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  /* PetscUseMethod (unlike PetscTryMethod) raises an error if A's type does not compose this method */
  ierr = PetscUseMethod(A,"MatScaLAPACKGetBlockSizes_C",(Mat,PetscInt*,PetscInt*),(A,mb,nb));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1699 
1700 PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Ref(MatStash*,PetscMPIInt*,PetscInt**,PetscInt**,PetscScalar**,PetscInt*);
1701 PETSC_INTERN PetscErrorCode MatStashScatterEnd_Ref(MatStash*);
1702 
1703 /*MC
1704    MATSCALAPACK = "scalapack" - A matrix type for dense matrices using the ScaLAPACK package
1705 
1706    Use ./configure --download-scalapack to install PETSc to use ScaLAPACK
1707 
1708    Use -pc_type lu -pc_factor_mat_solver_type scalapack to use this direct solver
1709 
1710    Options Database Keys:
1711 +  -mat_type scalapack - sets the matrix type to "scalapack" during a call to MatSetFromOptions()
1712 .  -mat_scalapack_grid_height - sets Grid Height for 2D cyclic ordering of internal matrix
1713 -  -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma)
1714 
1715    Level: beginner
1716 
1717 .seealso: MATDENSE, MATELEMENTAL
1718 M*/
1719 
/*
   MatCreate_ScaLAPACK - Constructor invoked by MatSetType() for MATSCALAPACK.

   Installs the function table and stash callbacks, attaches (or creates) the
   shared BLACS process grid for A's communicator, and sets default block sizes.
*/
PETSC_EXTERN PetscErrorCode MatCreate_ScaLAPACK(Mat A)
{
  Mat_ScaLAPACK      *a;
  PetscErrorCode     ierr;
  PetscBool          flg,flg1;
  Mat_ScaLAPACK_Grid *grid;
  MPI_Comm           icomm;
  PetscBLASInt       nprow,npcol,myrow,mycol;
  PetscInt           optv1,k=2,array[2]={0,0};
  PetscMPIInt        size;

  PetscFunctionBegin;
  ierr = PetscMemcpy(A->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  A->insertmode = NOT_SET_VALUES;

  /* the stash buffers off-process MatSetValues() contributions; block size 1 */
  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)A),1,&A->stash);CHKERRQ(ierr);
  A->stash.ScatterBegin   = MatStashScatterBegin_ScaLAPACK;
  A->stash.ScatterGetMesg = MatStashScatterGetMesg_Ref;
  A->stash.ScatterEnd     = MatStashScatterEnd_Ref;
  A->stash.ScatterDestroy = NULL;

  ierr = PetscNewLog(A,&a);CHKERRQ(ierr);
  A->data = (void*)a;

  /* Grid needs to be shared between multiple Mats on the same communicator, implement by attribute caching on the MPI_Comm */
  if (Petsc_ScaLAPACK_keyval == MPI_KEYVAL_INVALID) {
    /* one-time creation of the attribute key, released at PetscFinalize() */
    ierr = MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN,MPI_COMM_NULL_DELETE_FN,&Petsc_ScaLAPACK_keyval,(void*)0);CHKERRMPI(ierr);
    ierr = PetscRegisterFinalize(Petsc_ScaLAPACK_keyval_free);CHKERRQ(ierr);
  }
  /* use PETSc's inner duplicate of the communicator so the attribute survives user comm manipulation */
  ierr = PetscCommDuplicate(PetscObjectComm((PetscObject)A),&icomm,NULL);CHKERRQ(ierr);
  ierr = MPI_Comm_get_attr(icomm,Petsc_ScaLAPACK_keyval,(void**)&grid,(int*)&flg);CHKERRMPI(ierr);
  if (!flg) {
    /* no cached grid on this communicator yet: build one */
    ierr = PetscNewLog(A,&grid);CHKERRQ(ierr);

    ierr = MPI_Comm_size(icomm,&size);CHKERRMPI(ierr);
    /* start from floor(sqrt(size)); the +0.001 guards against sqrt() landing just below an exact integer */
    grid->nprow = (PetscInt) (PetscSqrtReal((PetscReal)size) + 0.001);

    ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"ScaLAPACK Grid Options","Mat");CHKERRQ(ierr);
    ierr = PetscOptionsInt("-mat_scalapack_grid_height","Grid Height","None",grid->nprow,&optv1,&flg1);CHKERRQ(ierr);
    if (flg1) {
      if (size % optv1) SETERRQ2(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_INCOMP,"Grid Height %D must evenly divide CommSize %D",optv1,size);
      grid->nprow = optv1;
    }
    ierr = PetscOptionsEnd();CHKERRQ(ierr);

    if (size % grid->nprow) grid->nprow = 1;  /* cannot use a squarish grid, use a 1d grid */
    grid->npcol = size/grid->nprow;
    ierr = PetscBLASIntCast(grid->nprow,&nprow);CHKERRQ(ierr);
    ierr = PetscBLASIntCast(grid->npcol,&npcol);CHKERRQ(ierr);
    /* create the main 2d BLACS context (row-major process ordering) and query this process's coordinates */
    grid->ictxt = Csys2blacs_handle(icomm);
    Cblacs_gridinit(&grid->ictxt,"R",nprow,npcol);
    Cblacs_gridinfo(grid->ictxt,&nprow,&npcol,&myrow,&mycol);
    grid->grid_refct = 1;
    grid->nprow      = nprow;
    grid->npcol      = npcol;
    grid->myrow      = myrow;
    grid->mycol      = mycol;
    /* auxiliary 1d BLACS contexts for 1xsize and sizex1 grids */
    grid->ictxrow = Csys2blacs_handle(icomm);
    Cblacs_gridinit(&grid->ictxrow,"R",1,size);
    grid->ictxcol = Csys2blacs_handle(icomm);
    Cblacs_gridinit(&grid->ictxcol,"R",size,1);
    /* cache the grid on the communicator so later MATSCALAPACK Mats reuse it */
    ierr = MPI_Comm_set_attr(icomm,Petsc_ScaLAPACK_keyval,(void*)grid);CHKERRMPI(ierr);

  } else grid->grid_refct++;   /* grid already cached: just bump its reference count */
  ierr = PetscCommDestroy(&icomm);CHKERRQ(ierr);
  a->grid = grid;
  a->mb   = DEFAULT_BLOCKSIZE;
  a->nb   = DEFAULT_BLOCKSIZE;

  /* allow overriding the default block sizes from the options database */
  ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),NULL,"ScaLAPACK Options","Mat");CHKERRQ(ierr);
  ierr = PetscOptionsIntArray("-mat_scalapack_block_sizes","Size of the blocks to use (one or two comma-separated integers)","MatCreateScaLAPACK",array,&k,&flg);CHKERRQ(ierr);
  if (flg) {
    /* a single value sets both block sizes; a second value overrides the column block size */
    a->mb = array[0];
    a->nb = (k>1)? array[1]: a->mb;
  }
  ierr = PetscOptionsEnd();CHKERRQ(ierr);

  ierr = PetscObjectComposeFunction((PetscObject)A,"MatGetOwnershipIS_C",MatGetOwnershipIS_ScaLAPACK);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKSetBlockSizes_C",MatScaLAPACKSetBlockSizes_ScaLAPACK);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKGetBlockSizes_C",MatScaLAPACKGetBlockSizes_ScaLAPACK);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)A,MATSCALAPACK);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1804 
1805 /*@C
1806    MatCreateScaLAPACK - Creates a dense parallel matrix in ScaLAPACK format
1807    (2D block cyclic distribution).
1808 
1809    Collective
1810 
1811    Input Parameters:
1812 +  comm - MPI communicator
1813 .  mb   - row block size (or PETSC_DECIDE to have it set)
1814 .  nb   - column block size (or PETSC_DECIDE to have it set)
1815 .  M    - number of global rows
1816 .  N    - number of global columns
1817 .  rsrc - coordinate of process that owns the first row of the distributed matrix
1818 -  csrc - coordinate of process that owns the first column of the distributed matrix
1819 
1820    Output Parameter:
1821 .  A - the matrix
1822 
1823    Options Database Keys:
1824 .  -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma)
1825 
1826    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
1827    MatXXXXSetPreallocation() paradigm instead of this routine directly.
1828    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
1829 
1830    Notes:
1831    If PETSC_DECIDE is used for the block sizes, then an appropriate value
1832    is chosen.
1833 
1834    Storage Information:
   Storage is completely managed by ScaLAPACK, so this requires PETSc to be
1836    configured with ScaLAPACK. In particular, PETSc's local sizes lose
1837    significance and are thus ignored. The block sizes refer to the values
1838    used for the distributed matrix, not the same meaning as in BAIJ.
1839 
1840    Level: intermediate
1841 
1842 .seealso: MatCreate(), MatCreateDense(), MatSetValues()
1843 @*/
PetscErrorCode MatCreateScaLAPACK(MPI_Comm comm,PetscInt mb,PetscInt nb,PetscInt M,PetscInt N,PetscInt rsrc,PetscInt csrc,Mat *A)
{
  PetscErrorCode ierr;
  Mat_ScaLAPACK  *a;
  PetscInt       m,n;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetType(*A,MATSCALAPACK);CHKERRQ(ierr);
  /* global dimensions are mandatory: local sizes carry no meaning for the 2d block-cyclic layout */
  if (M==PETSC_DECIDE || N==PETSC_DECIDE) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot use PETSC_DECIDE for matrix dimensions");
  /* rows and columns are NOT distributed according to PetscSplitOwnership */
  m = PETSC_DECIDE;
  ierr = PetscSplitOwnershipEqual(comm,&m,&M);CHKERRQ(ierr);
  n = PETSC_DECIDE;
  ierr = PetscSplitOwnershipEqual(comm,&n,&N);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  a = (Mat_ScaLAPACK*)(*A)->data;
  ierr = PetscBLASIntCast(M,&a->M);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(N,&a->N);CHKERRQ(ierr);
  /* PETSC_DECIDE: row block size falls back to the default; column block size inherits the row block size */
  ierr = PetscBLASIntCast((mb==PETSC_DECIDE)?DEFAULT_BLOCKSIZE:mb,&a->mb);CHKERRQ(ierr);
  ierr = PetscBLASIntCast((nb==PETSC_DECIDE)?a->mb:nb,&a->nb);CHKERRQ(ierr);
  /* process-grid coordinates that own the first row/column of the distributed matrix */
  ierr = PetscBLASIntCast(rsrc,&a->rsrc);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(csrc,&a->csrc);CHKERRQ(ierr);
  ierr = MatSetUp(*A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1870 
1871