1 #include <petsc/private/petscscalapack.h> /*I "petscmat.h" I*/ 2 3 const char ScaLAPACKCitation[] = "@BOOK{scalapack-user-guide,\n" 4 " AUTHOR = {L. S. Blackford and J. Choi and A. Cleary and E. D'Azevedo and\n" 5 " J. Demmel and I. Dhillon and J. Dongarra and S. Hammarling and\n" 6 " G. Henry and A. Petitet and K. Stanley and D. Walker and R. C. Whaley},\n" 7 " TITLE = {Sca{LAPACK} Users' Guide},\n" 8 " PUBLISHER = {SIAM},\n" 9 " ADDRESS = {Philadelphia, PA},\n" 10 " YEAR = 1997\n" 11 "}\n"; 12 static PetscBool ScaLAPACKCite = PETSC_FALSE; 13 14 #define DEFAULT_BLOCKSIZE 64 15 16 /* 17 The variable Petsc_ScaLAPACK_keyval is used to indicate an MPI attribute that 18 is attached to a communicator, in this case the attribute is a Mat_ScaLAPACK_Grid 19 */ 20 static PetscMPIInt Petsc_ScaLAPACK_keyval = MPI_KEYVAL_INVALID; 21 22 static PetscErrorCode Petsc_ScaLAPACK_keyval_free(void) 23 { 24 PetscFunctionBegin; 25 PetscCall(PetscInfo(NULL,"Freeing Petsc_ScaLAPACK_keyval\n")); 26 PetscCallMPI(MPI_Comm_free_keyval(&Petsc_ScaLAPACK_keyval)); 27 PetscFunctionReturn(0); 28 } 29 30 static PetscErrorCode MatView_ScaLAPACK(Mat A,PetscViewer viewer) 31 { 32 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 33 PetscBool iascii; 34 PetscViewerFormat format; 35 Mat Adense; 36 37 PetscFunctionBegin; 38 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii)); 39 if (iascii) { 40 PetscCall(PetscViewerGetFormat(viewer,&format)); 41 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 42 PetscCall(PetscViewerASCIIPrintf(viewer,"block sizes: %d,%d\n",(int)a->mb,(int)a->nb)); 43 PetscCall(PetscViewerASCIIPrintf(viewer,"grid height=%d, grid width=%d\n",(int)a->grid->nprow,(int)a->grid->npcol)); 44 PetscCall(PetscViewerASCIIPrintf(viewer,"coordinates of process owning first row and column: (%d,%d)\n",(int)a->rsrc,(int)a->csrc)); 45 PetscCall(PetscViewerASCIIPrintf(viewer,"dimension of largest local matrix: %d x 
%d\n",(int)a->locr,(int)a->locc)); 46 PetscFunctionReturn(0); 47 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 48 PetscFunctionReturn(0); 49 } 50 } 51 /* convert to dense format and call MatView() */ 52 PetscCall(MatConvert(A,MATDENSE,MAT_INITIAL_MATRIX,&Adense)); 53 PetscCall(MatView(Adense,viewer)); 54 PetscCall(MatDestroy(&Adense)); 55 PetscFunctionReturn(0); 56 } 57 58 static PetscErrorCode MatGetInfo_ScaLAPACK(Mat A,MatInfoType flag,MatInfo *info) 59 { 60 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 61 PetscLogDouble isend[2],irecv[2]; 62 63 PetscFunctionBegin; 64 info->block_size = 1.0; 65 66 isend[0] = a->lld*a->locc; /* locally allocated */ 67 isend[1] = a->locr*a->locc; /* used submatrix */ 68 if (flag == MAT_LOCAL || flag == MAT_GLOBAL_MAX) { 69 info->nz_allocated = isend[0]; 70 info->nz_used = isend[1]; 71 } else if (flag == MAT_GLOBAL_MAX) { 72 PetscCall(MPIU_Allreduce(isend,irecv,2,MPIU_PETSCLOGDOUBLE,MPIU_MAX,PetscObjectComm((PetscObject)A))); 73 info->nz_allocated = irecv[0]; 74 info->nz_used = irecv[1]; 75 } else if (flag == MAT_GLOBAL_SUM) { 76 PetscCall(MPIU_Allreduce(isend,irecv,2,MPIU_PETSCLOGDOUBLE,MPIU_SUM,PetscObjectComm((PetscObject)A))); 77 info->nz_allocated = irecv[0]; 78 info->nz_used = irecv[1]; 79 } 80 81 info->nz_unneeded = 0; 82 info->assemblies = A->num_ass; 83 info->mallocs = 0; 84 info->memory = ((PetscObject)A)->mem; 85 info->fill_ratio_given = 0; 86 info->fill_ratio_needed = 0; 87 info->factor_mallocs = 0; 88 PetscFunctionReturn(0); 89 } 90 91 PetscErrorCode MatSetOption_ScaLAPACK(Mat A,MatOption op,PetscBool flg) 92 { 93 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 94 95 PetscFunctionBegin; 96 switch (op) { 97 case MAT_NEW_NONZERO_LOCATIONS: 98 case MAT_NEW_NONZERO_LOCATION_ERR: 99 case MAT_NEW_NONZERO_ALLOCATION_ERR: 100 case MAT_SYMMETRIC: 101 case MAT_SORTED_FULL: 102 case MAT_HERMITIAN: 103 break; 104 case MAT_ROW_ORIENTED: 105 a->roworiented = flg; 106 break; 107 default: 108 
SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unsupported option %s",MatOptions[op]); 109 } 110 PetscFunctionReturn(0); 111 } 112 113 static PetscErrorCode MatSetValues_ScaLAPACK(Mat A,PetscInt nr,const PetscInt *rows,PetscInt nc,const PetscInt *cols,const PetscScalar *vals,InsertMode imode) 114 { 115 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 116 PetscInt i,j; 117 PetscBLASInt gridx,gcidx,lridx,lcidx,rsrc,csrc; 118 PetscBool roworiented = a->roworiented; 119 120 PetscFunctionBegin; 121 PetscCheck(imode == INSERT_VALUES || imode == ADD_VALUES,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for InsertMode %d",(int)imode); 122 for (i=0;i<nr;i++) { 123 if (rows[i] < 0) continue; 124 PetscCall(PetscBLASIntCast(rows[i]+1,&gridx)); 125 for (j=0;j<nc;j++) { 126 if (cols[j] < 0) continue; 127 PetscCall(PetscBLASIntCast(cols[j]+1,&gcidx)); 128 PetscStackCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc)); 129 if (rsrc==a->grid->myrow && csrc==a->grid->mycol) { 130 if (roworiented) { 131 switch (imode) { 132 case INSERT_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] = vals[i*nc+j]; break; 133 default: a->loc[lridx-1+(lcidx-1)*a->lld] += vals[i*nc+j]; break; 134 } 135 } else { 136 switch (imode) { 137 case INSERT_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] = vals[i+j*nr]; break; 138 default: a->loc[lridx-1+(lcidx-1)*a->lld] += vals[i+j*nr]; break; 139 } 140 } 141 } else { 142 PetscCheck(!A->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process entry even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set"); 143 A->assembled = PETSC_FALSE; 144 PetscCall(MatStashValuesRow_Private(&A->stash,rows[i],1,cols+j,roworiented ? 
vals+i*nc+j : vals+i+j*nr,(PetscBool)(imode==ADD_VALUES))); 145 } 146 } 147 } 148 PetscFunctionReturn(0); 149 } 150 151 static PetscErrorCode MatMultXXXYYY_ScaLAPACK(Mat A,PetscBool transpose,PetscScalar beta,const PetscScalar *x,PetscScalar *y) 152 { 153 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 154 PetscScalar *x2d,*y2d,alpha=1.0; 155 const PetscInt *ranges; 156 PetscBLASInt xdesc[9],ydesc[9],x2desc[9],y2desc[9],mb,nb,lszx,lszy,zero=0,one=1,xlld,ylld,info; 157 158 PetscFunctionBegin; 159 if (transpose) { 160 161 /* create ScaLAPACK descriptors for vectors (1d block distribution) */ 162 PetscCall(PetscLayoutGetRanges(A->rmap,&ranges)); 163 PetscCall(PetscBLASIntCast(ranges[1],&mb)); /* x block size */ 164 xlld = PetscMax(1,A->rmap->n); 165 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&xlld,&info)); 166 PetscCheckScaLapackInfo("descinit",info); 167 PetscCall(PetscLayoutGetRanges(A->cmap,&ranges)); 168 PetscCall(PetscBLASIntCast(ranges[1],&nb)); /* y block size */ 169 ylld = 1; 170 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ydesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&ylld,&info)); 171 PetscCheckScaLapackInfo("descinit",info); 172 173 /* allocate 2d vectors */ 174 lszx = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow); 175 lszy = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol); 176 PetscCall(PetscMalloc2(lszx,&x2d,lszy,&y2d)); 177 xlld = PetscMax(1,lszx); 178 179 /* create ScaLAPACK descriptors for vectors (2d block distribution) */ 180 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&xlld,&info)); 181 PetscCheckScaLapackInfo("descinit",info); 182 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(y2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&ylld,&info)); 183 PetscCheckScaLapackInfo("descinit",info); 184 185 /* redistribute x as a 
column of a 2d matrix */ 186 PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,(PetscScalar*)x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxcol)); 187 188 /* redistribute y as a row of a 2d matrix */ 189 if (beta!=0.0) PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,y,&one,&one,ydesc,y2d,&one,&one,y2desc,&a->grid->ictxrow)); 190 191 /* call PBLAS subroutine */ 192 PetscStackCallBLAS("PBLASgemv",PBLASgemv_("T",&a->M,&a->N,&alpha,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&one,&beta,y2d,&one,&one,y2desc,&one)); 193 194 /* redistribute y from a row of a 2d matrix */ 195 PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,y2d,&one,&one,y2desc,y,&one,&one,ydesc,&a->grid->ictxrow)); 196 197 } else { /* non-transpose */ 198 199 /* create ScaLAPACK descriptors for vectors (1d block distribution) */ 200 PetscCall(PetscLayoutGetRanges(A->cmap,&ranges)); 201 PetscCall(PetscBLASIntCast(ranges[1],&nb)); /* x block size */ 202 xlld = 1; 203 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&xlld,&info)); 204 PetscCheckScaLapackInfo("descinit",info); 205 PetscCall(PetscLayoutGetRanges(A->rmap,&ranges)); 206 PetscCall(PetscBLASIntCast(ranges[1],&mb)); /* y block size */ 207 ylld = PetscMax(1,A->rmap->n); 208 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ydesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&ylld,&info)); 209 PetscCheckScaLapackInfo("descinit",info); 210 211 /* allocate 2d vectors */ 212 lszy = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow); 213 lszx = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol); 214 PetscCall(PetscMalloc2(lszx,&x2d,lszy,&y2d)); 215 ylld = PetscMax(1,lszy); 216 217 /* create ScaLAPACK descriptors for vectors (2d block distribution) */ 218 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&xlld,&info)); 
219 PetscCheckScaLapackInfo("descinit",info); 220 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(y2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&ylld,&info)); 221 PetscCheckScaLapackInfo("descinit",info); 222 223 /* redistribute x as a row of a 2d matrix */ 224 PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,(PetscScalar*)x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxrow)); 225 226 /* redistribute y as a column of a 2d matrix */ 227 if (beta!=0.0) PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,y,&one,&one,ydesc,y2d,&one,&one,y2desc,&a->grid->ictxcol)); 228 229 /* call PBLAS subroutine */ 230 PetscStackCallBLAS("PBLASgemv",PBLASgemv_("N",&a->M,&a->N,&alpha,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&one,&beta,y2d,&one,&one,y2desc,&one)); 231 232 /* redistribute y from a column of a 2d matrix */ 233 PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,y2d,&one,&one,y2desc,y,&one,&one,ydesc,&a->grid->ictxcol)); 234 235 } 236 PetscCall(PetscFree2(x2d,y2d)); 237 PetscFunctionReturn(0); 238 } 239 240 static PetscErrorCode MatMult_ScaLAPACK(Mat A,Vec x,Vec y) 241 { 242 const PetscScalar *xarray; 243 PetscScalar *yarray; 244 245 PetscFunctionBegin; 246 PetscCall(VecGetArrayRead(x,&xarray)); 247 PetscCall(VecGetArray(y,&yarray)); 248 PetscCall(MatMultXXXYYY_ScaLAPACK(A,PETSC_FALSE,0.0,xarray,yarray)); 249 PetscCall(VecRestoreArrayRead(x,&xarray)); 250 PetscCall(VecRestoreArray(y,&yarray)); 251 PetscFunctionReturn(0); 252 } 253 254 static PetscErrorCode MatMultTranspose_ScaLAPACK(Mat A,Vec x,Vec y) 255 { 256 const PetscScalar *xarray; 257 PetscScalar *yarray; 258 259 PetscFunctionBegin; 260 PetscCall(VecGetArrayRead(x,&xarray)); 261 PetscCall(VecGetArray(y,&yarray)); 262 PetscCall(MatMultXXXYYY_ScaLAPACK(A,PETSC_TRUE,0.0,xarray,yarray)); 263 PetscCall(VecRestoreArrayRead(x,&xarray)); 264 PetscCall(VecRestoreArray(y,&yarray)); 265 PetscFunctionReturn(0); 266 } 267 268 static PetscErrorCode 
MatMultAdd_ScaLAPACK(Mat A,Vec x,Vec y,Vec z) 269 { 270 const PetscScalar *xarray; 271 PetscScalar *zarray; 272 273 PetscFunctionBegin; 274 if (y != z) PetscCall(VecCopy(y,z)); 275 PetscCall(VecGetArrayRead(x,&xarray)); 276 PetscCall(VecGetArray(z,&zarray)); 277 PetscCall(MatMultXXXYYY_ScaLAPACK(A,PETSC_FALSE,1.0,xarray,zarray)); 278 PetscCall(VecRestoreArrayRead(x,&xarray)); 279 PetscCall(VecRestoreArray(z,&zarray)); 280 PetscFunctionReturn(0); 281 } 282 283 static PetscErrorCode MatMultTransposeAdd_ScaLAPACK(Mat A,Vec x,Vec y,Vec z) 284 { 285 const PetscScalar *xarray; 286 PetscScalar *zarray; 287 288 PetscFunctionBegin; 289 if (y != z) PetscCall(VecCopy(y,z)); 290 PetscCall(VecGetArrayRead(x,&xarray)); 291 PetscCall(VecGetArray(z,&zarray)); 292 PetscCall(MatMultXXXYYY_ScaLAPACK(A,PETSC_TRUE,1.0,xarray,zarray)); 293 PetscCall(VecRestoreArrayRead(x,&xarray)); 294 PetscCall(VecRestoreArray(z,&zarray)); 295 PetscFunctionReturn(0); 296 } 297 298 PetscErrorCode MatMatMultNumeric_ScaLAPACK(Mat A,Mat B,Mat C) 299 { 300 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 301 Mat_ScaLAPACK *b = (Mat_ScaLAPACK*)B->data; 302 Mat_ScaLAPACK *c = (Mat_ScaLAPACK*)C->data; 303 PetscScalar sone=1.0,zero=0.0; 304 PetscBLASInt one=1; 305 306 PetscFunctionBegin; 307 PetscStackCallBLAS("PBLASgemm",PBLASgemm_("N","N",&a->M,&b->N,&a->N,&sone,a->loc,&one,&one,a->desc,b->loc,&one,&one,b->desc,&zero,c->loc,&one,&one,c->desc)); 308 C->assembled = PETSC_TRUE; 309 PetscFunctionReturn(0); 310 } 311 312 PetscErrorCode MatMatMultSymbolic_ScaLAPACK(Mat A,Mat B,PetscReal fill,Mat C) 313 { 314 PetscFunctionBegin; 315 PetscCall(MatSetSizes(C,A->rmap->n,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE)); 316 PetscCall(MatSetType(C,MATSCALAPACK)); 317 PetscCall(MatSetUp(C)); 318 C->ops->matmultnumeric = MatMatMultNumeric_ScaLAPACK; 319 PetscFunctionReturn(0); 320 } 321 322 static PetscErrorCode MatMatTransposeMultNumeric_ScaLAPACK(Mat A,Mat B,Mat C) 323 { 324 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 325 Mat_ScaLAPACK 
*b = (Mat_ScaLAPACK*)B->data; 326 Mat_ScaLAPACK *c = (Mat_ScaLAPACK*)C->data; 327 PetscScalar sone=1.0,zero=0.0; 328 PetscBLASInt one=1; 329 330 PetscFunctionBegin; 331 PetscStackCallBLAS("PBLASgemm",PBLASgemm_("N","T",&a->M,&b->M,&a->N,&sone,a->loc,&one,&one,a->desc,b->loc,&one,&one,b->desc,&zero,c->loc,&one,&one,c->desc)); 332 C->assembled = PETSC_TRUE; 333 PetscFunctionReturn(0); 334 } 335 336 static PetscErrorCode MatMatTransposeMultSymbolic_ScaLAPACK(Mat A,Mat B,PetscReal fill,Mat C) 337 { 338 PetscFunctionBegin; 339 PetscCall(MatSetSizes(C,A->rmap->n,B->rmap->n,PETSC_DECIDE,PETSC_DECIDE)); 340 PetscCall(MatSetType(C,MATSCALAPACK)); 341 PetscCall(MatSetUp(C)); 342 PetscFunctionReturn(0); 343 } 344 345 /* --------------------------------------- */ 346 static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_AB(Mat C) 347 { 348 PetscFunctionBegin; 349 C->ops->matmultsymbolic = MatMatMultSymbolic_ScaLAPACK; 350 C->ops->productsymbolic = MatProductSymbolic_AB; 351 PetscFunctionReturn(0); 352 } 353 354 static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_ABt(Mat C) 355 { 356 PetscFunctionBegin; 357 C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_ScaLAPACK; 358 C->ops->productsymbolic = MatProductSymbolic_ABt; 359 PetscFunctionReturn(0); 360 } 361 362 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_ScaLAPACK(Mat C) 363 { 364 Mat_Product *product = C->product; 365 366 PetscFunctionBegin; 367 switch (product->type) { 368 case MATPRODUCT_AB: 369 PetscCall(MatProductSetFromOptions_ScaLAPACK_AB(C)); 370 break; 371 case MATPRODUCT_ABt: 372 PetscCall(MatProductSetFromOptions_ScaLAPACK_ABt(C)); 373 break; 374 default: SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_SUP,"MatProduct type %s is not supported for ScaLAPACK and ScaLAPACK matrices",MatProductTypes[product->type]); 375 } 376 PetscFunctionReturn(0); 377 } 378 /* --------------------------------------- */ 379 380 static PetscErrorCode MatGetDiagonal_ScaLAPACK(Mat A,Vec D) 381 { 382 
Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 383 PetscScalar *darray,*d2d,v; 384 const PetscInt *ranges; 385 PetscBLASInt j,ddesc[9],d2desc[9],mb,nb,lszd,zero=0,one=1,dlld,info; 386 387 PetscFunctionBegin; 388 PetscCall(VecGetArray(D,&darray)); 389 390 if (A->rmap->N<=A->cmap->N) { /* row version */ 391 392 /* create ScaLAPACK descriptor for vector (1d block distribution) */ 393 PetscCall(PetscLayoutGetRanges(A->rmap,&ranges)); 394 PetscCall(PetscBLASIntCast(ranges[1],&mb)); /* D block size */ 395 dlld = PetscMax(1,A->rmap->n); 396 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&dlld,&info)); 397 PetscCheckScaLapackInfo("descinit",info); 398 399 /* allocate 2d vector */ 400 lszd = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow); 401 PetscCall(PetscCalloc1(lszd,&d2d)); 402 dlld = PetscMax(1,lszd); 403 404 /* create ScaLAPACK descriptor for vector (2d block distribution) */ 405 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&dlld,&info)); 406 PetscCheckScaLapackInfo("descinit",info); 407 408 /* collect diagonal */ 409 for (j=1;j<=a->M;j++) { 410 PetscStackCallBLAS("SCALAPACKelget",SCALAPACKelget_("R"," ",&v,a->loc,&j,&j,a->desc)); 411 PetscStackCallBLAS("SCALAPACKelset",SCALAPACKelset_(d2d,&j,&one,d2desc,&v)); 412 } 413 414 /* redistribute d from a column of a 2d matrix */ 415 PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,d2d,&one,&one,d2desc,darray,&one,&one,ddesc,&a->grid->ictxcol)); 416 PetscCall(PetscFree(d2d)); 417 418 } else { /* column version */ 419 420 /* create ScaLAPACK descriptor for vector (1d block distribution) */ 421 PetscCall(PetscLayoutGetRanges(A->cmap,&ranges)); 422 PetscCall(PetscBLASIntCast(ranges[1],&nb)); /* D block size */ 423 dlld = 1; 424 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&dlld,&info)); 
425 PetscCheckScaLapackInfo("descinit",info); 426 427 /* allocate 2d vector */ 428 lszd = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol); 429 PetscCall(PetscCalloc1(lszd,&d2d)); 430 431 /* create ScaLAPACK descriptor for vector (2d block distribution) */ 432 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&dlld,&info)); 433 PetscCheckScaLapackInfo("descinit",info); 434 435 /* collect diagonal */ 436 for (j=1;j<=a->N;j++) { 437 PetscStackCallBLAS("SCALAPACKelget",SCALAPACKelget_("C"," ",&v,a->loc,&j,&j,a->desc)); 438 PetscStackCallBLAS("SCALAPACKelset",SCALAPACKelset_(d2d,&one,&j,d2desc,&v)); 439 } 440 441 /* redistribute d from a row of a 2d matrix */ 442 PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,d2d,&one,&one,d2desc,darray,&one,&one,ddesc,&a->grid->ictxrow)); 443 PetscCall(PetscFree(d2d)); 444 } 445 446 PetscCall(VecRestoreArray(D,&darray)); 447 PetscCall(VecAssemblyBegin(D)); 448 PetscCall(VecAssemblyEnd(D)); 449 PetscFunctionReturn(0); 450 } 451 452 static PetscErrorCode MatDiagonalScale_ScaLAPACK(Mat A,Vec L,Vec R) 453 { 454 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 455 const PetscScalar *d; 456 const PetscInt *ranges; 457 PetscScalar *d2d; 458 PetscBLASInt i,j,ddesc[9],d2desc[9],mb,nb,lszd,zero=0,one=1,dlld,info; 459 460 PetscFunctionBegin; 461 if (R) { 462 PetscCall(VecGetArrayRead(R,(const PetscScalar **)&d)); 463 /* create ScaLAPACK descriptor for vector (1d block distribution) */ 464 PetscCall(PetscLayoutGetRanges(A->cmap,&ranges)); 465 PetscCall(PetscBLASIntCast(ranges[1],&nb)); /* D block size */ 466 dlld = 1; 467 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&dlld,&info)); 468 PetscCheckScaLapackInfo("descinit",info); 469 470 /* allocate 2d vector */ 471 lszd = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol); 472 
PetscCall(PetscCalloc1(lszd,&d2d)); 473 474 /* create ScaLAPACK descriptor for vector (2d block distribution) */ 475 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&dlld,&info)); 476 PetscCheckScaLapackInfo("descinit",info); 477 478 /* redistribute d to a row of a 2d matrix */ 479 PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,(PetscScalar*)d,&one,&one,ddesc,d2d,&one,&one,d2desc,&a->grid->ictxrow)); 480 481 /* broadcast along process columns */ 482 if (!a->grid->myrow) Cdgebs2d(a->grid->ictxt,"C"," ",1,lszd,d2d,dlld); 483 else Cdgebr2d(a->grid->ictxt,"C"," ",1,lszd,d2d,dlld,0,a->grid->mycol); 484 485 /* local scaling */ 486 for (j=0;j<a->locc;j++) for (i=0;i<a->locr;i++) a->loc[i+j*a->lld] *= d2d[j]; 487 488 PetscCall(PetscFree(d2d)); 489 PetscCall(VecRestoreArrayRead(R,(const PetscScalar **)&d)); 490 } 491 if (L) { 492 PetscCall(VecGetArrayRead(L,(const PetscScalar **)&d)); 493 /* create ScaLAPACK descriptor for vector (1d block distribution) */ 494 PetscCall(PetscLayoutGetRanges(A->rmap,&ranges)); 495 PetscCall(PetscBLASIntCast(ranges[1],&mb)); /* D block size */ 496 dlld = PetscMax(1,A->rmap->n); 497 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&dlld,&info)); 498 PetscCheckScaLapackInfo("descinit",info); 499 500 /* allocate 2d vector */ 501 lszd = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow); 502 PetscCall(PetscCalloc1(lszd,&d2d)); 503 dlld = PetscMax(1,lszd); 504 505 /* create ScaLAPACK descriptor for vector (2d block distribution) */ 506 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&dlld,&info)); 507 PetscCheckScaLapackInfo("descinit",info); 508 509 /* redistribute d to a column of a 2d matrix */ 510 
PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,(PetscScalar*)d,&one,&one,ddesc,d2d,&one,&one,d2desc,&a->grid->ictxcol));

    /* broadcast along process rows */
    if (!a->grid->mycol) Cdgebs2d(a->grid->ictxt,"R"," ",lszd,1,d2d,dlld);
    else Cdgebr2d(a->grid->ictxt,"R"," ",lszd,1,d2d,dlld,a->grid->myrow,0);

    /* local scaling: row i of the local block is multiplied by d2d[i] */
    for (i=0;i<a->locr;i++) for (j=0;j<a->locc;j++) a->loc[i+j*a->lld] *= d2d[i];

    PetscCall(PetscFree(d2d));
    PetscCall(VecRestoreArrayRead(L,(const PetscScalar **)&d));
  }
  PetscFunctionReturn(0);
}

/* Dense storage: a diagonal entry is never structurally missing, so always
   report PETSC_FALSE (the location output d is left untouched) */
static PetscErrorCode MatMissingDiagonal_ScaLAPACK(Mat A,PetscBool *missing,PetscInt *d)
{
  PetscFunctionBegin;
  *missing = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/* X *= a, applied to the whole local array (lld*locc includes any padding
   rows beyond locr, which is harmless for a uniform scaling) */
static PetscErrorCode MatScale_ScaLAPACK(Mat X,PetscScalar a)
{
  Mat_ScaLAPACK *x = (Mat_ScaLAPACK*)X->data;
  PetscBLASInt  n,one=1;

  PetscFunctionBegin;
  n = x->lld*x->locc;
  PetscStackCallBLAS("BLASscal",BLASscal_(&n,&a,x->loc,&one));
  PetscFunctionReturn(0);
}

/* X += alpha*I: each diagonal entry is fetched and rewritten through the
   ScaLAPACK element get/set helpers (1-based global indices) */
static PetscErrorCode MatShift_ScaLAPACK(Mat X,PetscScalar alpha)
{
  Mat_ScaLAPACK *x = (Mat_ScaLAPACK*)X->data;
  PetscBLASInt  i,n;
  PetscScalar   v;

  PetscFunctionBegin;
  n = PetscMin(x->M,x->N);
  for (i=1;i<=n;i++) {
    PetscStackCallBLAS("SCALAPACKelget",SCALAPACKelget_("-"," ",&v,x->loc,&i,&i,x->desc));
    v += alpha;
    PetscStackCallBLAS("SCALAPACKelset",SCALAPACKelset_(x->loc,&i,&i,x->desc,&v));
  }
  PetscFunctionReturn(0);
}

/* Y += alpha*X; requires X and Y to share block sizes and grid distribution
   (checked by MatScaLAPACKCheckDistribution), str is ignored for dense data */
static PetscErrorCode MatAXPY_ScaLAPACK(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
{
  Mat_ScaLAPACK *x = (Mat_ScaLAPACK*)X->data;
  Mat_ScaLAPACK *y = (Mat_ScaLAPACK*)Y->data;
  PetscBLASInt  one=1;
  PetscScalar   beta=1.0;

  PetscFunctionBegin;
  MatScaLAPACKCheckDistribution(Y,1,X,3);
  PetscStackCallBLAS("SCALAPACKmatadd",SCALAPACKmatadd_(&x->M,&x->N,&alpha,x->loc,&one,&one,x->desc,&beta,y->loc,&one,&one,y->desc));
  PetscCall(PetscObjectStateIncrease((PetscObject)Y));
  PetscFunctionReturn(0);
}

/* Straight copy of the local array; assumes A and B share the same
   distribution so the flat copy is valid (str is ignored for dense data) */
static PetscErrorCode MatCopy_ScaLAPACK(Mat A,Mat B,MatStructure str)
{
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
  Mat_ScaLAPACK *b = (Mat_ScaLAPACK*)B->data;

  PetscFunctionBegin;
  PetscCall(PetscArraycpy(b->loc,a->loc,a->lld*a->locc));
  PetscCall(PetscObjectStateIncrease((PetscObject)B));
  PetscFunctionReturn(0);
}

/* Create a new MATSCALAPACK with the same sizes, block sizes, and source
   process as A; the distribution fields are seeded before MatSetUp so the
   duplicate lays out its local array identically, making the flat copy below
   valid when MAT_COPY_VALUES is requested */
static PetscErrorCode MatDuplicate_ScaLAPACK(Mat A,MatDuplicateOption op,Mat *B)
{
  Mat           Bs;
  MPI_Comm      comm;
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data,*b;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
  PetscCall(MatCreate(comm,&Bs));
  PetscCall(MatSetSizes(Bs,A->rmap->n,A->cmap->n,PETSC_DECIDE,PETSC_DECIDE));
  PetscCall(MatSetType(Bs,MATSCALAPACK));
  b = (Mat_ScaLAPACK*)Bs->data;
  b->M    = a->M;
  b->N    = a->N;
  b->mb   = a->mb;
  b->nb   = a->nb;
  b->rsrc = a->rsrc;
  b->csrc = a->csrc;
  PetscCall(MatSetUp(Bs));
  *B = Bs;
  if (op == MAT_COPY_VALUES) {
    PetscCall(PetscArraycpy(b->loc,a->loc,a->lld*a->locc));
  }
  Bs->assembled = PETSC_TRUE;
  PetscFunctionReturn(0);
}

/* B = A^T; only MAT_INITIAL_MATRIX is supported. The result swaps block
   sizes (nb,mb) and source coordinates (csrc,rsrc) relative to A. */
static PetscErrorCode MatTranspose_ScaLAPACK(Mat A,MatReuse reuse,Mat *B)
{
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data, *b;
  Mat           Bs = *B;
  PetscBLASInt  one=1;
  PetscScalar   sone=1.0,zero=0.0;
#if defined(PETSC_USE_COMPLEX)
  PetscInt i,j;
#endif

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A),a->nb,a->mb,a->N,a->M,a->csrc,a->rsrc,&Bs));
    *B = Bs;
  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Only MAT_INITIAL_MATRIX supported");
  b = (Mat_ScaLAPACK*)Bs->data;
PetscStackCallBLAS("PBLAStran",PBLAStran_(&a->N,&a->M,&sone,a->loc,&one,&one,a->desc,&zero,b->loc,&one,&one,b->desc)); 628 #if defined(PETSC_USE_COMPLEX) 629 /* undo conjugation */ 630 for (i=0;i<b->locr;i++) for (j=0;j<b->locc;j++) b->loc[i+j*b->lld] = PetscConj(b->loc[i+j*b->lld]); 631 #endif 632 Bs->assembled = PETSC_TRUE; 633 PetscFunctionReturn(0); 634 } 635 636 static PetscErrorCode MatConjugate_ScaLAPACK(Mat A) 637 { 638 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 639 PetscInt i,j; 640 641 PetscFunctionBegin; 642 for (i=0;i<a->locr;i++) for (j=0;j<a->locc;j++) a->loc[i+j*a->lld] = PetscConj(a->loc[i+j*a->lld]); 643 PetscFunctionReturn(0); 644 } 645 646 static PetscErrorCode MatHermitianTranspose_ScaLAPACK(Mat A,MatReuse reuse,Mat *B) 647 { 648 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data, *b; 649 Mat Bs = *B; 650 PetscBLASInt one=1; 651 PetscScalar sone=1.0,zero=0.0; 652 653 PetscFunctionBegin; 654 if (reuse == MAT_INITIAL_MATRIX) { 655 PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A),a->nb,a->mb,a->N,a->M,a->csrc,a->rsrc,&Bs)); 656 *B = Bs; 657 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Only MAT_INITIAL_MATRIX supported"); 658 b = (Mat_ScaLAPACK*)Bs->data; 659 PetscStackCallBLAS("PBLAStran",PBLAStran_(&a->N,&a->M,&sone,a->loc,&one,&one,a->desc,&zero,b->loc,&one,&one,b->desc)); 660 Bs->assembled = PETSC_TRUE; 661 PetscFunctionReturn(0); 662 } 663 664 static PetscErrorCode MatSolve_ScaLAPACK(Mat A,Vec B,Vec X) 665 { 666 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 667 PetscScalar *x,*x2d; 668 const PetscInt *ranges; 669 PetscBLASInt xdesc[9],x2desc[9],mb,lszx,zero=0,one=1,xlld,nrhs=1,info; 670 671 PetscFunctionBegin; 672 PetscCall(VecCopy(B,X)); 673 PetscCall(VecGetArray(X,&x)); 674 675 /* create ScaLAPACK descriptor for a vector (1d block distribution) */ 676 PetscCall(PetscLayoutGetRanges(A->rmap,&ranges)); 677 PetscCall(PetscBLASIntCast(ranges[1],&mb)); /* x block size */ 678 xlld = PetscMax(1,A->rmap->n); 679 
PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&xlld,&info)); 680 PetscCheckScaLapackInfo("descinit",info); 681 682 /* allocate 2d vector */ 683 lszx = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow); 684 PetscCall(PetscMalloc1(lszx,&x2d)); 685 xlld = PetscMax(1,lszx); 686 687 /* create ScaLAPACK descriptor for a vector (2d block distribution) */ 688 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&xlld,&info)); 689 PetscCheckScaLapackInfo("descinit",info); 690 691 /* redistribute x as a column of a 2d matrix */ 692 PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxcol)); 693 694 /* call ScaLAPACK subroutine */ 695 switch (A->factortype) { 696 case MAT_FACTOR_LU: 697 PetscStackCallBLAS("SCALAPACKgetrs",SCALAPACKgetrs_("N",&a->M,&nrhs,a->loc,&one,&one,a->desc,a->pivots,x2d,&one,&one,x2desc,&info)); 698 PetscCheckScaLapackInfo("getrs",info); 699 break; 700 case MAT_FACTOR_CHOLESKY: 701 PetscStackCallBLAS("SCALAPACKpotrs",SCALAPACKpotrs_("L",&a->M,&nrhs,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&info)); 702 PetscCheckScaLapackInfo("potrs",info); 703 break; 704 default: 705 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unfactored Matrix or Unsupported MatFactorType"); 706 } 707 708 /* redistribute x from a column of a 2d matrix */ 709 PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,x2d,&one,&one,x2desc,x,&one,&one,xdesc,&a->grid->ictxcol)); 710 711 PetscCall(PetscFree(x2d)); 712 PetscCall(VecRestoreArray(X,&x)); 713 PetscFunctionReturn(0); 714 } 715 716 static PetscErrorCode MatSolveAdd_ScaLAPACK(Mat A,Vec B,Vec Y,Vec X) 717 { 718 PetscFunctionBegin; 719 PetscCall(MatSolve_ScaLAPACK(A,B,X)); 720 PetscCall(VecAXPY(X,1,Y)); 721 PetscFunctionReturn(0); 722 } 723 724 static PetscErrorCode MatMatSolve_ScaLAPACK(Mat A,Mat B,Mat X) 725 { 
726 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data,*b,*x; 727 PetscBool flg1,flg2; 728 PetscBLASInt one=1,info; 729 730 PetscFunctionBegin; 731 PetscCall(PetscObjectTypeCompare((PetscObject)B,MATSCALAPACK,&flg1)); 732 PetscCall(PetscObjectTypeCompare((PetscObject)X,MATSCALAPACK,&flg2)); 733 PetscCheck((flg1 && flg2),PETSC_COMM_SELF,PETSC_ERR_SUP,"Both B and X must be of type MATSCALAPACK"); 734 MatScaLAPACKCheckDistribution(B,1,X,2); 735 b = (Mat_ScaLAPACK*)B->data; 736 x = (Mat_ScaLAPACK*)X->data; 737 PetscCall(PetscArraycpy(x->loc,b->loc,b->lld*b->locc)); 738 739 switch (A->factortype) { 740 case MAT_FACTOR_LU: 741 PetscStackCallBLAS("SCALAPACKgetrs",SCALAPACKgetrs_("N",&a->M,&x->N,a->loc,&one,&one,a->desc,a->pivots,x->loc,&one,&one,x->desc,&info)); 742 PetscCheckScaLapackInfo("getrs",info); 743 break; 744 case MAT_FACTOR_CHOLESKY: 745 PetscStackCallBLAS("SCALAPACKpotrs",SCALAPACKpotrs_("L",&a->M,&x->N,a->loc,&one,&one,a->desc,x->loc,&one,&one,x->desc,&info)); 746 PetscCheckScaLapackInfo("potrs",info); 747 break; 748 default: 749 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unfactored Matrix or Unsupported MatFactorType"); 750 } 751 PetscFunctionReturn(0); 752 } 753 754 static PetscErrorCode MatLUFactor_ScaLAPACK(Mat A,IS row,IS col,const MatFactorInfo *factorinfo) 755 { 756 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 757 PetscBLASInt one=1,info; 758 759 PetscFunctionBegin; 760 if (!a->pivots) { 761 PetscCall(PetscMalloc1(a->locr+a->mb,&a->pivots)); 762 PetscCall(PetscLogObjectMemory((PetscObject)A,a->locr*sizeof(PetscBLASInt))); 763 } 764 PetscStackCallBLAS("SCALAPACKgetrf",SCALAPACKgetrf_(&a->M,&a->N,a->loc,&one,&one,a->desc,a->pivots,&info)); 765 PetscCheckScaLapackInfo("getrf",info); 766 A->factortype = MAT_FACTOR_LU; 767 A->assembled = PETSC_TRUE; 768 769 PetscCall(PetscFree(A->solvertype)); 770 PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK,&A->solvertype)); 771 PetscFunctionReturn(0); 772 } 773 774 static PetscErrorCode MatLUFactorNumeric_ScaLAPACK(Mat F,Mat A,const 
MatFactorInfo *info) 775 { 776 PetscFunctionBegin; 777 PetscCall(MatCopy(A,F,SAME_NONZERO_PATTERN)); 778 PetscCall(MatLUFactor_ScaLAPACK(F,0,0,info)); 779 PetscFunctionReturn(0); 780 } 781 782 static PetscErrorCode MatLUFactorSymbolic_ScaLAPACK(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info) 783 { 784 PetscFunctionBegin; 785 /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */ 786 PetscFunctionReturn(0); 787 } 788 789 static PetscErrorCode MatCholeskyFactor_ScaLAPACK(Mat A,IS perm,const MatFactorInfo *factorinfo) 790 { 791 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 792 PetscBLASInt one=1,info; 793 794 PetscFunctionBegin; 795 PetscStackCallBLAS("SCALAPACKpotrf",SCALAPACKpotrf_("L",&a->M,a->loc,&one,&one,a->desc,&info)); 796 PetscCheckScaLapackInfo("potrf",info); 797 A->factortype = MAT_FACTOR_CHOLESKY; 798 A->assembled = PETSC_TRUE; 799 800 PetscCall(PetscFree(A->solvertype)); 801 PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK,&A->solvertype)); 802 PetscFunctionReturn(0); 803 } 804 805 static PetscErrorCode MatCholeskyFactorNumeric_ScaLAPACK(Mat F,Mat A,const MatFactorInfo *info) 806 { 807 PetscFunctionBegin; 808 PetscCall(MatCopy(A,F,SAME_NONZERO_PATTERN)); 809 PetscCall(MatCholeskyFactor_ScaLAPACK(F,0,info)); 810 PetscFunctionReturn(0); 811 } 812 813 static PetscErrorCode MatCholeskyFactorSymbolic_ScaLAPACK(Mat F,Mat A,IS perm,const MatFactorInfo *info) 814 { 815 PetscFunctionBegin; 816 /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. 
*/ 817 PetscFunctionReturn(0); 818 } 819 820 PetscErrorCode MatFactorGetSolverType_scalapack_scalapack(Mat A,MatSolverType *type) 821 { 822 PetscFunctionBegin; 823 *type = MATSOLVERSCALAPACK; 824 PetscFunctionReturn(0); 825 } 826 827 static PetscErrorCode MatGetFactor_scalapack_scalapack(Mat A,MatFactorType ftype,Mat *F) 828 { 829 Mat B; 830 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 831 832 PetscFunctionBegin; 833 /* Create the factorization matrix */ 834 PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A),a->mb,a->nb,a->M,a->N,a->rsrc,a->csrc,&B)); 835 B->trivialsymbolic = PETSC_TRUE; 836 B->factortype = ftype; 837 PetscCall(PetscFree(B->solvertype)); 838 PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK,&B->solvertype)); 839 840 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_scalapack_scalapack)); 841 *F = B; 842 PetscFunctionReturn(0); 843 } 844 845 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_ScaLAPACK(void) 846 { 847 PetscFunctionBegin; 848 PetscCall(MatSolverTypeRegister(MATSOLVERSCALAPACK,MATSCALAPACK,MAT_FACTOR_LU,MatGetFactor_scalapack_scalapack)); 849 PetscCall(MatSolverTypeRegister(MATSOLVERSCALAPACK,MATSCALAPACK,MAT_FACTOR_CHOLESKY,MatGetFactor_scalapack_scalapack)); 850 PetscFunctionReturn(0); 851 } 852 853 static PetscErrorCode MatNorm_ScaLAPACK(Mat A,NormType type,PetscReal *nrm) 854 { 855 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 856 PetscBLASInt one=1,lwork=0; 857 const char *ntype; 858 PetscScalar *work=NULL,dummy; 859 860 PetscFunctionBegin; 861 switch (type) { 862 case NORM_1: 863 ntype = "1"; 864 lwork = PetscMax(a->locr,a->locc); 865 break; 866 case NORM_FROBENIUS: 867 ntype = "F"; 868 work = &dummy; 869 break; 870 case NORM_INFINITY: 871 ntype = "I"; 872 lwork = PetscMax(a->locr,a->locc); 873 break; 874 default: 875 SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Unsupported norm type"); 876 } 877 if (lwork) PetscCall(PetscMalloc1(lwork,&work)); 878 *nrm = 
SCALAPACKlange_(ntype,&a->M,&a->N,a->loc,&one,&one,a->desc,work); 879 if (lwork) PetscCall(PetscFree(work)); 880 PetscFunctionReturn(0); 881 } 882 883 static PetscErrorCode MatZeroEntries_ScaLAPACK(Mat A) 884 { 885 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 886 887 PetscFunctionBegin; 888 PetscCall(PetscArrayzero(a->loc,a->lld*a->locc)); 889 PetscFunctionReturn(0); 890 } 891 892 static PetscErrorCode MatGetOwnershipIS_ScaLAPACK(Mat A,IS *rows,IS *cols) 893 { 894 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 895 PetscInt i,n,nb,isrc,nproc,iproc,*idx; 896 897 PetscFunctionBegin; 898 if (rows) { 899 n = a->locr; 900 nb = a->mb; 901 isrc = a->rsrc; 902 nproc = a->grid->nprow; 903 iproc = a->grid->myrow; 904 PetscCall(PetscMalloc1(n,&idx)); 905 for (i=0;i<n;i++) idx[i] = nproc*nb*(i/nb) + i%nb + ((nproc+iproc-isrc)%nproc)*nb; 906 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,n,idx,PETSC_OWN_POINTER,rows)); 907 } 908 if (cols) { 909 n = a->locc; 910 nb = a->nb; 911 isrc = a->csrc; 912 nproc = a->grid->npcol; 913 iproc = a->grid->mycol; 914 PetscCall(PetscMalloc1(n,&idx)); 915 for (i=0;i<n;i++) idx[i] = nproc*nb*(i/nb) + i%nb + ((nproc+iproc-isrc)%nproc)*nb; 916 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,n,idx,PETSC_OWN_POINTER,cols)); 917 } 918 PetscFunctionReturn(0); 919 } 920 921 static PetscErrorCode MatConvert_ScaLAPACK_Dense(Mat A,MatType newtype,MatReuse reuse,Mat *B) 922 { 923 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 924 Mat Bmpi; 925 MPI_Comm comm; 926 PetscInt i,M=A->rmap->N,N=A->cmap->N,m,n,rstart,rend,nz; 927 const PetscInt *ranges,*branges,*cwork; 928 const PetscScalar *vwork; 929 PetscBLASInt bdesc[9],bmb,zero=0,one=1,lld,info; 930 PetscScalar *barray; 931 PetscBool differ=PETSC_FALSE; 932 PetscMPIInt size; 933 934 PetscFunctionBegin; 935 PetscCall(PetscObjectGetComm((PetscObject)A,&comm)); 936 PetscCall(PetscLayoutGetRanges(A->rmap,&ranges)); 937 938 if (reuse == MAT_REUSE_MATRIX) { /* check if local sizes differ in A and B */ 939 
PetscCallMPI(MPI_Comm_size(comm,&size)); 940 PetscCall(PetscLayoutGetRanges((*B)->rmap,&branges)); 941 for (i=0;i<size;i++) if (ranges[i+1]!=branges[i+1]) { differ=PETSC_TRUE; break; } 942 } 943 944 if (reuse == MAT_REUSE_MATRIX && differ) { /* special case, use auxiliary dense matrix */ 945 PetscCall(MatCreate(comm,&Bmpi)); 946 m = PETSC_DECIDE; 947 PetscCall(PetscSplitOwnershipEqual(comm,&m,&M)); 948 n = PETSC_DECIDE; 949 PetscCall(PetscSplitOwnershipEqual(comm,&n,&N)); 950 PetscCall(MatSetSizes(Bmpi,m,n,M,N)); 951 PetscCall(MatSetType(Bmpi,MATDENSE)); 952 PetscCall(MatSetUp(Bmpi)); 953 954 /* create ScaLAPACK descriptor for B (1d block distribution) */ 955 PetscCall(PetscBLASIntCast(ranges[1],&bmb)); /* row block size */ 956 lld = PetscMax(A->rmap->n,1); /* local leading dimension */ 957 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(bdesc,&a->M,&a->N,&bmb,&a->N,&zero,&zero,&a->grid->ictxcol,&lld,&info)); 958 PetscCheckScaLapackInfo("descinit",info); 959 960 /* redistribute matrix */ 961 PetscCall(MatDenseGetArray(Bmpi,&barray)); 962 PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&a->N,a->loc,&one,&one,a->desc,barray,&one,&one,bdesc,&a->grid->ictxcol)); 963 PetscCall(MatDenseRestoreArray(Bmpi,&barray)); 964 PetscCall(MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY)); 965 PetscCall(MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY)); 966 967 /* transfer rows of auxiliary matrix to the final matrix B */ 968 PetscCall(MatGetOwnershipRange(Bmpi,&rstart,&rend)); 969 for (i=rstart;i<rend;i++) { 970 PetscCall(MatGetRow(Bmpi,i,&nz,&cwork,&vwork)); 971 PetscCall(MatSetValues(*B,1,&i,nz,cwork,vwork,INSERT_VALUES)); 972 PetscCall(MatRestoreRow(Bmpi,i,&nz,&cwork,&vwork)); 973 } 974 PetscCall(MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY)); 975 PetscCall(MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY)); 976 PetscCall(MatDestroy(&Bmpi)); 977 978 } else { /* normal cases */ 979 980 if (reuse == MAT_REUSE_MATRIX) Bmpi = *B; 981 else { 982 PetscCall(MatCreate(comm,&Bmpi)); 983 m = 
PETSC_DECIDE; 984 PetscCall(PetscSplitOwnershipEqual(comm,&m,&M)); 985 n = PETSC_DECIDE; 986 PetscCall(PetscSplitOwnershipEqual(comm,&n,&N)); 987 PetscCall(MatSetSizes(Bmpi,m,n,M,N)); 988 PetscCall(MatSetType(Bmpi,MATDENSE)); 989 PetscCall(MatSetUp(Bmpi)); 990 } 991 992 /* create ScaLAPACK descriptor for B (1d block distribution) */ 993 PetscCall(PetscBLASIntCast(ranges[1],&bmb)); /* row block size */ 994 lld = PetscMax(A->rmap->n,1); /* local leading dimension */ 995 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(bdesc,&a->M,&a->N,&bmb,&a->N,&zero,&zero,&a->grid->ictxcol,&lld,&info)); 996 PetscCheckScaLapackInfo("descinit",info); 997 998 /* redistribute matrix */ 999 PetscCall(MatDenseGetArray(Bmpi,&barray)); 1000 PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&a->N,a->loc,&one,&one,a->desc,barray,&one,&one,bdesc,&a->grid->ictxcol)); 1001 PetscCall(MatDenseRestoreArray(Bmpi,&barray)); 1002 1003 PetscCall(MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY)); 1004 PetscCall(MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY)); 1005 if (reuse == MAT_INPLACE_MATRIX) { 1006 PetscCall(MatHeaderReplace(A,&Bmpi)); 1007 } else *B = Bmpi; 1008 } 1009 PetscFunctionReturn(0); 1010 } 1011 1012 static inline PetscErrorCode MatScaLAPACKCheckLayout(PetscLayout map,PetscBool *correct) 1013 { 1014 const PetscInt *ranges; 1015 PetscMPIInt size; 1016 PetscInt i,n; 1017 1018 PetscFunctionBegin; 1019 *correct = PETSC_TRUE; 1020 PetscCallMPI(MPI_Comm_size(map->comm,&size)); 1021 if (size>1) { 1022 PetscCall(PetscLayoutGetRanges(map,&ranges)); 1023 n = ranges[1]-ranges[0]; 1024 for (i=1;i<size;i++) if (ranges[i+1]-ranges[i]!=n) break; 1025 *correct = (PetscBool)(i == size || (i == size - 1 && ranges[i+1]-ranges[i] <= n)); 1026 } 1027 PetscFunctionReturn(0); 1028 } 1029 1030 PETSC_INTERN PetscErrorCode MatConvert_Dense_ScaLAPACK(Mat A,MatType newtype,MatReuse reuse,Mat *B) 1031 { 1032 Mat_ScaLAPACK *b; 1033 Mat Bmpi; 1034 MPI_Comm comm; 1035 PetscInt M=A->rmap->N,N=A->cmap->N,m,n; 1036 
const PetscInt *ranges,*rows,*cols; 1037 PetscBLASInt adesc[9],amb,zero=0,one=1,lld,info; 1038 PetscScalar *aarray; 1039 IS ir,ic; 1040 PetscInt lda; 1041 PetscBool flg; 1042 1043 PetscFunctionBegin; 1044 PetscCall(PetscObjectGetComm((PetscObject)A,&comm)); 1045 1046 if (reuse == MAT_REUSE_MATRIX) Bmpi = *B; 1047 else { 1048 PetscCall(MatCreate(comm,&Bmpi)); 1049 m = PETSC_DECIDE; 1050 PetscCall(PetscSplitOwnershipEqual(comm,&m,&M)); 1051 n = PETSC_DECIDE; 1052 PetscCall(PetscSplitOwnershipEqual(comm,&n,&N)); 1053 PetscCall(MatSetSizes(Bmpi,m,n,M,N)); 1054 PetscCall(MatSetType(Bmpi,MATSCALAPACK)); 1055 PetscCall(MatSetUp(Bmpi)); 1056 } 1057 b = (Mat_ScaLAPACK*)Bmpi->data; 1058 1059 PetscCall(MatDenseGetLDA(A,&lda)); 1060 PetscCall(MatDenseGetArray(A,&aarray)); 1061 PetscCall(MatScaLAPACKCheckLayout(A->rmap,&flg)); 1062 if (flg) PetscCall(MatScaLAPACKCheckLayout(A->cmap,&flg)); 1063 if (flg) { /* if the input Mat has a ScaLAPACK-compatible layout, use ScaLAPACK for the redistribution */ 1064 /* create ScaLAPACK descriptor for A (1d block distribution) */ 1065 PetscCall(PetscLayoutGetRanges(A->rmap,&ranges)); 1066 PetscCall(PetscBLASIntCast(ranges[1],&amb)); /* row block size */ 1067 lld = PetscMax(lda,1); /* local leading dimension */ 1068 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(adesc,&b->M,&b->N,&amb,&b->N,&zero,&zero,&b->grid->ictxcol,&lld,&info)); 1069 PetscCheckScaLapackInfo("descinit",info); 1070 1071 /* redistribute matrix */ 1072 PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&b->M,&b->N,aarray,&one,&one,adesc,b->loc,&one,&one,b->desc,&b->grid->ictxcol)); 1073 Bmpi->nooffprocentries = PETSC_TRUE; 1074 } else { /* if the input Mat has a ScaLAPACK-incompatible layout, redistribute via MatSetValues() */ 1075 PetscCheck(lda == A->rmap->n,PETSC_COMM_SELF,PETSC_ERR_SUP,"Leading dimension (%" PetscInt_FMT ") different than local number of rows (%" PetscInt_FMT ")",lda,A->rmap->n); 1076 b->roworiented = PETSC_FALSE; 1077 
PetscCall(MatGetOwnershipIS(A,&ir,&ic)); 1078 PetscCall(ISGetIndices(ir,&rows)); 1079 PetscCall(ISGetIndices(ic,&cols)); 1080 PetscCall(MatSetValues(Bmpi,A->rmap->n,rows,A->cmap->N,cols,aarray,INSERT_VALUES)); 1081 PetscCall(ISRestoreIndices(ir,&rows)); 1082 PetscCall(ISRestoreIndices(ic,&cols)); 1083 PetscCall(ISDestroy(&ic)); 1084 PetscCall(ISDestroy(&ir)); 1085 } 1086 PetscCall(MatDenseRestoreArray(A,&aarray)); 1087 PetscCall(MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY)); 1088 PetscCall(MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY)); 1089 if (reuse == MAT_INPLACE_MATRIX) { 1090 PetscCall(MatHeaderReplace(A,&Bmpi)); 1091 } else *B = Bmpi; 1092 PetscFunctionReturn(0); 1093 } 1094 1095 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat A,MatType newtype,MatReuse reuse,Mat *newmat) 1096 { 1097 Mat mat_scal; 1098 PetscInt M=A->rmap->N,N=A->cmap->N,rstart=A->rmap->rstart,rend=A->rmap->rend,m,n,row,ncols; 1099 const PetscInt *cols; 1100 const PetscScalar *vals; 1101 1102 PetscFunctionBegin; 1103 if (reuse == MAT_REUSE_MATRIX) { 1104 mat_scal = *newmat; 1105 PetscCall(MatZeroEntries(mat_scal)); 1106 } else { 1107 PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&mat_scal)); 1108 m = PETSC_DECIDE; 1109 PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&m,&M)); 1110 n = PETSC_DECIDE; 1111 PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&n,&N)); 1112 PetscCall(MatSetSizes(mat_scal,m,n,M,N)); 1113 PetscCall(MatSetType(mat_scal,MATSCALAPACK)); 1114 PetscCall(MatSetUp(mat_scal)); 1115 } 1116 for (row=rstart;row<rend;row++) { 1117 PetscCall(MatGetRow(A,row,&ncols,&cols,&vals)); 1118 PetscCall(MatSetValues(mat_scal,1,&row,ncols,cols,vals,INSERT_VALUES)); 1119 PetscCall(MatRestoreRow(A,row,&ncols,&cols,&vals)); 1120 } 1121 PetscCall(MatAssemblyBegin(mat_scal,MAT_FINAL_ASSEMBLY)); 1122 PetscCall(MatAssemblyEnd(mat_scal,MAT_FINAL_ASSEMBLY)); 1123 1124 if (reuse == MAT_INPLACE_MATRIX) PetscCall(MatHeaderReplace(A,&mat_scal)); 1125 else *newmat = 
mat_scal; 1126 PetscFunctionReturn(0); 1127 } 1128 1129 PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 1130 { 1131 Mat mat_scal; 1132 PetscInt M=A->rmap->N,N=A->cmap->N,m,n,row,ncols,j,rstart=A->rmap->rstart,rend=A->rmap->rend; 1133 const PetscInt *cols; 1134 const PetscScalar *vals; 1135 PetscScalar v; 1136 1137 PetscFunctionBegin; 1138 if (reuse == MAT_REUSE_MATRIX) { 1139 mat_scal = *newmat; 1140 PetscCall(MatZeroEntries(mat_scal)); 1141 } else { 1142 PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&mat_scal)); 1143 m = PETSC_DECIDE; 1144 PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&m,&M)); 1145 n = PETSC_DECIDE; 1146 PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&n,&N)); 1147 PetscCall(MatSetSizes(mat_scal,m,n,M,N)); 1148 PetscCall(MatSetType(mat_scal,MATSCALAPACK)); 1149 PetscCall(MatSetUp(mat_scal)); 1150 } 1151 PetscCall(MatGetRowUpperTriangular(A)); 1152 for (row=rstart;row<rend;row++) { 1153 PetscCall(MatGetRow(A,row,&ncols,&cols,&vals)); 1154 PetscCall(MatSetValues(mat_scal,1,&row,ncols,cols,vals,ADD_VALUES)); 1155 for (j=0;j<ncols;j++) { /* lower triangular part */ 1156 if (cols[j] == row) continue; 1157 v = A->hermitian == PETSC_BOOL3_TRUE ? 
PetscConj(vals[j]) : vals[j]; 1158 PetscCall(MatSetValues(mat_scal,1,&cols[j],1,&row,&v,ADD_VALUES)); 1159 } 1160 PetscCall(MatRestoreRow(A,row,&ncols,&cols,&vals)); 1161 } 1162 PetscCall(MatRestoreRowUpperTriangular(A)); 1163 PetscCall(MatAssemblyBegin(mat_scal,MAT_FINAL_ASSEMBLY)); 1164 PetscCall(MatAssemblyEnd(mat_scal,MAT_FINAL_ASSEMBLY)); 1165 1166 if (reuse == MAT_INPLACE_MATRIX) PetscCall(MatHeaderReplace(A,&mat_scal)); 1167 else *newmat = mat_scal; 1168 PetscFunctionReturn(0); 1169 } 1170 1171 static PetscErrorCode MatScaLAPACKSetPreallocation(Mat A) 1172 { 1173 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 1174 PetscInt sz=0; 1175 1176 PetscFunctionBegin; 1177 PetscCall(PetscLayoutSetUp(A->rmap)); 1178 PetscCall(PetscLayoutSetUp(A->cmap)); 1179 if (!a->lld) a->lld = a->locr; 1180 1181 PetscCall(PetscFree(a->loc)); 1182 PetscCall(PetscIntMultError(a->lld,a->locc,&sz)); 1183 PetscCall(PetscCalloc1(sz,&a->loc)); 1184 PetscCall(PetscLogObjectMemory((PetscObject)A,sz*sizeof(PetscScalar))); 1185 1186 A->preallocated = PETSC_TRUE; 1187 PetscFunctionReturn(0); 1188 } 1189 1190 static PetscErrorCode MatDestroy_ScaLAPACK(Mat A) 1191 { 1192 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 1193 Mat_ScaLAPACK_Grid *grid; 1194 PetscBool flg; 1195 MPI_Comm icomm; 1196 1197 PetscFunctionBegin; 1198 PetscCall(MatStashDestroy_Private(&A->stash)); 1199 PetscCall(PetscFree(a->loc)); 1200 PetscCall(PetscFree(a->pivots)); 1201 PetscCall(PetscCommDuplicate(PetscObjectComm((PetscObject)A),&icomm,NULL)); 1202 PetscCallMPI(MPI_Comm_get_attr(icomm,Petsc_ScaLAPACK_keyval,(void**)&grid,(int*)&flg)); 1203 if (--grid->grid_refct == 0) { 1204 Cblacs_gridexit(grid->ictxt); 1205 Cblacs_gridexit(grid->ictxrow); 1206 Cblacs_gridexit(grid->ictxcol); 1207 PetscCall(PetscFree(grid)); 1208 PetscCallMPI(MPI_Comm_delete_attr(icomm,Petsc_ScaLAPACK_keyval)); 1209 } 1210 PetscCall(PetscCommDestroy(&icomm)); 1211 PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatGetOwnershipIS_C",NULL)); 1212 
PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL)); 1213 PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKSetBlockSizes_C",NULL)); 1214 PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKGetBlockSizes_C",NULL)); 1215 PetscCall(PetscFree(A->data)); 1216 PetscFunctionReturn(0); 1217 } 1218 1219 PetscErrorCode MatSetUp_ScaLAPACK(Mat A) 1220 { 1221 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 1222 PetscBLASInt info=0; 1223 PetscBool flg; 1224 1225 PetscFunctionBegin; 1226 PetscCall(PetscLayoutSetUp(A->rmap)); 1227 PetscCall(PetscLayoutSetUp(A->cmap)); 1228 1229 /* check that the layout is as enforced by MatCreateScaLAPACK() */ 1230 PetscCall(MatScaLAPACKCheckLayout(A->rmap,&flg)); 1231 PetscCheck(flg,A->rmap->comm,PETSC_ERR_SUP,"MATSCALAPACK must have equal local row sizes in all processes (except possibly the last one), consider using MatCreateScaLAPACK"); 1232 PetscCall(MatScaLAPACKCheckLayout(A->cmap,&flg)); 1233 PetscCheck(flg,A->cmap->comm,PETSC_ERR_SUP,"MATSCALAPACK must have equal local column sizes in all processes (except possibly the last one), consider using MatCreateScaLAPACK"); 1234 1235 /* compute local sizes */ 1236 PetscCall(PetscBLASIntCast(A->rmap->N,&a->M)); 1237 PetscCall(PetscBLASIntCast(A->cmap->N,&a->N)); 1238 a->locr = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow); 1239 a->locc = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol); 1240 a->lld = PetscMax(1,a->locr); 1241 1242 /* allocate local array */ 1243 PetscCall(MatScaLAPACKSetPreallocation(A)); 1244 1245 /* set up ScaLAPACK descriptor */ 1246 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(a->desc,&a->M,&a->N,&a->mb,&a->nb,&a->rsrc,&a->csrc,&a->grid->ictxt,&a->lld,&info)); 1247 PetscCheckScaLapackInfo("descinit",info); 1248 PetscFunctionReturn(0); 1249 } 1250 1251 PetscErrorCode MatAssemblyBegin_ScaLAPACK(Mat A,MatAssemblyType type) 1252 { 1253 PetscInt nstash,reallocs; 
1254 1255 PetscFunctionBegin; 1256 if (A->nooffprocentries) PetscFunctionReturn(0); 1257 PetscCall(MatStashScatterBegin_Private(A,&A->stash,NULL)); 1258 PetscCall(MatStashGetInfo_Private(&A->stash,&nstash,&reallocs)); 1259 PetscCall(PetscInfo(A,"Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs)); 1260 PetscFunctionReturn(0); 1261 } 1262 1263 PetscErrorCode MatAssemblyEnd_ScaLAPACK(Mat A,MatAssemblyType type) 1264 { 1265 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 1266 PetscMPIInt n; 1267 PetscInt i,flg,*row,*col; 1268 PetscScalar *val; 1269 PetscBLASInt gridx,gcidx,lridx,lcidx,rsrc,csrc; 1270 1271 PetscFunctionBegin; 1272 if (A->nooffprocentries) PetscFunctionReturn(0); 1273 while (1) { 1274 PetscCall(MatStashScatterGetMesg_Private(&A->stash,&n,&row,&col,&val,&flg)); 1275 if (!flg) break; 1276 for (i=0;i<n;i++) { 1277 PetscCall(PetscBLASIntCast(row[i]+1,&gridx)); 1278 PetscCall(PetscBLASIntCast(col[i]+1,&gcidx)); 1279 PetscStackCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc)); 1280 PetscCheck(rsrc == a->grid->myrow && csrc == a->grid->mycol,PetscObjectComm((PetscObject)A),PETSC_ERR_LIB,"Something went wrong, received value does not belong to this process"); 1281 switch (A->insertmode) { 1282 case INSERT_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] = val[i]; break; 1283 case ADD_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] += val[i]; break; 1284 default: SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for InsertMode %d",(int)A->insertmode); 1285 } 1286 } 1287 } 1288 PetscCall(MatStashScatterEnd_Private(&A->stash)); 1289 PetscFunctionReturn(0); 1290 } 1291 1292 PetscErrorCode MatLoad_ScaLAPACK(Mat newMat,PetscViewer viewer) 1293 { 1294 Mat Adense,As; 1295 MPI_Comm comm; 1296 1297 PetscFunctionBegin; 1298 PetscCall(PetscObjectGetComm((PetscObject)newMat,&comm)); 1299 PetscCall(MatCreate(comm,&Adense)); 1300 
PetscCall(MatSetType(Adense,MATDENSE)); 1301 PetscCall(MatLoad(Adense,viewer)); 1302 PetscCall(MatConvert(Adense,MATSCALAPACK,MAT_INITIAL_MATRIX,&As)); 1303 PetscCall(MatDestroy(&Adense)); 1304 PetscCall(MatHeaderReplace(newMat,&As)); 1305 PetscFunctionReturn(0); 1306 } 1307 1308 /* -------------------------------------------------------------------*/ 1309 static struct _MatOps MatOps_Values = { 1310 MatSetValues_ScaLAPACK, 1311 0, 1312 0, 1313 MatMult_ScaLAPACK, 1314 /* 4*/ MatMultAdd_ScaLAPACK, 1315 MatMultTranspose_ScaLAPACK, 1316 MatMultTransposeAdd_ScaLAPACK, 1317 MatSolve_ScaLAPACK, 1318 MatSolveAdd_ScaLAPACK, 1319 0, 1320 /*10*/ 0, 1321 MatLUFactor_ScaLAPACK, 1322 MatCholeskyFactor_ScaLAPACK, 1323 0, 1324 MatTranspose_ScaLAPACK, 1325 /*15*/ MatGetInfo_ScaLAPACK, 1326 0, 1327 MatGetDiagonal_ScaLAPACK, 1328 MatDiagonalScale_ScaLAPACK, 1329 MatNorm_ScaLAPACK, 1330 /*20*/ MatAssemblyBegin_ScaLAPACK, 1331 MatAssemblyEnd_ScaLAPACK, 1332 MatSetOption_ScaLAPACK, 1333 MatZeroEntries_ScaLAPACK, 1334 /*24*/ 0, 1335 MatLUFactorSymbolic_ScaLAPACK, 1336 MatLUFactorNumeric_ScaLAPACK, 1337 MatCholeskyFactorSymbolic_ScaLAPACK, 1338 MatCholeskyFactorNumeric_ScaLAPACK, 1339 /*29*/ MatSetUp_ScaLAPACK, 1340 0, 1341 0, 1342 0, 1343 0, 1344 /*34*/ MatDuplicate_ScaLAPACK, 1345 0, 1346 0, 1347 0, 1348 0, 1349 /*39*/ MatAXPY_ScaLAPACK, 1350 0, 1351 0, 1352 0, 1353 MatCopy_ScaLAPACK, 1354 /*44*/ 0, 1355 MatScale_ScaLAPACK, 1356 MatShift_ScaLAPACK, 1357 0, 1358 0, 1359 /*49*/ 0, 1360 0, 1361 0, 1362 0, 1363 0, 1364 /*54*/ 0, 1365 0, 1366 0, 1367 0, 1368 0, 1369 /*59*/ 0, 1370 MatDestroy_ScaLAPACK, 1371 MatView_ScaLAPACK, 1372 0, 1373 0, 1374 /*64*/ 0, 1375 0, 1376 0, 1377 0, 1378 0, 1379 /*69*/ 0, 1380 0, 1381 MatConvert_ScaLAPACK_Dense, 1382 0, 1383 0, 1384 /*74*/ 0, 1385 0, 1386 0, 1387 0, 1388 0, 1389 /*79*/ 0, 1390 0, 1391 0, 1392 0, 1393 MatLoad_ScaLAPACK, 1394 /*84*/ 0, 1395 0, 1396 0, 1397 0, 1398 0, 1399 /*89*/ 0, 1400 0, 1401 MatMatMultNumeric_ScaLAPACK, 1402 0, 1403 0, 1404 
/*94*/ 0, 1405 0, 1406 0, 1407 MatMatTransposeMultNumeric_ScaLAPACK, 1408 0, 1409 /*99*/ MatProductSetFromOptions_ScaLAPACK, 1410 0, 1411 0, 1412 MatConjugate_ScaLAPACK, 1413 0, 1414 /*104*/0, 1415 0, 1416 0, 1417 0, 1418 0, 1419 /*109*/MatMatSolve_ScaLAPACK, 1420 0, 1421 0, 1422 0, 1423 MatMissingDiagonal_ScaLAPACK, 1424 /*114*/0, 1425 0, 1426 0, 1427 0, 1428 0, 1429 /*119*/0, 1430 MatHermitianTranspose_ScaLAPACK, 1431 0, 1432 0, 1433 0, 1434 /*124*/0, 1435 0, 1436 0, 1437 0, 1438 0, 1439 /*129*/0, 1440 0, 1441 0, 1442 0, 1443 0, 1444 /*134*/0, 1445 0, 1446 0, 1447 0, 1448 0, 1449 0, 1450 /*140*/0, 1451 0, 1452 0, 1453 0, 1454 0, 1455 /*145*/0, 1456 0, 1457 0, 1458 0, 1459 0 1460 }; 1461 1462 static PetscErrorCode MatStashScatterBegin_ScaLAPACK(Mat mat,MatStash *stash,PetscInt *owners) 1463 { 1464 PetscInt *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2; 1465 PetscInt size=stash->size,nsends; 1466 PetscInt count,*sindices,**rindices,i,j,l; 1467 PetscScalar **rvalues,*svalues; 1468 MPI_Comm comm = stash->comm; 1469 MPI_Request *send_waits,*recv_waits,*recv_waits1,*recv_waits2; 1470 PetscMPIInt *sizes,*nlengths,nreceives; 1471 PetscInt *sp_idx,*sp_idy; 1472 PetscScalar *sp_val; 1473 PetscMatStashSpace space,space_next; 1474 PetscBLASInt gridx,gcidx,lridx,lcidx,rsrc,csrc; 1475 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)mat->data; 1476 1477 PetscFunctionBegin; 1478 { /* make sure all processors are either in INSERTMODE or ADDMODE */ 1479 InsertMode addv; 1480 PetscCall(MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat))); 1481 PetscCheck(addv != (ADD_VALUES|INSERT_VALUES),PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 1482 mat->insertmode = addv; /* in case this processor had no cache */ 1483 } 1484 1485 bs2 = stash->bs*stash->bs; 1486 1487 /* first count number of contributors to each processor */ 1488 PetscCall(PetscCalloc1(size,&nlengths)); 
1489 PetscCall(PetscMalloc1(stash->n+1,&owner)); 1490 1491 i = j = 0; 1492 space = stash->space_head; 1493 while (space) { 1494 space_next = space->next; 1495 for (l=0; l<space->local_used; l++) { 1496 PetscCall(PetscBLASIntCast(space->idx[l]+1,&gridx)); 1497 PetscCall(PetscBLASIntCast(space->idy[l]+1,&gcidx)); 1498 PetscStackCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc)); 1499 j = Cblacs_pnum(a->grid->ictxt,rsrc,csrc); 1500 nlengths[j]++; owner[i] = j; 1501 i++; 1502 } 1503 space = space_next; 1504 } 1505 1506 /* Now check what procs get messages - and compute nsends. */ 1507 PetscCall(PetscCalloc1(size,&sizes)); 1508 for (i=0, nsends=0; i<size; i++) { 1509 if (nlengths[i]) { 1510 sizes[i] = 1; nsends++; 1511 } 1512 } 1513 1514 {PetscMPIInt *onodes,*olengths; 1515 /* Determine the number of messages to expect, their lengths, from from-ids */ 1516 PetscCall(PetscGatherNumberOfMessages(comm,sizes,nlengths,&nreceives)); 1517 PetscCall(PetscGatherMessageLengths(comm,nsends,nreceives,nlengths,&onodes,&olengths)); 1518 /* since clubbing row,col - lengths are multiplied by 2 */ 1519 for (i=0; i<nreceives; i++) olengths[i] *=2; 1520 PetscCall(PetscPostIrecvInt(comm,tag1,nreceives,onodes,olengths,&rindices,&recv_waits1)); 1521 /* values are size 'bs2' lengths (and remove earlier factor 2 */ 1522 for (i=0; i<nreceives; i++) olengths[i] = olengths[i]*bs2/2; 1523 PetscCall(PetscPostIrecvScalar(comm,tag2,nreceives,onodes,olengths,&rvalues,&recv_waits2)); 1524 PetscCall(PetscFree(onodes)); 1525 PetscCall(PetscFree(olengths));} 1526 1527 /* do sends: 1528 1) starts[i] gives the starting index in svalues for stuff going to 1529 the ith processor 1530 */ 1531 PetscCall(PetscMalloc2(bs2*stash->n,&svalues,2*(stash->n+1),&sindices)); 1532 PetscCall(PetscMalloc1(2*nsends,&send_waits)); 1533 PetscCall(PetscMalloc2(size,&startv,size,&starti)); 1534 /* use 2 sends the first 
with all_a, the next with all_i and all_j */ 1535 startv[0] = 0; starti[0] = 0; 1536 for (i=1; i<size; i++) { 1537 startv[i] = startv[i-1] + nlengths[i-1]; 1538 starti[i] = starti[i-1] + 2*nlengths[i-1]; 1539 } 1540 1541 i = 0; 1542 space = stash->space_head; 1543 while (space) { 1544 space_next = space->next; 1545 sp_idx = space->idx; 1546 sp_idy = space->idy; 1547 sp_val = space->val; 1548 for (l=0; l<space->local_used; l++) { 1549 j = owner[i]; 1550 if (bs2 == 1) { 1551 svalues[startv[j]] = sp_val[l]; 1552 } else { 1553 PetscInt k; 1554 PetscScalar *buf1,*buf2; 1555 buf1 = svalues+bs2*startv[j]; 1556 buf2 = space->val + bs2*l; 1557 for (k=0; k<bs2; k++) buf1[k] = buf2[k]; 1558 } 1559 sindices[starti[j]] = sp_idx[l]; 1560 sindices[starti[j]+nlengths[j]] = sp_idy[l]; 1561 startv[j]++; 1562 starti[j]++; 1563 i++; 1564 } 1565 space = space_next; 1566 } 1567 startv[0] = 0; 1568 for (i=1; i<size; i++) startv[i] = startv[i-1] + nlengths[i-1]; 1569 1570 for (i=0,count=0; i<size; i++) { 1571 if (sizes[i]) { 1572 PetscCallMPI(MPI_Isend(sindices+2*startv[i],2*nlengths[i],MPIU_INT,i,tag1,comm,send_waits+count++)); 1573 PetscCallMPI(MPI_Isend(svalues+bs2*startv[i],bs2*nlengths[i],MPIU_SCALAR,i,tag2,comm,send_waits+count++)); 1574 } 1575 } 1576 #if defined(PETSC_USE_INFO) 1577 PetscCall(PetscInfo(NULL,"No of messages: %" PetscInt_FMT "\n",nsends)); 1578 for (i=0; i<size; i++) { 1579 if (sizes[i]) { 1580 PetscCall(PetscInfo(NULL,"Mesg_to: %" PetscInt_FMT ": size: %zu bytes\n",i,(size_t)(nlengths[i]*(bs2*sizeof(PetscScalar)+2*sizeof(PetscInt))))); 1581 } 1582 } 1583 #endif 1584 PetscCall(PetscFree(nlengths)); 1585 PetscCall(PetscFree(owner)); 1586 PetscCall(PetscFree2(startv,starti)); 1587 PetscCall(PetscFree(sizes)); 1588 1589 /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */ 1590 PetscCall(PetscMalloc1(2*nreceives,&recv_waits)); 1591 1592 for (i=0; i<nreceives; i++) { 1593 recv_waits[2*i] = recv_waits1[i]; 1594 recv_waits[2*i+1] = recv_waits2[i]; 
1595 } 1596 stash->recv_waits = recv_waits; 1597 1598 PetscCall(PetscFree(recv_waits1)); 1599 PetscCall(PetscFree(recv_waits2)); 1600 1601 stash->svalues = svalues; 1602 stash->sindices = sindices; 1603 stash->rvalues = rvalues; 1604 stash->rindices = rindices; 1605 stash->send_waits = send_waits; 1606 stash->nsends = nsends; 1607 stash->nrecvs = nreceives; 1608 stash->reproduce_count = 0; 1609 PetscFunctionReturn(0); 1610 } 1611 1612 static PetscErrorCode MatScaLAPACKSetBlockSizes_ScaLAPACK(Mat A,PetscInt mb,PetscInt nb) 1613 { 1614 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 1615 1616 PetscFunctionBegin; 1617 PetscCheck(!A->preallocated,PETSC_COMM_SELF,PETSC_ERR_ORDER,"Cannot change block sizes after MatSetUp"); 1618 PetscCheck(mb >= 1 || mb == PETSC_DECIDE,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"mb %" PetscInt_FMT " must be at least 1",mb); 1619 PetscCheck(nb >= 1 || nb == PETSC_DECIDE,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"nb %" PetscInt_FMT " must be at least 1",nb); 1620 PetscCall(PetscBLASIntCast((mb==PETSC_DECIDE)?DEFAULT_BLOCKSIZE:mb,&a->mb)); 1621 PetscCall(PetscBLASIntCast((nb==PETSC_DECIDE)?a->mb:nb,&a->nb)); 1622 PetscFunctionReturn(0); 1623 } 1624 1625 /*@ 1626 MatScaLAPACKSetBlockSizes - Sets the block sizes to be used for the distribution of 1627 the ScaLAPACK matrix 1628 1629 Logically Collective on A 1630 1631 Input Parameters: 1632 + A - a MATSCALAPACK matrix 1633 . 
mb - the row block size 1634 - nb - the column block size 1635 1636 Level: intermediate 1637 1638 .seealso: `MatCreateScaLAPACK()`, `MatScaLAPACKGetBlockSizes()` 1639 @*/ 1640 PetscErrorCode MatScaLAPACKSetBlockSizes(Mat A,PetscInt mb,PetscInt nb) 1641 { 1642 PetscFunctionBegin; 1643 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 1644 PetscValidLogicalCollectiveInt(A,mb,2); 1645 PetscValidLogicalCollectiveInt(A,nb,3); 1646 PetscTryMethod(A,"MatScaLAPACKSetBlockSizes_C",(Mat,PetscInt,PetscInt),(A,mb,nb)); 1647 PetscFunctionReturn(0); 1648 } 1649 1650 static PetscErrorCode MatScaLAPACKGetBlockSizes_ScaLAPACK(Mat A,PetscInt *mb,PetscInt *nb) 1651 { 1652 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 1653 1654 PetscFunctionBegin; 1655 if (mb) *mb = a->mb; 1656 if (nb) *nb = a->nb; 1657 PetscFunctionReturn(0); 1658 } 1659 1660 /*@ 1661 MatScaLAPACKGetBlockSizes - Gets the block sizes used in the distribution of 1662 the ScaLAPACK matrix 1663 1664 Not collective 1665 1666 Input Parameter: 1667 . A - a MATSCALAPACK matrix 1668 1669 Output Parameters: 1670 + mb - the row block size 1671 - nb - the column block size 1672 1673 Level: intermediate 1674 1675 .seealso: `MatCreateScaLAPACK()`, `MatScaLAPACKSetBlockSizes()` 1676 @*/ 1677 PetscErrorCode MatScaLAPACKGetBlockSizes(Mat A,PetscInt *mb,PetscInt *nb) 1678 { 1679 PetscFunctionBegin; 1680 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 1681 PetscUseMethod(A,"MatScaLAPACKGetBlockSizes_C",(Mat,PetscInt*,PetscInt*),(A,mb,nb)); 1682 PetscFunctionReturn(0); 1683 } 1684 1685 PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Ref(MatStash*,PetscMPIInt*,PetscInt**,PetscInt**,PetscScalar**,PetscInt*); 1686 PETSC_INTERN PetscErrorCode MatStashScatterEnd_Ref(MatStash*); 1687 1688 /*MC 1689 MATSCALAPACK = "scalapack" - A matrix type for dense matrices using the ScaLAPACK package 1690 1691 Use ./configure --download-scalapack to install PETSc to use ScaLAPACK 1692 1693 Options Database Keys: 1694 + -mat_type scalapack - sets the matrix type to 
"scalapack" during a call to MatSetFromOptions() 1695 . -pc_factor_mat_solver_type scalapack - to use this direct solver with the option -pc_type lu 1696 . -mat_scalapack_grid_height - sets Grid Height for 2D cyclic ordering of internal matrix 1697 - -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma) 1698 1699 Note: 1700 Note unlike most matrix formats, this format does not store all the matrix entries for a contiguous 1701 range of rows on an MPI rank. Use `MatGetOwnershipIS()` to determine what values are stored on 1702 the given rank. 1703 1704 Level: beginner 1705 1706 .seealso: `MATDENSE`, `MATELEMENTAL`, `MatGetOwnershipIS()` 1707 M*/ 1708 1709 PETSC_EXTERN PetscErrorCode MatCreate_ScaLAPACK(Mat A) 1710 { 1711 Mat_ScaLAPACK *a; 1712 PetscBool flg,flg1; 1713 Mat_ScaLAPACK_Grid *grid; 1714 MPI_Comm icomm; 1715 PetscBLASInt nprow,npcol,myrow,mycol; 1716 PetscInt optv1,k=2,array[2]={0,0}; 1717 PetscMPIInt size; 1718 1719 PetscFunctionBegin; 1720 PetscCall(PetscMemcpy(A->ops,&MatOps_Values,sizeof(struct _MatOps))); 1721 A->insertmode = NOT_SET_VALUES; 1722 1723 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)A),1,&A->stash)); 1724 A->stash.ScatterBegin = MatStashScatterBegin_ScaLAPACK; 1725 A->stash.ScatterGetMesg = MatStashScatterGetMesg_Ref; 1726 A->stash.ScatterEnd = MatStashScatterEnd_Ref; 1727 A->stash.ScatterDestroy = NULL; 1728 1729 PetscCall(PetscNewLog(A,&a)); 1730 A->data = (void*)a; 1731 1732 /* Grid needs to be shared between multiple Mats on the same communicator, implement by attribute caching on the MPI_Comm */ 1733 if (Petsc_ScaLAPACK_keyval == MPI_KEYVAL_INVALID) { 1734 PetscCallMPI(MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN,MPI_COMM_NULL_DELETE_FN,&Petsc_ScaLAPACK_keyval,(void*)0)); 1735 PetscCall(PetscRegisterFinalize(Petsc_ScaLAPACK_keyval_free)); 1736 PetscCall(PetscCitationsRegister(ScaLAPACKCitation,&ScaLAPACKCite)); 1737 } 1738 
PetscCall(PetscCommDuplicate(PetscObjectComm((PetscObject)A),&icomm,NULL)); 1739 PetscCallMPI(MPI_Comm_get_attr(icomm,Petsc_ScaLAPACK_keyval,(void**)&grid,(int*)&flg)); 1740 if (!flg) { 1741 PetscCall(PetscNewLog(A,&grid)); 1742 1743 PetscCallMPI(MPI_Comm_size(icomm,&size)); 1744 grid->nprow = (PetscInt) (PetscSqrtReal((PetscReal)size) + 0.001); 1745 1746 PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"ScaLAPACK Grid Options","Mat"); 1747 PetscCall(PetscOptionsInt("-mat_scalapack_grid_height","Grid Height","None",grid->nprow,&optv1,&flg1)); 1748 if (flg1) { 1749 PetscCheck(size % optv1 == 0,PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_INCOMP,"Grid Height %" PetscInt_FMT " must evenly divide CommSize %d",optv1,size); 1750 grid->nprow = optv1; 1751 } 1752 PetscOptionsEnd(); 1753 1754 if (size % grid->nprow) grid->nprow = 1; /* cannot use a squarish grid, use a 1d grid */ 1755 grid->npcol = size/grid->nprow; 1756 PetscCall(PetscBLASIntCast(grid->nprow,&nprow)); 1757 PetscCall(PetscBLASIntCast(grid->npcol,&npcol)); 1758 grid->ictxt = Csys2blacs_handle(icomm); 1759 Cblacs_gridinit(&grid->ictxt,"R",nprow,npcol); 1760 Cblacs_gridinfo(grid->ictxt,&nprow,&npcol,&myrow,&mycol); 1761 grid->grid_refct = 1; 1762 grid->nprow = nprow; 1763 grid->npcol = npcol; 1764 grid->myrow = myrow; 1765 grid->mycol = mycol; 1766 /* auxiliary 1d BLACS contexts for 1xsize and sizex1 grids */ 1767 grid->ictxrow = Csys2blacs_handle(icomm); 1768 Cblacs_gridinit(&grid->ictxrow,"R",1,size); 1769 grid->ictxcol = Csys2blacs_handle(icomm); 1770 Cblacs_gridinit(&grid->ictxcol,"R",size,1); 1771 PetscCallMPI(MPI_Comm_set_attr(icomm,Petsc_ScaLAPACK_keyval,(void*)grid)); 1772 1773 } else grid->grid_refct++; 1774 PetscCall(PetscCommDestroy(&icomm)); 1775 a->grid = grid; 1776 a->mb = DEFAULT_BLOCKSIZE; 1777 a->nb = DEFAULT_BLOCKSIZE; 1778 1779 PetscOptionsBegin(PetscObjectComm((PetscObject)A),NULL,"ScaLAPACK Options","Mat"); 1780 
PetscCall(PetscOptionsIntArray("-mat_scalapack_block_sizes","Size of the blocks to use (one or two comma-separated integers)","MatCreateScaLAPACK",array,&k,&flg)); 1781 if (flg) { 1782 a->mb = array[0]; 1783 a->nb = (k>1)? array[1]: a->mb; 1784 } 1785 PetscOptionsEnd(); 1786 1787 a->roworiented = PETSC_TRUE; 1788 PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatGetOwnershipIS_C",MatGetOwnershipIS_ScaLAPACK)); 1789 PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKSetBlockSizes_C",MatScaLAPACKSetBlockSizes_ScaLAPACK)); 1790 PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKGetBlockSizes_C",MatScaLAPACKGetBlockSizes_ScaLAPACK)); 1791 PetscCall(PetscObjectChangeTypeName((PetscObject)A,MATSCALAPACK)); 1792 PetscFunctionReturn(0); 1793 } 1794 1795 /*@C 1796 MatCreateScaLAPACK - Creates a dense parallel matrix in ScaLAPACK format 1797 (2D block cyclic distribution). 1798 1799 Collective 1800 1801 Input Parameters: 1802 + comm - MPI communicator 1803 . mb - row block size (or PETSC_DECIDE to have it set) 1804 . nb - column block size (or PETSC_DECIDE to have it set) 1805 . M - number of global rows 1806 . N - number of global columns 1807 . rsrc - coordinate of process that owns the first row of the distributed matrix 1808 - csrc - coordinate of process that owns the first column of the distributed matrix 1809 1810 Output Parameter: 1811 . A - the matrix 1812 1813 Options Database Keys: 1814 . -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma) 1815 1816 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 1817 MatXXXXSetPreallocation() paradigm instead of this routine directly. 1818 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 1819 1820 Notes: 1821 If PETSC_DECIDE is used for the block sizes, then an appropriate value 1822 is chosen. 
1823 1824 Storage Information: 1825 Storate is completely managed by ScaLAPACK, so this requires PETSc to be 1826 configured with ScaLAPACK. In particular, PETSc's local sizes lose 1827 significance and are thus ignored. The block sizes refer to the values 1828 used for the distributed matrix, not the same meaning as in BAIJ. 1829 1830 Level: intermediate 1831 1832 .seealso: `MatCreate()`, `MatCreateDense()`, `MatSetValues()` 1833 @*/ 1834 PetscErrorCode MatCreateScaLAPACK(MPI_Comm comm,PetscInt mb,PetscInt nb,PetscInt M,PetscInt N,PetscInt rsrc,PetscInt csrc,Mat *A) 1835 { 1836 Mat_ScaLAPACK *a; 1837 PetscInt m,n; 1838 1839 PetscFunctionBegin; 1840 PetscCall(MatCreate(comm,A)); 1841 PetscCall(MatSetType(*A,MATSCALAPACK)); 1842 PetscCheck(M != PETSC_DECIDE && N != PETSC_DECIDE,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot use PETSC_DECIDE for matrix dimensions"); 1843 /* rows and columns are NOT distributed according to PetscSplitOwnership */ 1844 m = PETSC_DECIDE; 1845 PetscCall(PetscSplitOwnershipEqual(comm,&m,&M)); 1846 n = PETSC_DECIDE; 1847 PetscCall(PetscSplitOwnershipEqual(comm,&n,&N)); 1848 PetscCall(MatSetSizes(*A,m,n,M,N)); 1849 a = (Mat_ScaLAPACK*)(*A)->data; 1850 PetscCall(PetscBLASIntCast(M,&a->M)); 1851 PetscCall(PetscBLASIntCast(N,&a->N)); 1852 PetscCall(PetscBLASIntCast((mb==PETSC_DECIDE)?DEFAULT_BLOCKSIZE:mb,&a->mb)); 1853 PetscCall(PetscBLASIntCast((nb==PETSC_DECIDE)?a->mb:nb,&a->nb)); 1854 PetscCall(PetscBLASIntCast(rsrc,&a->rsrc)); 1855 PetscCall(PetscBLASIntCast(csrc,&a->csrc)); 1856 PetscCall(MatSetUp(*A)); 1857 PetscFunctionReturn(0); 1858 } 1859