1 #include <petsc/private/petscscalapack.h> /*I "petscmat.h" I*/ 2 3 const char ScaLAPACKCitation[] = "@BOOK{scalapack-user-guide,\n" 4 " AUTHOR = {L. S. Blackford and J. Choi and A. Cleary and E. D'Azevedo and\n" 5 " J. Demmel and I. Dhillon and J. Dongarra and S. Hammarling and\n" 6 " G. Henry and A. Petitet and K. Stanley and D. Walker and R. C. Whaley},\n" 7 " TITLE = {Sca{LAPACK} Users' Guide},\n" 8 " PUBLISHER = {SIAM},\n" 9 " ADDRESS = {Philadelphia, PA},\n" 10 " YEAR = 1997\n" 11 "}\n"; 12 static PetscBool ScaLAPACKCite = PETSC_FALSE; 13 14 #define DEFAULT_BLOCKSIZE 64 15 16 /* 17 The variable Petsc_ScaLAPACK_keyval is used to indicate an MPI attribute that 18 is attached to a communicator, in this case the attribute is a Mat_ScaLAPACK_Grid 19 */ 20 static PetscMPIInt Petsc_ScaLAPACK_keyval = MPI_KEYVAL_INVALID; 21 22 static PetscErrorCode Petsc_ScaLAPACK_keyval_free(void) 23 { 24 PetscErrorCode ierr; 25 26 PetscFunctionBegin; 27 ierr = PetscInfo(NULL,"Freeing Petsc_ScaLAPACK_keyval\n");CHKERRQ(ierr); 28 ierr = MPI_Comm_free_keyval(&Petsc_ScaLAPACK_keyval);CHKERRMPI(ierr); 29 PetscFunctionReturn(0); 30 } 31 32 static PetscErrorCode MatView_ScaLAPACK(Mat A,PetscViewer viewer) 33 { 34 PetscErrorCode ierr; 35 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 36 PetscBool iascii; 37 PetscViewerFormat format; 38 Mat Adense; 39 40 PetscFunctionBegin; 41 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 42 if (iascii) { 43 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 44 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 45 ierr = PetscViewerASCIIPrintf(viewer,"block sizes: %d,%d\n",(int)a->mb,(int)a->nb);CHKERRQ(ierr); 46 ierr = PetscViewerASCIIPrintf(viewer,"grid height=%d, grid width=%d\n",(int)a->grid->nprow,(int)a->grid->npcol);CHKERRQ(ierr); 47 ierr = PetscViewerASCIIPrintf(viewer,"coordinates of process owning first row and column: 
(%d,%d)\n",(int)a->rsrc,(int)a->csrc);CHKERRQ(ierr); 48 ierr = PetscViewerASCIIPrintf(viewer,"dimension of largest local matrix: %d x %d\n",(int)a->locr,(int)a->locc);CHKERRQ(ierr); 49 PetscFunctionReturn(0); 50 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 51 PetscFunctionReturn(0); 52 } 53 } 54 /* convert to dense format and call MatView() */ 55 ierr = MatConvert(A,MATDENSE,MAT_INITIAL_MATRIX,&Adense);CHKERRQ(ierr); 56 ierr = MatView(Adense,viewer);CHKERRQ(ierr); 57 ierr = MatDestroy(&Adense);CHKERRQ(ierr); 58 PetscFunctionReturn(0); 59 } 60 61 static PetscErrorCode MatGetInfo_ScaLAPACK(Mat A,MatInfoType flag,MatInfo *info) 62 { 63 PetscErrorCode ierr; 64 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 65 PetscLogDouble isend[2],irecv[2]; 66 67 PetscFunctionBegin; 68 info->block_size = 1.0; 69 70 isend[0] = a->lld*a->locc; /* locally allocated */ 71 isend[1] = a->locr*a->locc; /* used submatrix */ 72 if (flag == MAT_LOCAL || flag == MAT_GLOBAL_MAX) { 73 info->nz_allocated = isend[0]; 74 info->nz_used = isend[1]; 75 } else if (flag == MAT_GLOBAL_MAX) { 76 ierr = MPIU_Allreduce(isend,irecv,2,MPIU_PETSCLOGDOUBLE,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 77 info->nz_allocated = irecv[0]; 78 info->nz_used = irecv[1]; 79 } else if (flag == MAT_GLOBAL_SUM) { 80 ierr = MPIU_Allreduce(isend,irecv,2,MPIU_PETSCLOGDOUBLE,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 81 info->nz_allocated = irecv[0]; 82 info->nz_used = irecv[1]; 83 } 84 85 info->nz_unneeded = 0; 86 info->assemblies = A->num_ass; 87 info->mallocs = 0; 88 info->memory = ((PetscObject)A)->mem; 89 info->fill_ratio_given = 0; 90 info->fill_ratio_needed = 0; 91 info->factor_mallocs = 0; 92 PetscFunctionReturn(0); 93 } 94 95 PetscErrorCode MatSetOption_ScaLAPACK(Mat A,MatOption op,PetscBool flg) 96 { 97 PetscFunctionBegin; 98 switch (op) { 99 case MAT_NEW_NONZERO_LOCATIONS: 100 case MAT_NEW_NONZERO_LOCATION_ERR: 101 case MAT_NEW_NONZERO_ALLOCATION_ERR: 102 case MAT_SYMMETRIC: 103 case 
MAT_SORTED_FULL: 104 case MAT_HERMITIAN: 105 break; 106 default: 107 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unsupported option %s",MatOptions[op]); 108 } 109 PetscFunctionReturn(0); 110 } 111 112 static PetscErrorCode MatSetValues_ScaLAPACK(Mat A,PetscInt nr,const PetscInt *rows,PetscInt nc,const PetscInt *cols,const PetscScalar *vals,InsertMode imode) 113 { 114 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 115 PetscErrorCode ierr; 116 PetscInt i,j; 117 PetscBLASInt gridx,gcidx,lridx,lcidx,rsrc,csrc; 118 119 PetscFunctionBegin; 120 for (i=0;i<nr;i++) { 121 if (rows[i] < 0) continue; 122 ierr = PetscBLASIntCast(rows[i]+1,&gridx);CHKERRQ(ierr); 123 for (j=0;j<nc;j++) { 124 if (cols[j] < 0) continue; 125 ierr = PetscBLASIntCast(cols[j]+1,&gcidx);CHKERRQ(ierr); 126 PetscStackCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc)); 127 if (rsrc==a->grid->myrow && csrc==a->grid->mycol) { 128 switch (imode) { 129 case INSERT_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] = vals[i*nc+j]; break; 130 case ADD_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] += vals[i*nc+j]; break; 131 default: SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for InsertMode %d",(int)imode); 132 } 133 } else { 134 if (A->nooffprocentries) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process entry even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set"); 135 A->assembled = PETSC_FALSE; 136 ierr = MatStashValuesRow_Private(&A->stash,rows[i],1,cols+j,vals+i*nc+j,(PetscBool)(imode==ADD_VALUES));CHKERRQ(ierr); 137 } 138 } 139 } 140 PetscFunctionReturn(0); 141 } 142 143 static PetscErrorCode MatMultXXXYYY_ScaLAPACK(Mat A,PetscBool transpose,PetscScalar beta,const PetscScalar *x,PetscScalar *y) 144 { 145 PetscErrorCode ierr; 146 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 147 PetscScalar *x2d,*y2d,alpha=1.0; 148 const PetscInt *ranges; 149 PetscBLASInt 
xdesc[9],ydesc[9],x2desc[9],y2desc[9],mb,nb,lszx,lszy,zero=0,one=1,xlld,ylld,info; 150 151 PetscFunctionBegin; 152 if (transpose) { 153 154 /* create ScaLAPACK descriptors for vectors (1d block distribution) */ 155 ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr); 156 ierr = PetscBLASIntCast(ranges[1],&mb);CHKERRQ(ierr); /* x block size */ 157 xlld = PetscMax(1,A->rmap->n); 158 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&xlld,&info)); 159 PetscCheckScaLapackInfo("descinit",info); 160 ierr = PetscLayoutGetRanges(A->cmap,&ranges);CHKERRQ(ierr); 161 ierr = PetscBLASIntCast(ranges[1],&nb);CHKERRQ(ierr); /* y block size */ 162 ylld = 1; 163 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ydesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&ylld,&info)); 164 PetscCheckScaLapackInfo("descinit",info); 165 166 /* allocate 2d vectors */ 167 lszx = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow); 168 lszy = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol); 169 ierr = PetscMalloc2(lszx,&x2d,lszy,&y2d);CHKERRQ(ierr); 170 xlld = PetscMax(1,lszx); 171 172 /* create ScaLAPACK descriptors for vectors (2d block distribution) */ 173 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&xlld,&info)); 174 PetscCheckScaLapackInfo("descinit",info); 175 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(y2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&ylld,&info)); 176 PetscCheckScaLapackInfo("descinit",info); 177 178 /* redistribute x as a column of a 2d matrix */ 179 PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,(PetscScalar*)x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxcol)); 180 181 /* redistribute y as a row of a 2d matrix */ 182 if (beta!=0.0) 
PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,y,&one,&one,ydesc,y2d,&one,&one,y2desc,&a->grid->ictxrow)); 183 184 /* call PBLAS subroutine */ 185 PetscStackCallBLAS("PBLASgemv",PBLASgemv_("T",&a->M,&a->N,&alpha,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&one,&beta,y2d,&one,&one,y2desc,&one)); 186 187 /* redistribute y from a row of a 2d matrix */ 188 PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,y2d,&one,&one,y2desc,y,&one,&one,ydesc,&a->grid->ictxrow)); 189 190 } else { /* non-transpose */ 191 192 /* create ScaLAPACK descriptors for vectors (1d block distribution) */ 193 ierr = PetscLayoutGetRanges(A->cmap,&ranges);CHKERRQ(ierr); 194 ierr = PetscBLASIntCast(ranges[1],&nb);CHKERRQ(ierr); /* x block size */ 195 xlld = 1; 196 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&xlld,&info)); 197 PetscCheckScaLapackInfo("descinit",info); 198 ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr); 199 ierr = PetscBLASIntCast(ranges[1],&mb);CHKERRQ(ierr); /* y block size */ 200 ylld = PetscMax(1,A->rmap->n); 201 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ydesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&ylld,&info)); 202 PetscCheckScaLapackInfo("descinit",info); 203 204 /* allocate 2d vectors */ 205 lszy = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow); 206 lszx = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol); 207 ierr = PetscMalloc2(lszx,&x2d,lszy,&y2d);CHKERRQ(ierr); 208 ylld = PetscMax(1,lszy); 209 210 /* create ScaLAPACK descriptors for vectors (2d block distribution) */ 211 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&xlld,&info)); 212 PetscCheckScaLapackInfo("descinit",info); 213 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(y2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&ylld,&info)); 214 
PetscCheckScaLapackInfo("descinit",info); 215 216 /* redistribute x as a row of a 2d matrix */ 217 PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,(PetscScalar*)x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxrow)); 218 219 /* redistribute y as a column of a 2d matrix */ 220 if (beta!=0.0) PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,y,&one,&one,ydesc,y2d,&one,&one,y2desc,&a->grid->ictxcol)); 221 222 /* call PBLAS subroutine */ 223 PetscStackCallBLAS("PBLASgemv",PBLASgemv_("N",&a->M,&a->N,&alpha,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&one,&beta,y2d,&one,&one,y2desc,&one)); 224 225 /* redistribute y from a column of a 2d matrix */ 226 PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,y2d,&one,&one,y2desc,y,&one,&one,ydesc,&a->grid->ictxcol)); 227 228 } 229 ierr = PetscFree2(x2d,y2d);CHKERRQ(ierr); 230 PetscFunctionReturn(0); 231 } 232 233 static PetscErrorCode MatMult_ScaLAPACK(Mat A,Vec x,Vec y) 234 { 235 PetscErrorCode ierr; 236 const PetscScalar *xarray; 237 PetscScalar *yarray; 238 239 PetscFunctionBegin; 240 ierr = VecGetArrayRead(x,&xarray);CHKERRQ(ierr); 241 ierr = VecGetArray(y,&yarray);CHKERRQ(ierr); 242 ierr = MatMultXXXYYY_ScaLAPACK(A,PETSC_FALSE,0.0,xarray,yarray);CHKERRQ(ierr); 243 ierr = VecRestoreArrayRead(x,&xarray);CHKERRQ(ierr); 244 ierr = VecRestoreArray(y,&yarray);CHKERRQ(ierr); 245 PetscFunctionReturn(0); 246 } 247 248 static PetscErrorCode MatMultTranspose_ScaLAPACK(Mat A,Vec x,Vec y) 249 { 250 PetscErrorCode ierr; 251 const PetscScalar *xarray; 252 PetscScalar *yarray; 253 254 PetscFunctionBegin; 255 ierr = VecGetArrayRead(x,&xarray);CHKERRQ(ierr); 256 ierr = VecGetArray(y,&yarray);CHKERRQ(ierr); 257 ierr = MatMultXXXYYY_ScaLAPACK(A,PETSC_TRUE,0.0,xarray,yarray);CHKERRQ(ierr); 258 ierr = VecRestoreArrayRead(x,&xarray);CHKERRQ(ierr); 259 ierr = VecRestoreArray(y,&yarray);CHKERRQ(ierr); 260 PetscFunctionReturn(0); 261 } 262 263 static PetscErrorCode MatMultAdd_ScaLAPACK(Mat A,Vec 
x,Vec y,Vec z) 264 { 265 PetscErrorCode ierr; 266 const PetscScalar *xarray; 267 PetscScalar *zarray; 268 269 PetscFunctionBegin; 270 if (y != z) { ierr = VecCopy(y,z);CHKERRQ(ierr); } 271 ierr = VecGetArrayRead(x,&xarray);CHKERRQ(ierr); 272 ierr = VecGetArray(z,&zarray);CHKERRQ(ierr); 273 ierr = MatMultXXXYYY_ScaLAPACK(A,PETSC_FALSE,1.0,xarray,zarray);CHKERRQ(ierr); 274 ierr = VecRestoreArrayRead(x,&xarray);CHKERRQ(ierr); 275 ierr = VecRestoreArray(z,&zarray);CHKERRQ(ierr); 276 PetscFunctionReturn(0); 277 } 278 279 static PetscErrorCode MatMultTransposeAdd_ScaLAPACK(Mat A,Vec x,Vec y,Vec z) 280 { 281 PetscErrorCode ierr; 282 const PetscScalar *xarray; 283 PetscScalar *zarray; 284 285 PetscFunctionBegin; 286 if (y != z) { ierr = VecCopy(y,z);CHKERRQ(ierr); } 287 ierr = VecGetArrayRead(x,&xarray);CHKERRQ(ierr); 288 ierr = VecGetArray(z,&zarray);CHKERRQ(ierr); 289 ierr = MatMultXXXYYY_ScaLAPACK(A,PETSC_TRUE,1.0,xarray,zarray);CHKERRQ(ierr); 290 ierr = VecRestoreArrayRead(x,&xarray);CHKERRQ(ierr); 291 ierr = VecRestoreArray(z,&zarray);CHKERRQ(ierr); 292 PetscFunctionReturn(0); 293 } 294 295 PetscErrorCode MatMatMultNumeric_ScaLAPACK(Mat A,Mat B,Mat C) 296 { 297 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 298 Mat_ScaLAPACK *b = (Mat_ScaLAPACK*)B->data; 299 Mat_ScaLAPACK *c = (Mat_ScaLAPACK*)C->data; 300 PetscScalar sone=1.0,zero=0.0; 301 PetscBLASInt one=1; 302 303 PetscFunctionBegin; 304 PetscStackCallBLAS("PBLASgemm",PBLASgemm_("N","N",&a->M,&b->N,&a->N,&sone,a->loc,&one,&one,a->desc,b->loc,&one,&one,b->desc,&zero,c->loc,&one,&one,c->desc)); 305 C->assembled = PETSC_TRUE; 306 PetscFunctionReturn(0); 307 } 308 309 PetscErrorCode MatMatMultSymbolic_ScaLAPACK(Mat A,Mat B,PetscReal fill,Mat C) 310 { 311 PetscErrorCode ierr; 312 313 PetscFunctionBegin; 314 ierr = MatSetSizes(C,A->rmap->n,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 315 ierr = MatSetType(C,MATSCALAPACK);CHKERRQ(ierr); 316 ierr = MatSetUp(C);CHKERRQ(ierr); 317 C->ops->matmultnumeric = 
MatMatMultNumeric_ScaLAPACK; 318 PetscFunctionReturn(0); 319 } 320 321 static PetscErrorCode MatMatTransposeMultNumeric_ScaLAPACK(Mat A,Mat B,Mat C) 322 { 323 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 324 Mat_ScaLAPACK *b = (Mat_ScaLAPACK*)B->data; 325 Mat_ScaLAPACK *c = (Mat_ScaLAPACK*)C->data; 326 PetscScalar sone=1.0,zero=0.0; 327 PetscBLASInt one=1; 328 329 PetscFunctionBegin; 330 PetscStackCallBLAS("PBLASgemm",PBLASgemm_("N","T",&a->M,&b->M,&a->N,&sone,a->loc,&one,&one,a->desc,b->loc,&one,&one,b->desc,&zero,c->loc,&one,&one,c->desc)); 331 C->assembled = PETSC_TRUE; 332 PetscFunctionReturn(0); 333 } 334 335 static PetscErrorCode MatMatTransposeMultSymbolic_ScaLAPACK(Mat A,Mat B,PetscReal fill,Mat C) 336 { 337 PetscErrorCode ierr; 338 339 PetscFunctionBegin; 340 ierr = MatSetSizes(C,A->rmap->n,B->rmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 341 ierr = MatSetType(C,MATSCALAPACK);CHKERRQ(ierr); 342 ierr = MatSetUp(C);CHKERRQ(ierr); 343 PetscFunctionReturn(0); 344 } 345 346 /* --------------------------------------- */ 347 static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_AB(Mat C) 348 { 349 PetscFunctionBegin; 350 C->ops->matmultsymbolic = MatMatMultSymbolic_ScaLAPACK; 351 C->ops->productsymbolic = MatProductSymbolic_AB; 352 PetscFunctionReturn(0); 353 } 354 355 static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_ABt(Mat C) 356 { 357 PetscFunctionBegin; 358 C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_ScaLAPACK; 359 C->ops->productsymbolic = MatProductSymbolic_ABt; 360 PetscFunctionReturn(0); 361 } 362 363 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_ScaLAPACK(Mat C) 364 { 365 PetscErrorCode ierr; 366 Mat_Product *product = C->product; 367 368 PetscFunctionBegin; 369 switch (product->type) { 370 case MATPRODUCT_AB: 371 ierr = MatProductSetFromOptions_ScaLAPACK_AB(C);CHKERRQ(ierr); 372 break; 373 case MATPRODUCT_ABt: 374 ierr = MatProductSetFromOptions_ScaLAPACK_ABt(C);CHKERRQ(ierr); 375 break; 376 default: 
SETERRQ1(PetscObjectComm((PetscObject)C),PETSC_ERR_SUP,"MatProduct type %s is not supported for ScaLAPACK and ScaLAPACK matrices",MatProductTypes[product->type]); 377 } 378 PetscFunctionReturn(0); 379 } 380 /* --------------------------------------- */ 381 382 static PetscErrorCode MatGetDiagonal_ScaLAPACK(Mat A,Vec D) 383 { 384 PetscErrorCode ierr; 385 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 386 PetscScalar *darray,*d2d,v; 387 const PetscInt *ranges; 388 PetscBLASInt j,ddesc[9],d2desc[9],mb,nb,lszd,zero=0,one=1,dlld,info; 389 390 PetscFunctionBegin; 391 ierr = VecGetArray(D,&darray);CHKERRQ(ierr); 392 393 if (A->rmap->N<=A->cmap->N) { /* row version */ 394 395 /* create ScaLAPACK descriptor for vector (1d block distribution) */ 396 ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr); 397 ierr = PetscBLASIntCast(ranges[1],&mb);CHKERRQ(ierr); /* D block size */ 398 dlld = PetscMax(1,A->rmap->n); 399 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&dlld,&info)); 400 PetscCheckScaLapackInfo("descinit",info); 401 402 /* allocate 2d vector */ 403 lszd = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow); 404 ierr = PetscCalloc1(lszd,&d2d);CHKERRQ(ierr); 405 dlld = PetscMax(1,lszd); 406 407 /* create ScaLAPACK descriptor for vector (2d block distribution) */ 408 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&dlld,&info)); 409 PetscCheckScaLapackInfo("descinit",info); 410 411 /* collect diagonal */ 412 for (j=1;j<=a->M;j++) { 413 PetscStackCallBLAS("SCALAPACKelget",SCALAPACKelget_("R"," ",&v,a->loc,&j,&j,a->desc)); 414 PetscStackCallBLAS("SCALAPACKelset",SCALAPACKelset_(d2d,&j,&one,d2desc,&v)); 415 } 416 417 /* redistribute d from a column of a 2d matrix */ 418 PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,d2d,&one,&one,d2desc,darray,&one,&one,ddesc,&a->grid->ictxcol)); 419 ierr = 
PetscFree(d2d);CHKERRQ(ierr); 420 421 } else { /* column version */ 422 423 /* create ScaLAPACK descriptor for vector (1d block distribution) */ 424 ierr = PetscLayoutGetRanges(A->cmap,&ranges);CHKERRQ(ierr); 425 ierr = PetscBLASIntCast(ranges[1],&nb);CHKERRQ(ierr); /* D block size */ 426 dlld = 1; 427 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&dlld,&info)); 428 PetscCheckScaLapackInfo("descinit",info); 429 430 /* allocate 2d vector */ 431 lszd = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol); 432 ierr = PetscCalloc1(lszd,&d2d);CHKERRQ(ierr); 433 434 /* create ScaLAPACK descriptor for vector (2d block distribution) */ 435 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&dlld,&info)); 436 PetscCheckScaLapackInfo("descinit",info); 437 438 /* collect diagonal */ 439 for (j=1;j<=a->N;j++) { 440 PetscStackCallBLAS("SCALAPACKelget",SCALAPACKelget_("C"," ",&v,a->loc,&j,&j,a->desc)); 441 PetscStackCallBLAS("SCALAPACKelset",SCALAPACKelset_(d2d,&one,&j,d2desc,&v)); 442 } 443 444 /* redistribute d from a row of a 2d matrix */ 445 PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,d2d,&one,&one,d2desc,darray,&one,&one,ddesc,&a->grid->ictxrow)); 446 ierr = PetscFree(d2d);CHKERRQ(ierr); 447 } 448 449 ierr = VecRestoreArray(D,&darray);CHKERRQ(ierr); 450 ierr = VecAssemblyBegin(D);CHKERRQ(ierr); 451 ierr = VecAssemblyEnd(D);CHKERRQ(ierr); 452 PetscFunctionReturn(0); 453 } 454 455 static PetscErrorCode MatDiagonalScale_ScaLAPACK(Mat A,Vec L,Vec R) 456 { 457 PetscErrorCode ierr; 458 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 459 const PetscScalar *d; 460 const PetscInt *ranges; 461 PetscScalar *d2d; 462 PetscBLASInt i,j,ddesc[9],d2desc[9],mb,nb,lszd,zero=0,one=1,dlld,info; 463 464 PetscFunctionBegin; 465 if (R) { 466 ierr = VecGetArrayRead(R,(const PetscScalar **)&d);CHKERRQ(ierr); 467 /* create ScaLAPACK 
descriptor for vector (1d block distribution) */ 468 ierr = PetscLayoutGetRanges(A->cmap,&ranges);CHKERRQ(ierr); 469 ierr = PetscBLASIntCast(ranges[1],&nb);CHKERRQ(ierr); /* D block size */ 470 dlld = 1; 471 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&dlld,&info)); 472 PetscCheckScaLapackInfo("descinit",info); 473 474 /* allocate 2d vector */ 475 lszd = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol); 476 ierr = PetscCalloc1(lszd,&d2d);CHKERRQ(ierr); 477 478 /* create ScaLAPACK descriptor for vector (2d block distribution) */ 479 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&dlld,&info)); 480 PetscCheckScaLapackInfo("descinit",info); 481 482 /* redistribute d to a row of a 2d matrix */ 483 PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,(PetscScalar*)d,&one,&one,ddesc,d2d,&one,&one,d2desc,&a->grid->ictxrow)); 484 485 /* broadcast along process columns */ 486 if (!a->grid->myrow) Cdgebs2d(a->grid->ictxt,"C"," ",1,lszd,d2d,dlld); 487 else Cdgebr2d(a->grid->ictxt,"C"," ",1,lszd,d2d,dlld,0,a->grid->mycol); 488 489 /* local scaling */ 490 for (j=0;j<a->locc;j++) for (i=0;i<a->locr;i++) a->loc[i+j*a->lld] *= d2d[j]; 491 492 ierr = PetscFree(d2d);CHKERRQ(ierr); 493 ierr = VecRestoreArrayRead(R,(const PetscScalar **)&d);CHKERRQ(ierr); 494 } 495 if (L) { 496 ierr = VecGetArrayRead(L,(const PetscScalar **)&d);CHKERRQ(ierr); 497 /* create ScaLAPACK descriptor for vector (1d block distribution) */ 498 ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr); 499 ierr = PetscBLASIntCast(ranges[1],&mb);CHKERRQ(ierr); /* D block size */ 500 dlld = PetscMax(1,A->rmap->n); 501 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&dlld,&info)); 502 PetscCheckScaLapackInfo("descinit",info); 503 504 /* allocate 2d vector */ 505 lszd = 
SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow); 506 ierr = PetscCalloc1(lszd,&d2d);CHKERRQ(ierr); 507 dlld = PetscMax(1,lszd); 508 509 /* create ScaLAPACK descriptor for vector (2d block distribution) */ 510 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&dlld,&info)); 511 PetscCheckScaLapackInfo("descinit",info); 512 513 /* redistribute d to a column of a 2d matrix */ 514 PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,(PetscScalar*)d,&one,&one,ddesc,d2d,&one,&one,d2desc,&a->grid->ictxcol)); 515 516 /* broadcast along process rows */ 517 if (!a->grid->mycol) Cdgebs2d(a->grid->ictxt,"R"," ",lszd,1,d2d,dlld); 518 else Cdgebr2d(a->grid->ictxt,"R"," ",lszd,1,d2d,dlld,a->grid->myrow,0); 519 520 /* local scaling */ 521 for (i=0;i<a->locr;i++) for (j=0;j<a->locc;j++) a->loc[i+j*a->lld] *= d2d[i]; 522 523 ierr = PetscFree(d2d);CHKERRQ(ierr); 524 ierr = VecRestoreArrayRead(L,(const PetscScalar **)&d);CHKERRQ(ierr); 525 } 526 PetscFunctionReturn(0); 527 } 528 529 static PetscErrorCode MatMissingDiagonal_ScaLAPACK(Mat A,PetscBool *missing,PetscInt *d) 530 { 531 PetscFunctionBegin; 532 *missing = PETSC_FALSE; 533 PetscFunctionReturn(0); 534 } 535 536 static PetscErrorCode MatScale_ScaLAPACK(Mat X,PetscScalar a) 537 { 538 Mat_ScaLAPACK *x = (Mat_ScaLAPACK*)X->data; 539 PetscBLASInt n,one=1; 540 541 PetscFunctionBegin; 542 n = x->lld*x->locc; 543 PetscStackCallBLAS("BLASscal",BLASscal_(&n,&a,x->loc,&one)); 544 PetscFunctionReturn(0); 545 } 546 547 static PetscErrorCode MatShift_ScaLAPACK(Mat X,PetscScalar alpha) 548 { 549 Mat_ScaLAPACK *x = (Mat_ScaLAPACK*)X->data; 550 PetscBLASInt i,n; 551 PetscScalar v; 552 553 PetscFunctionBegin; 554 n = PetscMin(x->M,x->N); 555 for (i=1;i<=n;i++) { 556 PetscStackCallBLAS("SCALAPACKelget",SCALAPACKelget_("-"," ",&v,x->loc,&i,&i,x->desc)); 557 v += alpha; 558 
PetscStackCallBLAS("SCALAPACKelset",SCALAPACKelset_(x->loc,&i,&i,x->desc,&v)); 559 } 560 PetscFunctionReturn(0); 561 } 562 563 static PetscErrorCode MatAXPY_ScaLAPACK(Mat Y,PetscScalar alpha,Mat X,MatStructure str) 564 { 565 PetscErrorCode ierr; 566 Mat_ScaLAPACK *x = (Mat_ScaLAPACK*)X->data; 567 Mat_ScaLAPACK *y = (Mat_ScaLAPACK*)Y->data; 568 PetscBLASInt one=1; 569 PetscScalar beta=1.0; 570 571 PetscFunctionBegin; 572 MatScaLAPACKCheckDistribution(Y,1,X,3); 573 PetscStackCallBLAS("SCALAPACKmatadd",SCALAPACKmatadd_(&x->M,&x->N,&alpha,x->loc,&one,&one,x->desc,&beta,y->loc,&one,&one,y->desc)); 574 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 575 PetscFunctionReturn(0); 576 } 577 578 static PetscErrorCode MatCopy_ScaLAPACK(Mat A,Mat B,MatStructure str) 579 { 580 PetscErrorCode ierr; 581 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 582 Mat_ScaLAPACK *b = (Mat_ScaLAPACK*)B->data; 583 584 PetscFunctionBegin; 585 ierr = PetscArraycpy(b->loc,a->loc,a->lld*a->locc);CHKERRQ(ierr); 586 ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 587 PetscFunctionReturn(0); 588 } 589 590 static PetscErrorCode MatDuplicate_ScaLAPACK(Mat A,MatDuplicateOption op,Mat *B) 591 { 592 Mat Bs; 593 MPI_Comm comm; 594 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data,*b; 595 PetscErrorCode ierr; 596 597 PetscFunctionBegin; 598 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 599 ierr = MatCreate(comm,&Bs);CHKERRQ(ierr); 600 ierr = MatSetSizes(Bs,A->rmap->n,A->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 601 ierr = MatSetType(Bs,MATSCALAPACK);CHKERRQ(ierr); 602 b = (Mat_ScaLAPACK*)Bs->data; 603 b->M = a->M; 604 b->N = a->N; 605 b->mb = a->mb; 606 b->nb = a->nb; 607 b->rsrc = a->rsrc; 608 b->csrc = a->csrc; 609 ierr = MatSetUp(Bs);CHKERRQ(ierr); 610 *B = Bs; 611 if (op == MAT_COPY_VALUES) { 612 ierr = PetscArraycpy(b->loc,a->loc,a->lld*a->locc);CHKERRQ(ierr); 613 } 614 Bs->assembled = PETSC_TRUE; 615 PetscFunctionReturn(0); 616 } 617 618 static 
PetscErrorCode MatTranspose_ScaLAPACK(Mat A,MatReuse reuse,Mat *B) 619 { 620 PetscErrorCode ierr; 621 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data, *b; 622 Mat Bs = *B; 623 PetscBLASInt one=1; 624 PetscScalar sone=1.0,zero=0.0; 625 #if defined(PETSC_USE_COMPLEX) 626 PetscInt i,j; 627 #endif 628 629 PetscFunctionBegin; 630 if (reuse == MAT_INITIAL_MATRIX) { 631 ierr = MatCreateScaLAPACK(PetscObjectComm((PetscObject)A),a->nb,a->mb,a->N,a->M,a->csrc,a->rsrc,&Bs);CHKERRQ(ierr); 632 *B = Bs; 633 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Only MAT_INITIAL_MATRIX supported"); 634 b = (Mat_ScaLAPACK*)Bs->data; 635 PetscStackCallBLAS("PBLAStran",PBLAStran_(&a->N,&a->M,&sone,a->loc,&one,&one,a->desc,&zero,b->loc,&one,&one,b->desc)); 636 #if defined(PETSC_USE_COMPLEX) 637 /* undo conjugation */ 638 for (i=0;i<b->locr;i++) for (j=0;j<b->locc;j++) b->loc[i+j*b->lld] = PetscConj(b->loc[i+j*b->lld]); 639 #endif 640 Bs->assembled = PETSC_TRUE; 641 PetscFunctionReturn(0); 642 } 643 644 static PetscErrorCode MatConjugate_ScaLAPACK(Mat A) 645 { 646 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 647 PetscInt i,j; 648 649 PetscFunctionBegin; 650 for (i=0;i<a->locr;i++) for (j=0;j<a->locc;j++) a->loc[i+j*a->lld] = PetscConj(a->loc[i+j*a->lld]); 651 PetscFunctionReturn(0); 652 } 653 654 static PetscErrorCode MatHermitianTranspose_ScaLAPACK(Mat A,MatReuse reuse,Mat *B) 655 { 656 PetscErrorCode ierr; 657 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data, *b; 658 Mat Bs = *B; 659 PetscBLASInt one=1; 660 PetscScalar sone=1.0,zero=0.0; 661 662 PetscFunctionBegin; 663 if (reuse == MAT_INITIAL_MATRIX) { 664 ierr = MatCreateScaLAPACK(PetscObjectComm((PetscObject)A),a->nb,a->mb,a->N,a->M,a->csrc,a->rsrc,&Bs);CHKERRQ(ierr); 665 *B = Bs; 666 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Only MAT_INITIAL_MATRIX supported"); 667 b = (Mat_ScaLAPACK*)Bs->data; 668 
PetscStackCallBLAS("PBLAStran",PBLAStran_(&a->N,&a->M,&sone,a->loc,&one,&one,a->desc,&zero,b->loc,&one,&one,b->desc)); 669 Bs->assembled = PETSC_TRUE; 670 PetscFunctionReturn(0); 671 } 672 673 static PetscErrorCode MatSolve_ScaLAPACK(Mat A,Vec B,Vec X) 674 { 675 PetscErrorCode ierr; 676 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 677 PetscScalar *x,*x2d; 678 const PetscInt *ranges; 679 PetscBLASInt xdesc[9],x2desc[9],mb,lszx,zero=0,one=1,xlld,nrhs=1,info; 680 681 PetscFunctionBegin; 682 ierr = VecCopy(B,X);CHKERRQ(ierr); 683 ierr = VecGetArray(X,&x);CHKERRQ(ierr); 684 685 /* create ScaLAPACK descriptor for a vector (1d block distribution) */ 686 ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr); 687 ierr = PetscBLASIntCast(ranges[1],&mb);CHKERRQ(ierr); /* x block size */ 688 xlld = PetscMax(1,A->rmap->n); 689 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&xlld,&info)); 690 PetscCheckScaLapackInfo("descinit",info); 691 692 /* allocate 2d vector */ 693 lszx = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow); 694 ierr = PetscMalloc1(lszx,&x2d);CHKERRQ(ierr); 695 xlld = PetscMax(1,lszx); 696 697 /* create ScaLAPACK descriptor for a vector (2d block distribution) */ 698 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&xlld,&info)); 699 PetscCheckScaLapackInfo("descinit",info); 700 701 /* redistribute x as a column of a 2d matrix */ 702 PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxcol)); 703 704 /* call ScaLAPACK subroutine */ 705 switch (A->factortype) { 706 case MAT_FACTOR_LU: 707 PetscStackCallBLAS("SCALAPACKgetrs",SCALAPACKgetrs_("N",&a->M,&nrhs,a->loc,&one,&one,a->desc,a->pivots,x2d,&one,&one,x2desc,&info)); 708 PetscCheckScaLapackInfo("getrs",info); 709 break; 710 case MAT_FACTOR_CHOLESKY: 711 
PetscStackCallBLAS("SCALAPACKpotrs",SCALAPACKpotrs_("L",&a->M,&nrhs,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&info)); 712 PetscCheckScaLapackInfo("potrs",info); 713 break; 714 default: 715 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unfactored Matrix or Unsupported MatFactorType"); 716 } 717 718 /* redistribute x from a column of a 2d matrix */ 719 PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,x2d,&one,&one,x2desc,x,&one,&one,xdesc,&a->grid->ictxcol)); 720 721 ierr = PetscFree(x2d);CHKERRQ(ierr); 722 ierr = VecRestoreArray(X,&x);CHKERRQ(ierr); 723 PetscFunctionReturn(0); 724 } 725 726 static PetscErrorCode MatSolveAdd_ScaLAPACK(Mat A,Vec B,Vec Y,Vec X) 727 { 728 PetscErrorCode ierr; 729 730 PetscFunctionBegin; 731 ierr = MatSolve_ScaLAPACK(A,B,X);CHKERRQ(ierr); 732 ierr = VecAXPY(X,1,Y);CHKERRQ(ierr); 733 PetscFunctionReturn(0); 734 } 735 736 static PetscErrorCode MatMatSolve_ScaLAPACK(Mat A,Mat B,Mat X) 737 { 738 PetscErrorCode ierr; 739 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data,*b,*x; 740 PetscBool flg1,flg2; 741 PetscBLASInt one=1,info; 742 743 PetscFunctionBegin; 744 ierr = PetscObjectTypeCompare((PetscObject)B,MATSCALAPACK,&flg1);CHKERRQ(ierr); 745 ierr = PetscObjectTypeCompare((PetscObject)X,MATSCALAPACK,&flg2);CHKERRQ(ierr); 746 if (!(flg1 && flg2)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Both B and X must be of type MATSCALAPACK"); 747 MatScaLAPACKCheckDistribution(B,1,X,2); 748 b = (Mat_ScaLAPACK*)B->data; 749 x = (Mat_ScaLAPACK*)X->data; 750 ierr = PetscArraycpy(x->loc,b->loc,b->lld*b->locc);CHKERRQ(ierr); 751 752 switch (A->factortype) { 753 case MAT_FACTOR_LU: 754 PetscStackCallBLAS("SCALAPACKgetrs",SCALAPACKgetrs_("N",&a->M,&x->N,a->loc,&one,&one,a->desc,a->pivots,x->loc,&one,&one,x->desc,&info)); 755 PetscCheckScaLapackInfo("getrs",info); 756 break; 757 case MAT_FACTOR_CHOLESKY: 758 PetscStackCallBLAS("SCALAPACKpotrs",SCALAPACKpotrs_("L",&a->M,&x->N,a->loc,&one,&one,a->desc,x->loc,&one,&one,x->desc,&info)); 759 
PetscCheckScaLapackInfo("potrs",info); 760 break; 761 default: 762 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unfactored Matrix or Unsupported MatFactorType"); 763 } 764 PetscFunctionReturn(0); 765 } 766 767 static PetscErrorCode MatLUFactor_ScaLAPACK(Mat A,IS row,IS col,const MatFactorInfo *factorinfo) 768 { 769 PetscErrorCode ierr; 770 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 771 PetscBLASInt one=1,info; 772 773 PetscFunctionBegin; 774 if (!a->pivots) { 775 ierr = PetscMalloc1(a->locr+a->mb,&a->pivots);CHKERRQ(ierr); 776 ierr = PetscLogObjectMemory((PetscObject)A,a->locr*sizeof(PetscBLASInt));CHKERRQ(ierr); 777 } 778 PetscStackCallBLAS("SCALAPACKgetrf",SCALAPACKgetrf_(&a->M,&a->N,a->loc,&one,&one,a->desc,a->pivots,&info)); 779 PetscCheckScaLapackInfo("getrf",info); 780 A->factortype = MAT_FACTOR_LU; 781 A->assembled = PETSC_TRUE; 782 783 ierr = PetscFree(A->solvertype);CHKERRQ(ierr); 784 ierr = PetscStrallocpy(MATSOLVERSCALAPACK,&A->solvertype);CHKERRQ(ierr); 785 PetscFunctionReturn(0); 786 } 787 788 static PetscErrorCode MatLUFactorNumeric_ScaLAPACK(Mat F,Mat A,const MatFactorInfo *info) 789 { 790 PetscErrorCode ierr; 791 792 PetscFunctionBegin; 793 ierr = MatCopy(A,F,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 794 ierr = MatLUFactor_ScaLAPACK(F,0,0,info);CHKERRQ(ierr); 795 PetscFunctionReturn(0); 796 } 797 798 static PetscErrorCode MatLUFactorSymbolic_ScaLAPACK(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info) 799 { 800 PetscFunctionBegin; 801 /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. 
*/ 802 PetscFunctionReturn(0); 803 } 804 805 static PetscErrorCode MatCholeskyFactor_ScaLAPACK(Mat A,IS perm,const MatFactorInfo *factorinfo) 806 { 807 PetscErrorCode ierr; 808 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 809 PetscBLASInt one=1,info; 810 811 PetscFunctionBegin; 812 PetscStackCallBLAS("SCALAPACKpotrf",SCALAPACKpotrf_("L",&a->M,a->loc,&one,&one,a->desc,&info)); 813 PetscCheckScaLapackInfo("potrf",info); 814 A->factortype = MAT_FACTOR_CHOLESKY; 815 A->assembled = PETSC_TRUE; 816 817 ierr = PetscFree(A->solvertype);CHKERRQ(ierr); 818 ierr = PetscStrallocpy(MATSOLVERSCALAPACK,&A->solvertype);CHKERRQ(ierr); 819 PetscFunctionReturn(0); 820 } 821 822 static PetscErrorCode MatCholeskyFactorNumeric_ScaLAPACK(Mat F,Mat A,const MatFactorInfo *info) 823 { 824 PetscErrorCode ierr; 825 826 PetscFunctionBegin; 827 ierr = MatCopy(A,F,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 828 ierr = MatCholeskyFactor_ScaLAPACK(F,0,info);CHKERRQ(ierr); 829 PetscFunctionReturn(0); 830 } 831 832 static PetscErrorCode MatCholeskyFactorSymbolic_ScaLAPACK(Mat F,Mat A,IS perm,const MatFactorInfo *info) 833 { 834 PetscFunctionBegin; 835 /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. 
*/ 836 PetscFunctionReturn(0); 837 } 838 839 PetscErrorCode MatFactorGetSolverType_scalapack_scalapack(Mat A,MatSolverType *type) 840 { 841 PetscFunctionBegin; 842 *type = MATSOLVERSCALAPACK; 843 PetscFunctionReturn(0); 844 } 845 846 static PetscErrorCode MatGetFactor_scalapack_scalapack(Mat A,MatFactorType ftype,Mat *F) 847 { 848 Mat B; 849 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 850 PetscErrorCode ierr; 851 852 PetscFunctionBegin; 853 /* Create the factorization matrix */ 854 ierr = MatCreateScaLAPACK(PetscObjectComm((PetscObject)A),a->mb,a->nb,a->M,a->N,a->rsrc,a->csrc,&B);CHKERRQ(ierr); 855 B->factortype = ftype; 856 ierr = PetscFree(B->solvertype);CHKERRQ(ierr); 857 ierr = PetscStrallocpy(MATSOLVERSCALAPACK,&B->solvertype);CHKERRQ(ierr); 858 859 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_scalapack_scalapack);CHKERRQ(ierr); 860 *F = B; 861 PetscFunctionReturn(0); 862 } 863 864 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_ScaLAPACK(void) 865 { 866 PetscErrorCode ierr; 867 868 PetscFunctionBegin; 869 ierr = MatSolverTypeRegister(MATSOLVERSCALAPACK,MATSCALAPACK,MAT_FACTOR_LU,MatGetFactor_scalapack_scalapack);CHKERRQ(ierr); 870 ierr = MatSolverTypeRegister(MATSOLVERSCALAPACK,MATSCALAPACK,MAT_FACTOR_CHOLESKY,MatGetFactor_scalapack_scalapack);CHKERRQ(ierr); 871 PetscFunctionReturn(0); 872 } 873 874 static PetscErrorCode MatNorm_ScaLAPACK(Mat A,NormType type,PetscReal *nrm) 875 { 876 PetscErrorCode ierr; 877 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 878 PetscBLASInt one=1,lwork=0; 879 const char *ntype; 880 PetscScalar *work=NULL,dummy; 881 882 PetscFunctionBegin; 883 switch (type){ 884 case NORM_1: 885 ntype = "1"; 886 lwork = PetscMax(a->locr,a->locc); 887 break; 888 case NORM_FROBENIUS: 889 ntype = "F"; 890 work = &dummy; 891 break; 892 case NORM_INFINITY: 893 ntype = "I"; 894 lwork = PetscMax(a->locr,a->locc); 895 break; 896 default: 897 
SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Unsupported norm type"); 898 } 899 if (lwork) { ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); } 900 *nrm = SCALAPACKlange_(ntype,&a->M,&a->N,a->loc,&one,&one,a->desc,work); 901 if (lwork) { ierr = PetscFree(work);CHKERRQ(ierr); } 902 PetscFunctionReturn(0); 903 } 904 905 static PetscErrorCode MatZeroEntries_ScaLAPACK(Mat A) 906 { 907 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 908 PetscErrorCode ierr; 909 910 PetscFunctionBegin; 911 ierr = PetscArrayzero(a->loc,a->lld*a->locc);CHKERRQ(ierr); 912 PetscFunctionReturn(0); 913 } 914 915 static PetscErrorCode MatGetOwnershipIS_ScaLAPACK(Mat A,IS *rows,IS *cols) 916 { 917 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 918 PetscErrorCode ierr; 919 PetscInt i,n,nb,isrc,nproc,iproc,*idx; 920 921 PetscFunctionBegin; 922 if (rows) { 923 n = a->locr; 924 nb = a->mb; 925 isrc = a->rsrc; 926 nproc = a->grid->nprow; 927 iproc = a->grid->myrow; 928 ierr = PetscMalloc1(n,&idx);CHKERRQ(ierr); 929 for (i=0;i<n;i++) idx[i] = nproc*nb*(i/nb) + i%nb + ((nproc+iproc-isrc)%nproc)*nb; 930 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx,PETSC_OWN_POINTER,rows);CHKERRQ(ierr); 931 } 932 if (cols) { 933 n = a->locc; 934 nb = a->nb; 935 isrc = a->csrc; 936 nproc = a->grid->npcol; 937 iproc = a->grid->mycol; 938 ierr = PetscMalloc1(n,&idx);CHKERRQ(ierr); 939 for (i=0;i<n;i++) idx[i] = nproc*nb*(i/nb) + i%nb + ((nproc+iproc-isrc)%nproc)*nb; 940 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx,PETSC_OWN_POINTER,cols);CHKERRQ(ierr); 941 } 942 PetscFunctionReturn(0); 943 } 944 945 static PetscErrorCode MatConvert_ScaLAPACK_Dense(Mat A,MatType newtype,MatReuse reuse,Mat *B) 946 { 947 PetscErrorCode ierr; 948 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 949 Mat Bmpi; 950 MPI_Comm comm; 951 PetscInt i,M=A->rmap->N,N=A->cmap->N,m,n,rstart,rend,nz; 952 const PetscInt *ranges,*branges,*cwork; 953 const PetscScalar *vwork; 954 PetscBLASInt bdesc[9],bmb,zero=0,one=1,lld,info; 955 PetscScalar *barray; 956 
PetscBool         differ=PETSC_FALSE;
  PetscMPIInt       size;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr);

  if (reuse == MAT_REUSE_MATRIX) { /* check if local sizes differ in A and B */
    ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
    ierr = PetscLayoutGetRanges((*B)->rmap,&branges);CHKERRQ(ierr);
    for (i=0;i<size;i++) if (ranges[i+1]!=branges[i+1]) { differ=PETSC_TRUE; break; }
  }

  if (reuse == MAT_REUSE_MATRIX && differ) { /* special case, use auxiliary dense matrix */
    ierr = MatCreate(comm,&Bmpi);CHKERRQ(ierr);
    m    = PETSC_DECIDE;
    ierr = PetscSplitOwnershipEqual(comm,&m,&M);CHKERRQ(ierr);
    n    = PETSC_DECIDE;
    ierr = PetscSplitOwnershipEqual(comm,&n,&N);CHKERRQ(ierr);
    ierr = MatSetSizes(Bmpi,m,n,M,N);CHKERRQ(ierr);
    ierr = MatSetType(Bmpi,MATDENSE);CHKERRQ(ierr);
    ierr = MatSetUp(Bmpi);CHKERRQ(ierr);

    /* create ScaLAPACK descriptor for B (1d block distribution) */
    ierr = PetscBLASIntCast(ranges[1],&bmb);CHKERRQ(ierr);  /* row block size */
    lld  = PetscMax(A->rmap->n,1);  /* local leading dimension */
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(bdesc,&a->M,&a->N,&bmb,&a->N,&zero,&zero,&a->grid->ictxcol,&lld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* redistribute matrix */
    ierr = MatDenseGetArray(Bmpi,&barray);CHKERRQ(ierr);
    PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&a->N,a->loc,&one,&one,a->desc,barray,&one,&one,bdesc,&a->grid->ictxcol));
    ierr = MatDenseRestoreArray(Bmpi,&barray);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

    /* transfer rows of auxiliary matrix to the final matrix B */
    ierr = MatGetOwnershipRange(Bmpi,&rstart,&rend);CHKERRQ(ierr);
    for (i=rstart;i<rend;i++) {
      ierr = MatGetRow(Bmpi,i,&nz,&cwork,&vwork);CHKERRQ(ierr);
      ierr = MatSetValues(*B,1,&i,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow(Bmpi,i,&nz,&cwork,&vwork);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatDestroy(&Bmpi);CHKERRQ(ierr);

  } else { /* normal cases */

    if (reuse == MAT_REUSE_MATRIX) Bmpi = *B;
    else {
      ierr = MatCreate(comm,&Bmpi);CHKERRQ(ierr);
      m    = PETSC_DECIDE;
      ierr = PetscSplitOwnershipEqual(comm,&m,&M);CHKERRQ(ierr);
      n    = PETSC_DECIDE;
      ierr = PetscSplitOwnershipEqual(comm,&n,&N);CHKERRQ(ierr);
      ierr = MatSetSizes(Bmpi,m,n,M,N);CHKERRQ(ierr);
      ierr = MatSetType(Bmpi,MATDENSE);CHKERRQ(ierr);
      ierr = MatSetUp(Bmpi);CHKERRQ(ierr);
    }

    /* create ScaLAPACK descriptor for B (1d block distribution) */
    ierr = PetscBLASIntCast(ranges[1],&bmb);CHKERRQ(ierr);  /* row block size */
    lld  = PetscMax(A->rmap->n,1);  /* local leading dimension */
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(bdesc,&a->M,&a->N,&bmb,&a->N,&zero,&zero,&a->grid->ictxcol,&lld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* redistribute matrix */
    ierr = MatDenseGetArray(Bmpi,&barray);CHKERRQ(ierr);
    PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&a->N,a->loc,&one,&one,a->desc,barray,&one,&one,bdesc,&a->grid->ictxcol));
    ierr = MatDenseRestoreArray(Bmpi,&barray);CHKERRQ(ierr);

    ierr = MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    if (reuse == MAT_INPLACE_MATRIX) {
      ierr = MatHeaderReplace(A,&Bmpi);CHKERRQ(ierr);
    } else *B = Bmpi;
  }
  PetscFunctionReturn(0);
}

/* Convert MATDENSE (1d by rows) to MATSCALAPACK (2d block-cyclic) */
PETSC_INTERN PetscErrorCode MatConvert_Dense_ScaLAPACK(Mat A,MatType newtype,MatReuse reuse,Mat *B)
{
  PetscErrorCode ierr;
Mat_ScaLAPACK  *b;
  Mat            Bmpi;
  MPI_Comm       comm;
  PetscInt       M=A->rmap->N,N=A->cmap->N,m,n;
  const PetscInt *ranges;
  PetscBLASInt   adesc[9],amb,zero=0,one=1,lld,info;
  PetscScalar    *aarray;
  PetscInt       lda;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);

  if (reuse == MAT_REUSE_MATRIX) Bmpi = *B;
  else {
    ierr = MatCreate(comm,&Bmpi);CHKERRQ(ierr);
    m    = PETSC_DECIDE;
    ierr = PetscSplitOwnershipEqual(comm,&m,&M);CHKERRQ(ierr);
    n    = PETSC_DECIDE;
    ierr = PetscSplitOwnershipEqual(comm,&n,&N);CHKERRQ(ierr);
    ierr = MatSetSizes(Bmpi,m,n,M,N);CHKERRQ(ierr);
    ierr = MatSetType(Bmpi,MATSCALAPACK);CHKERRQ(ierr);
    ierr = MatSetUp(Bmpi);CHKERRQ(ierr);
  }
  b = (Mat_ScaLAPACK*)Bmpi->data;

  /* create ScaLAPACK descriptor for A (1d block distribution) */
  ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(ranges[1],&amb);CHKERRQ(ierr);  /* row block size */
  ierr = MatDenseGetLDA(A,&lda);CHKERRQ(ierr);
  lld  = PetscMax(lda,1);  /* local leading dimension */
  PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(adesc,&b->M,&b->N,&amb,&b->N,&zero,&zero,&b->grid->ictxcol,&lld,&info));
  PetscCheckScaLapackInfo("descinit",info);

  /* redistribute matrix */
  ierr = MatDenseGetArray(A,&aarray);CHKERRQ(ierr);
  PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&b->M,&b->N,aarray,&one,&one,adesc,b->loc,&one,&one,b->desc,&b->grid->ictxcol));
  ierr = MatDenseRestoreArray(A,&aarray);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INPLACE_MATRIX) {
    ierr = MatHeaderReplace(A,&Bmpi);CHKERRQ(ierr);
  } else *B = Bmpi;
  PetscFunctionReturn(0);
}

/* Convert AIJ to MATSCALAPACK row by row via MatSetValues */
PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
{
  Mat               mat_scal;
  PetscErrorCode    ierr;
  PetscInt          M=A->rmap->N,N=A->cmap->N,rstart=A->rmap->rstart,rend=A->rmap->rend,m,n,row,ncols;
  const PetscInt    *cols;
  const PetscScalar *vals;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) {
    mat_scal = *newmat;
    ierr = MatZeroEntries(mat_scal);CHKERRQ(ierr);
  } else {
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&mat_scal);CHKERRQ(ierr);
    m    = PETSC_DECIDE;
    ierr = PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&m,&M);CHKERRQ(ierr);
    n    = PETSC_DECIDE;
    ierr = PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&n,&N);CHKERRQ(ierr);
    ierr = MatSetSizes(mat_scal,m,n,M,N);CHKERRQ(ierr);
    ierr = MatSetType(mat_scal,MATSCALAPACK);CHKERRQ(ierr);
    ierr = MatSetUp(mat_scal);CHKERRQ(ierr);
  }
  for (row=rstart;row<rend;row++) {
    ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
    ierr = MatSetValues(mat_scal,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(mat_scal,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat_scal,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  if (reuse == MAT_INPLACE_MATRIX) { ierr = MatHeaderReplace(A,&mat_scal);CHKERRQ(ierr); }
  else *newmat = mat_scal;
  PetscFunctionReturn(0);
}

/* Convert SBAIJ (only upper triangle stored) to MATSCALAPACK, explicitly filling
   the lower triangle; ADD_VALUES is used, with diagonal entries skipped in the
   mirrored insertion so they are not counted twice */
PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
{
  Mat               mat_scal;
  PetscErrorCode    ierr;
  PetscInt          M=A->rmap->N,N=A->cmap->N,m,n,row,ncols,j,rstart=A->rmap->rstart,rend=A->rmap->rend;
  const PetscInt    *cols;
  const PetscScalar *vals;
  PetscScalar       v;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) {
    mat_scal = *newmat;
    ierr = MatZeroEntries(mat_scal);CHKERRQ(ierr);
  } else {
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&mat_scal);CHKERRQ(ierr);
    m    = PETSC_DECIDE;
    ierr = PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&m,&M);CHKERRQ(ierr);
    n    = PETSC_DECIDE;
    ierr = PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&n,&N);CHKERRQ(ierr);
    ierr = MatSetSizes(mat_scal,m,n,M,N);CHKERRQ(ierr);
    ierr = MatSetType(mat_scal,MATSCALAPACK);CHKERRQ(ierr);
    ierr = MatSetUp(mat_scal);CHKERRQ(ierr);
  }
  ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
  for (row=rstart;row<rend;row++) {
    ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
    ierr = MatSetValues(mat_scal,1,&row,ncols,cols,vals,ADD_VALUES);CHKERRQ(ierr);
    for (j=0;j<ncols;j++) { /* lower triangular part */
      if (cols[j] == row) continue;
      v    = A->hermitian ? PetscConj(vals[j]) : vals[j];
      ierr = MatSetValues(mat_scal,1,&cols[j],1,&row,&v,ADD_VALUES);CHKERRQ(ierr);
    }
    ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
  }
  ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(mat_scal,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat_scal,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  if (reuse == MAT_INPLACE_MATRIX) { ierr = MatHeaderReplace(A,&mat_scal);CHKERRQ(ierr); }
  else *newmat = mat_scal;
  PetscFunctionReturn(0);
}

/* Allocate (zeroed) the local lld x locc array holding this process' part of the matrix */
static PetscErrorCode MatScaLAPACKSetPreallocation(Mat A)
{
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
  PetscErrorCode ierr;
  PetscInt       sz=0;

  PetscFunctionBegin;
  ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
  if (!a->lld) a->lld = a->locr;

  ierr = PetscFree(a->loc);CHKERRQ(ierr);
  ierr = PetscIntMultError(a->lld,a->locc,&sz);CHKERRQ(ierr);  /* overflow-checked product */
  ierr = PetscCalloc1(sz,&a->loc);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)A,sz*sizeof(PetscScalar));CHKERRQ(ierr);
A->preallocated = PETSC_TRUE;
  PetscFunctionReturn(0);
}

/* Free local storage and drop the reference to the shared BLACS grid, destroying
   the grid (and the communicator attribute holding it) when the last matrix on
   the communicator goes away */
static PetscErrorCode MatDestroy_ScaLAPACK(Mat A)
{
  Mat_ScaLAPACK      *a = (Mat_ScaLAPACK*)A->data;
  PetscErrorCode     ierr;
  Mat_ScaLAPACK_Grid *grid;
  PetscBool          flg;
  MPI_Comm           icomm;

  PetscFunctionBegin;
  ierr = MatStashDestroy_Private(&A->stash);CHKERRQ(ierr);
  ierr = PetscFree(a->loc);CHKERRQ(ierr);
  ierr = PetscFree(a->pivots);CHKERRQ(ierr);
  ierr = PetscCommDuplicate(PetscObjectComm((PetscObject)A),&icomm,NULL);CHKERRQ(ierr);
  ierr = MPI_Comm_get_attr(icomm,Petsc_ScaLAPACK_keyval,(void**)&grid,(int*)&flg);CHKERRMPI(ierr);
  if (--grid->grid_refct == 0) {
    Cblacs_gridexit(grid->ictxt);
    Cblacs_gridexit(grid->ictxrow);
    Cblacs_gridexit(grid->ictxcol);
    ierr = PetscFree(grid);CHKERRQ(ierr);
    ierr = MPI_Comm_delete_attr(icomm,Petsc_ScaLAPACK_keyval);CHKERRMPI(ierr);
  }
  ierr = PetscCommDestroy(&icomm);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatGetOwnershipIS_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKSetBlockSizes_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKGetBlockSizes_C",NULL);CHKERRQ(ierr);
  ierr = PetscFree(A->data);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Verify that all processes (except possibly the last one) have equal local sizes,
   which is the layout produced by MatCreateScaLAPACK() */
PETSC_STATIC_INLINE PetscErrorCode MatScaLAPACKCheckLayout(PetscLayout map)
{
  PetscErrorCode ierr;
  const PetscInt *ranges;
  PetscMPIInt    size;
  PetscInt       i,n;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(map->comm,&size);CHKERRMPI(ierr);
  if (size>2) {
    ierr = PetscLayoutGetRanges(map,&ranges);CHKERRQ(ierr);
    n = ranges[1]-ranges[0];   /* local size of the first process */
    /* find the first interior process whose local size differs */
    for (i=1;i<size-1;i++) if (ranges[i+1]-ranges[i]!=n) break;
    /* tolerate the mismatch only when trailing local sizes are zero */
    if (i<size-1 && ranges[i+1]-ranges[i]!=0 && ranges[i+2]-ranges[i+1]!=0) SETERRQ(map->comm,PETSC_ERR_SUP,"MATSCALAPACK must have equal local sizes in all processes (except possibly the last one), consider using MatCreateScaLAPACK");
  }
  PetscFunctionReturn(0);
}

/* Compute local sizes with numroc, allocate local storage, and initialize the
   ScaLAPACK array descriptor */
PetscErrorCode MatSetUp_ScaLAPACK(Mat A)
{
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
  PetscErrorCode ierr;
  PetscBLASInt   info=0;

  PetscFunctionBegin;
  ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);

  /* check that the layout is as enforced by MatCreateScaLAPACK */
  ierr = MatScaLAPACKCheckLayout(A->rmap);CHKERRQ(ierr);
  ierr = MatScaLAPACKCheckLayout(A->cmap);CHKERRQ(ierr);

  /* compute local sizes */
  ierr = PetscBLASIntCast(A->rmap->N,&a->M);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(A->cmap->N,&a->N);CHKERRQ(ierr);
  a->locr = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
  a->locc = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
  a->lld  = PetscMax(1,a->locr);

  /* allocate local array */
  ierr = MatScaLAPACKSetPreallocation(A);CHKERRQ(ierr);

  /* set up ScaLAPACK descriptor */
  PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(a->desc,&a->M,&a->N,&a->mb,&a->nb,&a->rsrc,&a->csrc,&a->grid->ictxt,&a->lld,&info));
  PetscCheckScaLapackInfo("descinit",info);
  PetscFunctionReturn(0);
}

/* Start communicating off-process entries stashed by MatSetValues */
PetscErrorCode MatAssemblyBegin_ScaLAPACK(Mat A,MatAssemblyType type)
{
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;

  PetscFunctionBegin;
  if (A->nooffprocentries) PetscFunctionReturn(0);
  ierr = MatStashScatterBegin_Private(A,&A->stash,NULL);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&A->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
/* Receive stashed off-process entries and insert them into the local 2d block-cyclic array */
PetscErrorCode MatAssemblyEnd_ScaLAPACK(Mat A,MatAssemblyType type)
{
  PetscErrorCode ierr;
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
  PetscMPIInt    n;
  PetscInt       i,flg,*row,*col;
  PetscScalar    *val;
  PetscBLASInt   gridx,gcidx,lridx,lcidx,rsrc,csrc;

  PetscFunctionBegin;
  if (A->nooffprocentries) PetscFunctionReturn(0);
  while (1) {
    ierr = MatStashScatterGetMesg_Private(&A->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
    if (!flg) break;
    for (i=0;i<n;i++) {
      /* infog2l maps a (1-based) global index to local indices plus owner grid coordinates */
      ierr = PetscBLASIntCast(row[i]+1,&gridx);CHKERRQ(ierr);
      ierr = PetscBLASIntCast(col[i]+1,&gcidx);CHKERRQ(ierr);
      PetscStackCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc));
      if (rsrc!=a->grid->myrow || csrc!=a->grid->mycol) SETERRQ(PetscObjectComm((PetscObject)A),1,"Something went wrong, received value does not belong to this process");
      switch (A->insertmode) {
        /* lridx/lcidx are 1-based; the local array is column-major with leading dimension lld */
        case INSERT_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] = val[i]; break;
        case ADD_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] += val[i]; break;
        default: SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for InsertMode %d",(int)A->insertmode);
      }
    }
  }
  ierr = MatStashScatterEnd_Private(&A->stash);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Load through an intermediate MATDENSE matrix, then convert to MATSCALAPACK */
PetscErrorCode MatLoad_ScaLAPACK(Mat newMat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  Mat            Adense,As;
  MPI_Comm       comm;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)newMat,&comm);CHKERRQ(ierr);
  ierr = MatCreate(comm,&Adense);CHKERRQ(ierr);
  ierr = MatSetType(Adense,MATDENSE);CHKERRQ(ierr);
  ierr = MatLoad(Adense,viewer);CHKERRQ(ierr);
  ierr = MatConvert(Adense,MATSCALAPACK,MAT_INITIAL_MATRIX,&As);CHKERRQ(ierr);
  ierr = MatDestroy(&Adense);CHKERRQ(ierr);
  ierr = MatHeaderReplace(newMat,&As);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------*/
/* Function table for MATSCALAPACK; the position of each entry is fixed by struct _MatOps */
static struct _MatOps MatOps_Values = {
       MatSetValues_ScaLAPACK,
       0,
       0,
       MatMult_ScaLAPACK,
/* 4*/ MatMultAdd_ScaLAPACK,
       MatMultTranspose_ScaLAPACK,
       MatMultTransposeAdd_ScaLAPACK,
       MatSolve_ScaLAPACK,
       MatSolveAdd_ScaLAPACK,
       0,
/*10*/ 0,
       MatLUFactor_ScaLAPACK,
       MatCholeskyFactor_ScaLAPACK,
       0,
       MatTranspose_ScaLAPACK,
/*15*/ MatGetInfo_ScaLAPACK,
       0,
       MatGetDiagonal_ScaLAPACK,
       MatDiagonalScale_ScaLAPACK,
       MatNorm_ScaLAPACK,
/*20*/ MatAssemblyBegin_ScaLAPACK,
       MatAssemblyEnd_ScaLAPACK,
       MatSetOption_ScaLAPACK,
       MatZeroEntries_ScaLAPACK,
/*24*/ 0,
       MatLUFactorSymbolic_ScaLAPACK,
       MatLUFactorNumeric_ScaLAPACK,
       MatCholeskyFactorSymbolic_ScaLAPACK,
       MatCholeskyFactorNumeric_ScaLAPACK,
/*29*/ MatSetUp_ScaLAPACK,
       0,
       0,
       0,
       0,
/*34*/ MatDuplicate_ScaLAPACK,
       0,
       0,
       0,
       0,
/*39*/ MatAXPY_ScaLAPACK,
       0,
       0,
       0,
       MatCopy_ScaLAPACK,
/*44*/ 0,
       MatScale_ScaLAPACK,
       MatShift_ScaLAPACK,
       0,
       0,
/*49*/ 0,
       0,
       0,
       0,
       0,
/*54*/ 0,
       0,
       0,
       0,
       0,
/*59*/ 0,
       MatDestroy_ScaLAPACK,
       MatView_ScaLAPACK,
       0,
       0,
/*64*/ 0,
       0,
       0,
       0,
       0,
/*69*/ 0,
       0,
       MatConvert_ScaLAPACK_Dense,
       0,
       0,
/*74*/ 0,
       0,
       0,
       0,
       0,
/*79*/ 0,
       0,
       0,
       0,
       MatLoad_ScaLAPACK,
/*84*/ 0,
       0,
       0,
       0,
       0,
/*89*/ 0,
       0,
       MatMatMultNumeric_ScaLAPACK,
       0,
       0,
/*94*/ 0,
       0,
       0,
       MatMatTransposeMultNumeric_ScaLAPACK,
       0,
/*99*/ MatProductSetFromOptions_ScaLAPACK,
       0,
       0,
       MatConjugate_ScaLAPACK,
       0,
/*104*/0,
       0,
       0,
       0,
       0,
/*109*/MatMatSolve_ScaLAPACK,
       0,
       0,
       0,
       MatMissingDiagonal_ScaLAPACK,
/*114*/0,
       0,
       0,
       0,
       0,
/*119*/0,
       MatHermitianTranspose_ScaLAPACK,
       0,
       0,
       0,
/*124*/0,
       0,
       0,
       0,
       0,
/*129*/0,
       0,
       0,
       0,
       0,
/*134*/0,
       0,
       0,
       0,
       0,
       0,
/*140*/0,
       0,
       0,
       0,
       0,
/*145*/0,
       0,
       0
};

/* Custom stash scatter for the 2d block-cyclic layout: each stashed (row,col,value)
   triple is routed to the process computed with infog2l/Cblacs_pnum, rather than by
   contiguous row ranges as in the default stash scatter */
static PetscErrorCode MatStashScatterBegin_ScaLAPACK(Mat mat,MatStash *stash,PetscInt *owners)
{
  PetscInt           *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2;
  PetscInt           size=stash->size,nsends;
  PetscErrorCode     ierr;
  PetscInt           count,*sindices,**rindices,i,j,l;
  PetscScalar        **rvalues,*svalues;
  MPI_Comm           comm = stash->comm;
  MPI_Request        *send_waits,*recv_waits,*recv_waits1,*recv_waits2;
  PetscMPIInt        *sizes,*nlengths,nreceives;
  PetscInt           *sp_idx,*sp_idy;
  PetscScalar        *sp_val;
  PetscMatStashSpace space,space_next;
  PetscBLASInt       gridx,gcidx,lridx,lcidx,rsrc,csrc;
  Mat_ScaLAPACK      *a = (Mat_ScaLAPACK*)mat->data;

  PetscFunctionBegin;
  { /* make sure all processors are either in INSERTMODE or ADDMODE */
    InsertMode addv;
    ierr = MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
    mat->insertmode = addv; /* in case this processor had no cache */
  }

  bs2 = stash->bs*stash->bs;

  /* first count number of contributors to each processor */
  ierr = PetscCalloc1(size,&nlengths);CHKERRQ(ierr);
  ierr = PetscMalloc1(stash->n+1,&owner);CHKERRQ(ierr);

  i = j = 0;
  space = stash->space_head;
  while (space) {
    space_next = space->next;
    for (l=0; l<space->local_used; l++) {
      /* determine the owning process of entry (idx,idy) in the 2d block-cyclic layout */
      ierr = PetscBLASIntCast(space->idx[l]+1,&gridx);CHKERRQ(ierr);
      ierr = PetscBLASIntCast(space->idy[l]+1,&gcidx);CHKERRQ(ierr);
      PetscStackCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc));
      j = Cblacs_pnum(a->grid->ictxt,rsrc,csrc);
      nlengths[j]++; owner[i] = j;
      i++;
    }
    space = space_next;
  }

  /* Now check what procs get messages - and compute nsends. */
  ierr = PetscCalloc1(size,&sizes);CHKERRQ(ierr);
  for (i=0, nsends=0; i<size; i++) {
    if (nlengths[i]) {
      sizes[i] = 1; nsends++;
    }
  }

  {PetscMPIInt *onodes,*olengths;
  /* Determine the number of messages to expect, their lengths, and the from-ids */
  ierr = PetscGatherNumberOfMessages(comm,sizes,nlengths,&nreceives);CHKERRQ(ierr);
  ierr = PetscGatherMessageLengths(comm,nsends,nreceives,nlengths,&onodes,&olengths);CHKERRQ(ierr);
  /* since clubbing row,col - lengths are multiplied by 2 */
  for (i=0; i<nreceives; i++) olengths[i] *=2;
  ierr = PetscPostIrecvInt(comm,tag1,nreceives,onodes,olengths,&rindices,&recv_waits1);CHKERRQ(ierr);
  /* values are size 'bs2' lengths (and remove earlier factor 2) */
  for (i=0; i<nreceives; i++) olengths[i] = olengths[i]*bs2/2;
  ierr = PetscPostIrecvScalar(comm,tag2,nreceives,onodes,olengths,&rvalues,&recv_waits2);CHKERRQ(ierr);
  ierr = PetscFree(onodes);CHKERRQ(ierr);
  ierr = PetscFree(olengths);CHKERRQ(ierr);}

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc2(bs2*stash->n,&svalues,2*(stash->n+1),&sindices);CHKERRQ(ierr);
  ierr = PetscMalloc1(2*nsends,&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc2(size,&startv,size,&starti);CHKERRQ(ierr);
  /* use 2 sends the first with all_a, the next with all_i and all_j */
  startv[0] = 0; starti[0] = 0;
  for (i=1; i<size; i++) {
    startv[i] = startv[i-1] + nlengths[i-1];
    starti[i] = starti[i-1] + 2*nlengths[i-1];
  }

  i = 0;
  space = stash->space_head;
  while (space) {
    space_next = space->next;
    sp_idx = space->idx;
    sp_idy = space->idy;
    sp_val = space->val;
    for (l=0; l<space->local_used; l++) {
      j = owner[i];
      if (bs2 == 1) {
        svalues[startv[j]] = sp_val[l];
      } else {
        PetscInt    k;
        PetscScalar *buf1,*buf2;
        buf1 = svalues+bs2*startv[j];
        buf2 = space->val + bs2*l;
        for (k=0; k<bs2; k++) buf1[k] = buf2[k];
      }
      /* row indices occupy the first half of each message, column indices the second */
      sindices[starti[j]]             = sp_idx[l];
      sindices[starti[j]+nlengths[j]] = sp_idy[l];
      startv[j]++;
      starti[j]++;
      i++;
    }
    space = space_next;
  }
  /* reset startv, which was advanced while packing, for posting the sends */
  startv[0] = 0;
  for (i=1; i<size; i++) startv[i] = startv[i-1] + nlengths[i-1];

  for (i=0,count=0; i<size; i++) {
    if (sizes[i]) {
      ierr = MPI_Isend(sindices+2*startv[i],2*nlengths[i],MPIU_INT,i,tag1,comm,send_waits+count++);CHKERRMPI(ierr);
      ierr = MPI_Isend(svalues+bs2*startv[i],bs2*nlengths[i],MPIU_SCALAR,i,tag2,comm,send_waits+count++);CHKERRMPI(ierr);
    }
  }
#if defined(PETSC_USE_INFO)
  ierr = PetscInfo1(NULL,"No of messages: %d \n",nsends);CHKERRQ(ierr);
  for (i=0; i<size; i++) {
    if (sizes[i]) {
      ierr = PetscInfo2(NULL,"Mesg_to: %d: size: %d bytes\n",i,nlengths[i]*(bs2*sizeof(PetscScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);
    }
  }
#endif
  ierr = PetscFree(nlengths);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree2(startv,starti);CHKERRQ(ierr);
  ierr = PetscFree(sizes);CHKERRQ(ierr);

  /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */
  ierr = PetscMalloc1(2*nreceives,&recv_waits);CHKERRQ(ierr);

  for (i=0; i<nreceives; i++) {
    recv_waits[2*i]   = recv_waits1[i];
    recv_waits[2*i+1] = recv_waits2[i];
  }
  stash->recv_waits = recv_waits;

  ierr = PetscFree(recv_waits1);CHKERRQ(ierr);
  ierr = PetscFree(recv_waits2);CHKERRQ(ierr);

  stash->svalues         = svalues;
  stash->sindices        = sindices;
  stash->rvalues         = rvalues;
  stash->rindices        = rindices;
  stash->send_waits      = send_waits;
  stash->nsends          = nsends;
  stash->nrecvs          = nreceives;
  stash->reproduce_count = 0;
  PetscFunctionReturn(0);
}

/* Implementation of MatScaLAPACKSetBlockSizes(); must be called before MatSetUp() */
static PetscErrorCode MatScaLAPACKSetBlockSizes_ScaLAPACK(Mat A,PetscInt mb,PetscInt nb)
{
  PetscErrorCode ierr;
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;

  PetscFunctionBegin;
  if (A->preallocated) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Cannot change block sizes after MatSetUp");
  if (mb<1 && mb!=PETSC_DECIDE) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"mb %D must be at least 1",mb);
  if (nb<1 && nb!=PETSC_DECIDE) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"nb %D must be at least 1",nb);
  ierr = PetscBLASIntCast((mb==PETSC_DECIDE)?DEFAULT_BLOCKSIZE:mb,&a->mb);CHKERRQ(ierr);
  /* nb defaults to mb when PETSC_DECIDE */
  ierr = PetscBLASIntCast((nb==PETSC_DECIDE)?a->mb:nb,&a->nb);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   MatScaLAPACKSetBlockSizes - Sets the block sizes to be used for the distribution of
   the ScaLAPACK matrix

   Logically Collective on A

   Input Parameter:
+  A - a MATSCALAPACK matrix
.
mb - the row block size
-  nb - the column block size

   Level: intermediate

.seealso: MatCreateScaLAPACK(), MatScaLAPACKGetBlockSizes()
@*/
PetscErrorCode MatScaLAPACKSetBlockSizes(Mat A,PetscInt mb,PetscInt nb)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  PetscValidLogicalCollectiveInt(A,mb,2);
  PetscValidLogicalCollectiveInt(A,nb,3);
  ierr = PetscTryMethod(A,"MatScaLAPACKSetBlockSizes_C",(Mat,PetscInt,PetscInt),(A,mb,nb));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Implementation of MatScaLAPACKGetBlockSizes(); either output may be NULL */
static PetscErrorCode MatScaLAPACKGetBlockSizes_ScaLAPACK(Mat A,PetscInt *mb,PetscInt *nb)
{
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;

  PetscFunctionBegin;
  if (mb) *mb = a->mb;
  if (nb) *nb = a->nb;
  PetscFunctionReturn(0);
}

/*@
   MatScaLAPACKGetBlockSizes - Gets the block sizes used in the distribution of
   the ScaLAPACK matrix

   Not collective

   Input Parameter:
.  A - a MATSCALAPACK matrix

   Output Parameters:
+  mb - the row block size
-  nb - the column block size

   Level: intermediate

.seealso: MatCreateScaLAPACK(), MatScaLAPACKSetBlockSizes()
@*/
PetscErrorCode MatScaLAPACKGetBlockSizes(Mat A,PetscInt *mb,PetscInt *nb)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  ierr = PetscUseMethod(A,"MatScaLAPACKGetBlockSizes_C",(Mat,PetscInt*,PetscInt*),(A,mb,nb));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Ref(MatStash*,PetscMPIInt*,PetscInt**,PetscInt**,PetscScalar**,PetscInt*);
PETSC_INTERN PetscErrorCode MatStashScatterEnd_Ref(MatStash*);

/*MC
   MATSCALAPACK = "scalapack" - A matrix type for dense matrices using the ScaLAPACK package

   Use ./configure --download-scalapack to install PETSc to use ScaLAPACK

   Use -pc_type lu -pc_factor_mat_solver_type scalapack to use this direct solver

   Options Database Keys:
+  -mat_type scalapack - sets the matrix type to "scalapack" during a call to MatSetFromOptions()
.
-mat_scalapack_grid_height - sets Grid Height for 2D cyclic ordering of internal matrix
-  -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma)

   Level: beginner

.seealso: MATDENSE, MATELEMENTAL
M*/

/*
   Matrix-type constructor registered for MATSCALAPACK: installs the function
   table, sets up the stash, and creates (or reuses) the BLACS process grid
   shared by all ScaLAPACK matrices on this communicator.
*/
PETSC_EXTERN PetscErrorCode MatCreate_ScaLAPACK(Mat A)
{
  Mat_ScaLAPACK      *a;
  PetscErrorCode     ierr;
  PetscBool          flg,flg1;
  Mat_ScaLAPACK_Grid *grid;
  MPI_Comm           icomm;
  PetscBLASInt       nprow,npcol,myrow,mycol;
  PetscInt           optv1,k=2,array[2]={0,0};
  PetscMPIInt        size;

  PetscFunctionBegin;
  ierr = PetscMemcpy(A->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  A->insertmode = NOT_SET_VALUES;

  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)A),1,&A->stash);CHKERRQ(ierr);
  A->stash.ScatterBegin   = MatStashScatterBegin_ScaLAPACK;
  A->stash.ScatterGetMesg = MatStashScatterGetMesg_Ref;
  A->stash.ScatterEnd     = MatStashScatterEnd_Ref;
  A->stash.ScatterDestroy = NULL;

  ierr = PetscNewLog(A,&a);CHKERRQ(ierr);
  A->data = (void*)a;

  /* Grid needs to be shared between multiple Mats on the same communicator, implement by attribute caching on the MPI_Comm */
  if (Petsc_ScaLAPACK_keyval == MPI_KEYVAL_INVALID) {
    ierr = MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN,MPI_COMM_NULL_DELETE_FN,&Petsc_ScaLAPACK_keyval,(void*)0);CHKERRMPI(ierr);
    ierr = PetscRegisterFinalize(Petsc_ScaLAPACK_keyval_free);CHKERRQ(ierr);
    ierr = PetscCitationsRegister(ScaLAPACKCitation,&ScaLAPACKCite);CHKERRQ(ierr);
  }
  ierr = PetscCommDuplicate(PetscObjectComm((PetscObject)A),&icomm,NULL);CHKERRQ(ierr);
  ierr = MPI_Comm_get_attr(icomm,Petsc_ScaLAPACK_keyval,(void**)&grid,(int*)&flg);CHKERRMPI(ierr);
  if (!flg) {
    /* no grid cached on this communicator yet: create one and attach it */
    ierr = PetscNewLog(A,&grid);CHKERRQ(ierr);

    ierr = MPI_Comm_size(icomm,&size);CHKERRMPI(ierr);
    /* default to a squarish grid: nprow ~ sqrt(size); +0.001 guards against sqrt returning just below an exact integer */
    grid->nprow = (PetscInt) (PetscSqrtReal((PetscReal)size) + 0.001);

    ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"ScaLAPACK Grid Options","Mat");CHKERRQ(ierr);
    ierr = PetscOptionsInt("-mat_scalapack_grid_height","Grid Height","None",grid->nprow,&optv1,&flg1);CHKERRQ(ierr);
    if (flg1) {
      /* cast size to PetscInt: %D consumes a PetscInt, which may be 64-bit while PetscMPIInt is always int */
      if (size % optv1) SETERRQ2(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_INCOMP,"Grid Height %D must evenly divide CommSize %D",optv1,(PetscInt)size);
      grid->nprow = optv1;
    }
    ierr = PetscOptionsEnd();CHKERRQ(ierr);

    if (size % grid->nprow) grid->nprow = 1; /* cannot use a squarish grid, use a 1d grid */
    grid->npcol = size/grid->nprow;
    ierr = PetscBLASIntCast(grid->nprow,&nprow);CHKERRQ(ierr);
    ierr = PetscBLASIntCast(grid->npcol,&npcol);CHKERRQ(ierr);
    grid->ictxt = Csys2blacs_handle(icomm);
    Cblacs_gridinit(&grid->ictxt,"R",nprow,npcol);
    /* re-query: gridinfo returns this process's actual coordinates in the created grid */
    Cblacs_gridinfo(grid->ictxt,&nprow,&npcol,&myrow,&mycol);
    grid->grid_refct = 1;
    grid->nprow      = nprow;
    grid->npcol      = npcol;
    grid->myrow      = myrow;
    grid->mycol      = mycol;
    /* auxiliary 1d BLACS contexts for 1xsize and sizex1 grids */
    grid->ictxrow = Csys2blacs_handle(icomm);
    Cblacs_gridinit(&grid->ictxrow,"R",1,size);
    grid->ictxcol = Csys2blacs_handle(icomm);
    Cblacs_gridinit(&grid->ictxcol,"R",size,1);
    ierr = MPI_Comm_set_attr(icomm,Petsc_ScaLAPACK_keyval,(void*)grid);CHKERRMPI(ierr);

  } else grid->grid_refct++;
  ierr = PetscCommDestroy(&icomm);CHKERRQ(ierr);
  a->grid = grid;
  a->mb   = DEFAULT_BLOCKSIZE;
  a->nb   = DEFAULT_BLOCKSIZE;

  ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),NULL,"ScaLAPACK Options","Mat");CHKERRQ(ierr);
  ierr = PetscOptionsIntArray("-mat_scalapack_block_sizes","Size of the blocks to use (one or two comma-separated integers)","MatCreateScaLAPACK",array,&k,&flg);CHKERRQ(ierr);
  if (flg) {
    a->mb = array[0];
    a->nb = (k>1)? array[1]: a->mb; /* a single value sets both block sizes */
  }
  ierr = PetscOptionsEnd();CHKERRQ(ierr);

  ierr = PetscObjectComposeFunction((PetscObject)A,"MatGetOwnershipIS_C",MatGetOwnershipIS_ScaLAPACK);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKSetBlockSizes_C",MatScaLAPACKSetBlockSizes_ScaLAPACK);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKGetBlockSizes_C",MatScaLAPACKGetBlockSizes_ScaLAPACK);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)A,MATSCALAPACK);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   MatCreateScaLAPACK - Creates a dense parallel matrix in ScaLAPACK format
   (2D block cyclic distribution).

   Collective

   Input Parameters:
+  comm - MPI communicator
.  mb - row block size (or PETSC_DECIDE to have it set)
.  nb - column block size (or PETSC_DECIDE to have it set)
.  M - number of global rows
.  N - number of global columns
.  rsrc - coordinate of process that owns the first row of the distributed matrix
-  csrc - coordinate of process that owns the first column of the distributed matrix

   Output Parameter:
.  A - the matrix

   Options Database Keys:
.  -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma)

   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]

   Notes:
   If PETSC_DECIDE is used for the block sizes, then an appropriate value
   is chosen.

   Storage Information:
   Storage is completely managed by ScaLAPACK, so this requires PETSc to be
   configured with ScaLAPACK. In particular, PETSc's local sizes lose
   significance and are thus ignored.
The block sizes refer to the values used for the distributed matrix,
   not the same meaning as in BAIJ.

   Level: intermediate

.seealso: MatCreate(), MatCreateDense(), MatSetValues()
@*/
PetscErrorCode MatCreateScaLAPACK(MPI_Comm comm,PetscInt mb,PetscInt nb,PetscInt M,PetscInt N,PetscInt rsrc,PetscInt csrc,Mat *A)
{
  PetscErrorCode ierr;
  Mat_ScaLAPACK  *ctx;
  PetscInt       lrows,lcols;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetType(*A,MATSCALAPACK);CHKERRQ(ierr);
  if (M==PETSC_DECIDE || N==PETSC_DECIDE) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot use PETSC_DECIDE for matrix dimensions");
  /* local sizes are chosen equal across ranks, NOT via PetscSplitOwnership */
  lrows = PETSC_DECIDE;
  lcols = PETSC_DECIDE;
  ierr  = PetscSplitOwnershipEqual(comm,&lrows,&M);CHKERRQ(ierr);
  ierr  = PetscSplitOwnershipEqual(comm,&lcols,&N);CHKERRQ(ierr);
  ierr  = MatSetSizes(*A,lrows,lcols,M,N);CHKERRQ(ierr);
  ctx   = (Mat_ScaLAPACK*)(*A)->data;
  /* record global dimensions and distribution parameters in BLAS-int form */
  ierr = PetscBLASIntCast(M,&ctx->M);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(N,&ctx->N);CHKERRQ(ierr);
  /* PETSC_DECIDE block sizes fall back to the default; nb additionally defaults to mb */
  ierr = PetscBLASIntCast((mb==PETSC_DECIDE)?DEFAULT_BLOCKSIZE:mb,&ctx->mb);CHKERRQ(ierr);
  ierr = PetscBLASIntCast((nb==PETSC_DECIDE)?ctx->mb:nb,&ctx->nb);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(rsrc,&ctx->rsrc);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(csrc,&ctx->csrc);CHKERRQ(ierr);
  ierr = MatSetUp(*A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}