#include <petsc/private/petscscalapack.h>    /*I "petscmat.h" I*/

#define DEFAULT_BLOCKSIZE 64

/*
    The variable Petsc_ScaLAPACK_keyval is used to indicate an MPI attribute that
  is attached to a communicator; in this case the attribute is a Mat_ScaLAPACK_Grid
*/
static PetscMPIInt Petsc_ScaLAPACK_keyval = MPI_KEYVAL_INVALID;

static PetscErrorCode Petsc_ScaLAPACK_keyval_free(void)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscInfo(NULL,"Freeing Petsc_ScaLAPACK_keyval\n");CHKERRQ(ierr);
  ierr = MPI_Comm_free_keyval(&Petsc_ScaLAPACK_keyval);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatView_ScaLAPACK(Mat A,PetscViewer viewer)
{
  PetscErrorCode    ierr;
  Mat_ScaLAPACK     *a = (Mat_ScaLAPACK*)A->data;
  PetscBool         iascii;
  PetscViewerFormat format;
  Mat               Adense;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      ierr = PetscViewerASCIIPrintf(viewer,"block sizes: %d,%d\n",(int)a->mb,(int)a->nb);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"grid height=%d, grid width=%d\n",(int)a->grid->nprow,(int)a->grid->npcol);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"coordinates of process owning first row and column: (%d,%d)\n",(int)a->rsrc,(int)a->csrc);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"dimension of largest local matrix: %d x %d\n",(int)a->locr,(int)a->locc);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }
  /* convert to dense format and call MatView() */
  ierr = MatConvert(A,MATDENSE,MAT_INITIAL_MATRIX,&Adense);CHKERRQ(ierr);
  ierr = MatView(Adense,viewer);CHKERRQ(ierr);
  ierr = MatDestroy(&Adense);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatGetInfo_ScaLAPACK(Mat A,MatInfoType flag,MatInfo *info)
{
  PetscErrorCode ierr;
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
  PetscLogDouble isend[2],irecv[2];

  PetscFunctionBegin;
  info->block_size = 1.0;

  isend[0] = a->lld*a->locc;     /* locally allocated */
  isend[1] = a->locr*a->locc;    /* used submatrix */
  if (flag == MAT_LOCAL) {
    info->nz_allocated = isend[0];
    info->nz_used      = isend[1];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPIU_Allreduce(isend,irecv,2,MPIU_PETSCLOGDOUBLE,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
    info->nz_allocated = irecv[0];
    info->nz_used      = irecv[1];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPIU_Allreduce(isend,irecv,2,MPIU_PETSCLOGDOUBLE,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
    info->nz_allocated = irecv[0];
    info->nz_used      = irecv[1];
  }

  info->nz_unneeded       = 0;
  info->assemblies        = A->num_ass;
  info->mallocs           = 0;
  info->memory            = ((PetscObject)A)->mem;
  info->fill_ratio_given  = 0;
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetOption_ScaLAPACK(Mat A,MatOption op,PetscBool flg)
{
  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_SYMMETRIC:
  case MAT_SORTED_FULL:
  case MAT_HERMITIAN:
    break;
  default:
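    /* any option not handled above has no meaning for dense block-cyclic storage and is rejected */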
SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unsupported option %s",MatOptions[op]); 97 } 98 PetscFunctionReturn(0); 99 } 100 101 static PetscErrorCode MatSetValues_ScaLAPACK(Mat A,PetscInt nr,const PetscInt *rows,PetscInt nc,const PetscInt *cols,const PetscScalar *vals,InsertMode imode) 102 { 103 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 104 PetscErrorCode ierr; 105 PetscInt i,j; 106 PetscBLASInt gridx,gcidx,lridx,lcidx,rsrc,csrc; 107 108 PetscFunctionBegin; 109 for (i=0;i<nr;i++) { 110 if (rows[i] < 0) continue; 111 ierr = PetscBLASIntCast(rows[i]+1,&gridx);CHKERRQ(ierr); 112 for (j=0;j<nc;j++) { 113 if (cols[j] < 0) continue; 114 ierr = PetscBLASIntCast(cols[j]+1,&gcidx);CHKERRQ(ierr); 115 PetscStackCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc)); 116 if (rsrc==a->grid->myrow && csrc==a->grid->mycol) { 117 switch (imode) { 118 case INSERT_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] = vals[i*nc+j]; break; 119 case ADD_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] += vals[i*nc+j]; break; 120 default: SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for InsertMode %d",(int)imode); 121 } 122 } else { 123 if (A->nooffprocentries) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process entry even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set"); 124 A->assembled = PETSC_FALSE; 125 ierr = MatStashValuesRow_Private(&A->stash,rows[i],1,cols+j,vals+i*nc+j,(PetscBool)(imode==ADD_VALUES));CHKERRQ(ierr); 126 } 127 } 128 } 129 PetscFunctionReturn(0); 130 } 131 132 static PetscErrorCode MatMultXXXYYY_ScaLAPACK(Mat A,PetscBool transpose,PetscScalar beta,const PetscScalar *x,PetscScalar *y) 133 { 134 PetscErrorCode ierr; 135 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 136 PetscScalar *x2d,*y2d,alpha=1.0; 137 const PetscInt *ranges; 138 PetscBLASInt xdesc[9],ydesc[9],x2desc[9],y2desc[9],mb,nb,lszx,lszy,zero=0,one=1,xlld,ylld,info; 139 140 PetscFunctionBegin; 141 if (transpose) { 142 143 /* create ScaLAPACK descriptors for vectors (1d block distribution) */ 144 ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr); 145 ierr = PetscBLASIntCast(ranges[1],&mb);CHKERRQ(ierr); /* x block size */ 146 xlld = PetscMax(1,A->rmap->n); 147 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&xlld,&info)); 148 PetscCheckScaLapackInfo("descinit",info); 149 ierr = PetscLayoutGetRanges(A->cmap,&ranges);CHKERRQ(ierr); 150 ierr = PetscBLASIntCast(ranges[1],&nb);CHKERRQ(ierr); /* y block size */ 151 ylld = 1; 152 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ydesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&ylld,&info)); 153 PetscCheckScaLapackInfo("descinit",info); 154 155 /* allocate 2d vectors */ 156 lszx = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow); 157 lszy = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol); 158 ierr = PetscMalloc2(lszx,&x2d,lszy,&y2d);CHKERRQ(ierr); 159 xlld = PetscMax(1,lszx); 160 161 /* create ScaLAPACK descriptors for vectors (2d block distribution) */ 162 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&xlld,&info)); 163 PetscCheckScaLapackInfo("descinit",info); 164 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(y2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&ylld,&info)); 165 PetscCheckScaLapackInfo("descinit",info); 166 167 /* 
redistribute x as a column of a 2d matrix */ 168 PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,(PetscScalar*)x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxcol)); 169 170 /* redistribute y as a row of a 2d matrix */ 171 if (beta!=0.0) PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,y,&one,&one,ydesc,y2d,&one,&one,y2desc,&a->grid->ictxrow)); 172 173 /* call PBLAS subroutine */ 174 PetscStackCallBLAS("PBLASgemv",PBLASgemv_("T",&a->M,&a->N,&alpha,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&one,&beta,y2d,&one,&one,y2desc,&one)); 175 176 /* redistribute y from a row of a 2d matrix */ 177 PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,y2d,&one,&one,y2desc,y,&one,&one,ydesc,&a->grid->ictxrow)); 178 179 } else { /* non-transpose */ 180 181 /* create ScaLAPACK descriptors for vectors (1d block distribution) */ 182 ierr = PetscLayoutGetRanges(A->cmap,&ranges);CHKERRQ(ierr); 183 ierr = PetscBLASIntCast(ranges[1],&nb);CHKERRQ(ierr); /* x block size */ 184 xlld = 1; 185 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&xlld,&info)); 186 PetscCheckScaLapackInfo("descinit",info); 187 ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr); 188 ierr = PetscBLASIntCast(ranges[1],&mb);CHKERRQ(ierr); /* y block size */ 189 ylld = PetscMax(1,A->rmap->n); 190 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ydesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&ylld,&info)); 191 PetscCheckScaLapackInfo("descinit",info); 192 193 /* allocate 2d vectors */ 194 lszy = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow); 195 lszx = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol); 196 ierr = PetscMalloc2(lszx,&x2d,lszy,&y2d);CHKERRQ(ierr); 197 ylld = PetscMax(1,lszy); 198 199 /* create ScaLAPACK descriptors for vectors (2d block distribution) */ 200 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&xlld,&info)); 201 PetscCheckScaLapackInfo("descinit",info); 202 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(y2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&ylld,&info)); 203 PetscCheckScaLapackInfo("descinit",info); 204 205 /* redistribute x as a row of a 2d matrix */ 206 PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,(PetscScalar*)x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxrow)); 207 208 /* redistribute y as a column of a 2d matrix */ 209 if (beta!=0.0) PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,y,&one,&one,ydesc,y2d,&one,&one,y2desc,&a->grid->ictxcol)); 210 211 /* call PBLAS subroutine */ 212 PetscStackCallBLAS("PBLASgemv",PBLASgemv_("N",&a->M,&a->N,&alpha,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&one,&beta,y2d,&one,&one,y2desc,&one)); 213 214 /* redistribute y from a column of a 2d matrix */ 215 PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,y2d,&one,&one,y2desc,y,&one,&one,ydesc,&a->grid->ictxcol)); 216 217 } 218 ierr = PetscFree2(x2d,y2d);CHKERRQ(ierr); 219 PetscFunctionReturn(0); 220 } 221 222 static PetscErrorCode MatMult_ScaLAPACK(Mat A,Vec x,Vec y) 223 { 224 PetscErrorCode ierr; 225 const PetscScalar *xarray; 226 PetscScalar *yarray; 227 228 PetscFunctionBegin; 229 ierr = VecGetArrayRead(x,&xarray);CHKERRQ(ierr); 230 ierr = VecGetArray(y,&yarray);CHKERRQ(ierr); 231 ierr = MatMultXXXYYY_ScaLAPACK(A,PETSC_FALSE,0.0,xarray,yarray);CHKERRQ(ierr); 232 ierr = 
VecRestoreArrayRead(x,&xarray);CHKERRQ(ierr); 233 ierr = VecRestoreArray(y,&yarray);CHKERRQ(ierr); 234 PetscFunctionReturn(0); 235 } 236 237 static PetscErrorCode MatMultTranspose_ScaLAPACK(Mat A,Vec x,Vec y) 238 { 239 PetscErrorCode ierr; 240 const PetscScalar *xarray; 241 PetscScalar *yarray; 242 243 PetscFunctionBegin; 244 ierr = VecGetArrayRead(x,&xarray);CHKERRQ(ierr); 245 ierr = VecGetArray(y,&yarray);CHKERRQ(ierr); 246 ierr = MatMultXXXYYY_ScaLAPACK(A,PETSC_TRUE,0.0,xarray,yarray);CHKERRQ(ierr); 247 ierr = VecRestoreArrayRead(x,&xarray);CHKERRQ(ierr); 248 ierr = VecRestoreArray(y,&yarray);CHKERRQ(ierr); 249 PetscFunctionReturn(0); 250 } 251 252 static PetscErrorCode MatMultAdd_ScaLAPACK(Mat A,Vec x,Vec y,Vec z) 253 { 254 PetscErrorCode ierr; 255 const PetscScalar *xarray; 256 PetscScalar *zarray; 257 258 PetscFunctionBegin; 259 if (y != z) { ierr = VecCopy(y,z);CHKERRQ(ierr); } 260 ierr = VecGetArrayRead(x,&xarray);CHKERRQ(ierr); 261 ierr = VecGetArray(z,&zarray);CHKERRQ(ierr); 262 ierr = MatMultXXXYYY_ScaLAPACK(A,PETSC_FALSE,1.0,xarray,zarray);CHKERRQ(ierr); 263 ierr = VecRestoreArrayRead(x,&xarray);CHKERRQ(ierr); 264 ierr = VecRestoreArray(z,&zarray);CHKERRQ(ierr); 265 PetscFunctionReturn(0); 266 } 267 268 static PetscErrorCode MatMultTransposeAdd_ScaLAPACK(Mat A,Vec x,Vec y,Vec z) 269 { 270 PetscErrorCode ierr; 271 const PetscScalar *xarray; 272 PetscScalar *zarray; 273 274 PetscFunctionBegin; 275 if (y != z) { ierr = VecCopy(y,z);CHKERRQ(ierr); } 276 ierr = VecGetArrayRead(x,&xarray);CHKERRQ(ierr); 277 ierr = VecGetArray(z,&zarray);CHKERRQ(ierr); 278 ierr = MatMultXXXYYY_ScaLAPACK(A,PETSC_TRUE,1.0,xarray,zarray);CHKERRQ(ierr); 279 ierr = VecRestoreArrayRead(x,&xarray);CHKERRQ(ierr); 280 ierr = VecRestoreArray(z,&zarray);CHKERRQ(ierr); 281 PetscFunctionReturn(0); 282 } 283 284 PetscErrorCode MatMatMultNumeric_ScaLAPACK(Mat A,Mat B,Mat C) 285 { 286 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 287 Mat_ScaLAPACK *b = (Mat_ScaLAPACK*)B->data; 288 Mat_ScaLAPACK *c = (Mat_ScaLAPACK*)C->data; 289 PetscScalar sone=1.0,zero=0.0; 290 PetscBLASInt one=1; 291 292 PetscFunctionBegin; 293 PetscStackCallBLAS("PBLASgemm",PBLASgemm_("N","N",&a->M,&b->N,&a->N,&sone,a->loc,&one,&one,a->desc,b->loc,&one,&one,b->desc,&zero,c->loc,&one,&one,c->desc)); 294 C->assembled = PETSC_TRUE; 295 PetscFunctionReturn(0); 296 } 297 298 PetscErrorCode MatMatMultSymbolic_ScaLAPACK(Mat A,Mat B,PetscReal fill,Mat C) 299 { 300 PetscErrorCode ierr; 301 302 PetscFunctionBegin; 303 ierr = MatSetSizes(C,A->rmap->n,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 304 ierr = MatSetType(C,MATSCALAPACK);CHKERRQ(ierr); 305 ierr = MatSetUp(C);CHKERRQ(ierr); 306 C->ops->matmultnumeric = MatMatMultNumeric_ScaLAPACK; 307 PetscFunctionReturn(0); 308 } 309 310 static PetscErrorCode MatMatTransposeMultNumeric_ScaLAPACK(Mat A,Mat B,Mat C) 311 { 312 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 313 Mat_ScaLAPACK *b = (Mat_ScaLAPACK*)B->data; 314 Mat_ScaLAPACK *c = (Mat_ScaLAPACK*)C->data; 315 PetscScalar sone=1.0,zero=0.0; 316 PetscBLASInt one=1; 317 318 PetscFunctionBegin; 319 PetscStackCallBLAS("PBLASgemm",PBLASgemm_("N","T",&a->M,&b->M,&a->N,&sone,a->loc,&one,&one,a->desc,b->loc,&one,&one,b->desc,&zero,c->loc,&one,&one,c->desc)); 320 C->assembled = PETSC_TRUE; 321 PetscFunctionReturn(0); 322 } 323 324 static PetscErrorCode MatMatTransposeMultSymbolic_ScaLAPACK(Mat A,Mat B,PetscReal fill,Mat C) 325 { 326 PetscErrorCode ierr; 327 328 PetscFunctionBegin; 329 ierr = 
MatSetSizes(C,A->rmap->n,B->rmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 330 ierr = MatSetType(C,MATSCALAPACK);CHKERRQ(ierr); 331 ierr = MatSetUp(C);CHKERRQ(ierr); 332 PetscFunctionReturn(0); 333 } 334 335 /* --------------------------------------- */ 336 static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_AB(Mat C) 337 { 338 PetscFunctionBegin; 339 C->ops->matmultsymbolic = MatMatMultSymbolic_ScaLAPACK; 340 C->ops->productsymbolic = MatProductSymbolic_AB; 341 PetscFunctionReturn(0); 342 } 343 344 static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_ABt(Mat C) 345 { 346 PetscFunctionBegin; 347 C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_ScaLAPACK; 348 C->ops->productsymbolic = MatProductSymbolic_ABt; 349 PetscFunctionReturn(0); 350 } 351 352 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_ScaLAPACK(Mat C) 353 { 354 PetscErrorCode ierr; 355 Mat_Product *product = C->product; 356 357 PetscFunctionBegin; 358 switch (product->type) { 359 case MATPRODUCT_AB: 360 ierr = MatProductSetFromOptions_ScaLAPACK_AB(C);CHKERRQ(ierr); 361 break; 362 case MATPRODUCT_ABt: 363 ierr = MatProductSetFromOptions_ScaLAPACK_ABt(C);CHKERRQ(ierr); 364 break; 365 default: SETERRQ1(PetscObjectComm((PetscObject)C),PETSC_ERR_SUP,"MatProduct type %s is not supported for ScaLAPACK and ScaLAPACK matrices",MatProductTypes[product->type]); 366 } 367 PetscFunctionReturn(0); 368 } 369 /* --------------------------------------- */ 370 371 static PetscErrorCode MatGetDiagonal_ScaLAPACK(Mat A,Vec D) 372 { 373 PetscErrorCode ierr; 374 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 375 PetscScalar *darray,*d2d,v; 376 const PetscInt *ranges; 377 PetscBLASInt j,ddesc[9],d2desc[9],mb,nb,lszd,zero=0,one=1,dlld,info; 378 379 PetscFunctionBegin; 380 ierr = VecGetArray(D,&darray);CHKERRQ(ierr); 381 382 if (A->rmap->N<=A->cmap->N) { /* row version */ 383 384 /* create ScaLAPACK descriptor for vector (1d block distribution) */ 385 ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr); 386 ierr = PetscBLASIntCast(ranges[1],&mb);CHKERRQ(ierr); /* D block size */ 387 dlld = PetscMax(1,A->rmap->n); 388 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&dlld,&info)); 389 PetscCheckScaLapackInfo("descinit",info); 390 391 /* allocate 2d vector */ 392 lszd = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow); 393 ierr = PetscCalloc1(lszd,&d2d);CHKERRQ(ierr); 394 dlld = PetscMax(1,lszd); 395 396 /* create ScaLAPACK descriptor for vector (2d block distribution) */ 397 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&dlld,&info)); 398 PetscCheckScaLapackInfo("descinit",info); 399 400 /* collect diagonal */ 401 for (j=1;j<=a->M;j++) { 402 PetscStackCallBLAS("SCALAPACKelget",SCALAPACKelget_("R"," ",&v,a->loc,&j,&j,a->desc)); 403 PetscStackCallBLAS("SCALAPACKelset",SCALAPACKelset_(d2d,&j,&one,d2desc,&v)); 404 } 405 406 /* redistribute d from a column of a 2d matrix */ 407 PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,d2d,&one,&one,d2desc,darray,&one,&one,ddesc,&a->grid->ictxcol)); 408 ierr = PetscFree(d2d);CHKERRQ(ierr); 409 410 } else { /* column version */ 411 412 /* create ScaLAPACK descriptor for vector (1d block distribution) */ 413 ierr = PetscLayoutGetRanges(A->cmap,&ranges);CHKERRQ(ierr); 414 ierr = PetscBLASIntCast(ranges[1],&nb);CHKERRQ(ierr); /* D block size */ 415 dlld = 1; 416 
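    /* descinit_ takes (desc,M,N,mb,nb,rsrc,csrc,ictxt,lld,info): here D is viewed as a
       1 x N matrix with column block size nb (the columns owned by the first process),
       placed on the single-process-row context ictxrow so that it matches PETSc's 1d layout */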
PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&dlld,&info)); 417 PetscCheckScaLapackInfo("descinit",info); 418 419 /* allocate 2d vector */ 420 lszd = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol); 421 ierr = PetscCalloc1(lszd,&d2d);CHKERRQ(ierr); 422 423 /* create ScaLAPACK descriptor for vector (2d block distribution) */ 424 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&dlld,&info)); 425 PetscCheckScaLapackInfo("descinit",info); 426 427 /* collect diagonal */ 428 for (j=1;j<=a->N;j++) { 429 PetscStackCallBLAS("SCALAPACKelget",SCALAPACKelget_("C"," ",&v,a->loc,&j,&j,a->desc)); 430 PetscStackCallBLAS("SCALAPACKelset",SCALAPACKelset_(d2d,&one,&j,d2desc,&v)); 431 } 432 433 /* redistribute d from a row of a 2d matrix */ 434 PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,d2d,&one,&one,d2desc,darray,&one,&one,ddesc,&a->grid->ictxrow)); 435 ierr = PetscFree(d2d);CHKERRQ(ierr); 436 } 437 438 ierr = VecRestoreArray(D,&darray);CHKERRQ(ierr); 439 ierr = VecAssemblyBegin(D);CHKERRQ(ierr); 440 ierr = VecAssemblyEnd(D);CHKERRQ(ierr); 441 PetscFunctionReturn(0); 442 } 443 444 static PetscErrorCode MatDiagonalScale_ScaLAPACK(Mat A,Vec L,Vec R) 445 { 446 PetscErrorCode ierr; 447 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 448 const PetscScalar *d; 449 const PetscInt *ranges; 450 PetscScalar *d2d; 451 PetscBLASInt i,j,ddesc[9],d2desc[9],mb,nb,lszd,zero=0,one=1,dlld,info; 452 453 PetscFunctionBegin; 454 if (R) { 455 ierr = VecGetArrayRead(R,(const PetscScalar **)&d);CHKERRQ(ierr); 456 /* create ScaLAPACK descriptor for vector (1d block distribution) */ 457 ierr = PetscLayoutGetRanges(A->cmap,&ranges);CHKERRQ(ierr); 458 ierr = PetscBLASIntCast(ranges[1],&nb);CHKERRQ(ierr); /* D block size */ 459 dlld = 1; 460 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&dlld,&info)); 461 PetscCheckScaLapackInfo("descinit",info); 462 463 /* allocate 2d vector */ 464 lszd = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol); 465 ierr = PetscCalloc1(lszd,&d2d);CHKERRQ(ierr); 466 467 /* create ScaLAPACK descriptor for vector (2d block distribution) */ 468 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&dlld,&info)); 469 PetscCheckScaLapackInfo("descinit",info); 470 471 /* redistribute d to a row of a 2d matrix */ 472 PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,(PetscScalar*)d,&one,&one,ddesc,d2d,&one,&one,d2desc,&a->grid->ictxrow)); 473 474 /* broadcast along process columns */ 475 if (!a->grid->myrow) Cdgebs2d(a->grid->ictxt,"C"," ",1,lszd,d2d,dlld); 476 else Cdgebr2d(a->grid->ictxt,"C"," ",1,lszd,d2d,dlld,0,a->grid->mycol); 477 478 /* local scaling */ 479 for (j=0;j<a->locc;j++) for (i=0;i<a->locr;i++) a->loc[i+j*a->lld] *= d2d[j]; 480 481 ierr = PetscFree(d2d);CHKERRQ(ierr); 482 ierr = VecRestoreArrayRead(R,(const PetscScalar **)&d);CHKERRQ(ierr); 483 } 484 if (L) { 485 ierr = VecGetArrayRead(L,(const PetscScalar **)&d);CHKERRQ(ierr); 486 /* create ScaLAPACK descriptor for vector (1d block distribution) */ 487 ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr); 488 ierr = PetscBLASIntCast(ranges[1],&mb);CHKERRQ(ierr); /* D block size */ 489 dlld = PetscMax(1,A->rmap->n); 490 
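    /* the scaling vector L is viewed as an M x 1 matrix with row block size mb (the rows
       owned by the first process) on the single-process-column context ictxcol, matching
       PETSc's 1d row layout of the vector */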
PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&dlld,&info)); 491 PetscCheckScaLapackInfo("descinit",info); 492 493 /* allocate 2d vector */ 494 lszd = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow); 495 ierr = PetscCalloc1(lszd,&d2d);CHKERRQ(ierr); 496 dlld = PetscMax(1,lszd); 497 498 /* create ScaLAPACK descriptor for vector (2d block distribution) */ 499 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&dlld,&info)); 500 PetscCheckScaLapackInfo("descinit",info); 501 502 /* redistribute d to a column of a 2d matrix */ 503 PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,(PetscScalar*)d,&one,&one,ddesc,d2d,&one,&one,d2desc,&a->grid->ictxcol)); 504 505 /* broadcast along process rows */ 506 if (!a->grid->mycol) Cdgebs2d(a->grid->ictxt,"R"," ",lszd,1,d2d,dlld); 507 else Cdgebr2d(a->grid->ictxt,"R"," ",lszd,1,d2d,dlld,a->grid->myrow,0); 508 509 /* local scaling */ 510 for (i=0;i<a->locr;i++) for (j=0;j<a->locc;j++) a->loc[i+j*a->lld] *= d2d[i]; 511 512 ierr = PetscFree(d2d);CHKERRQ(ierr); 513 ierr = VecRestoreArrayRead(L,(const PetscScalar **)&d);CHKERRQ(ierr); 514 } 515 PetscFunctionReturn(0); 516 } 517 518 static PetscErrorCode MatMissingDiagonal_ScaLAPACK(Mat A,PetscBool *missing,PetscInt *d) 519 { 520 PetscFunctionBegin; 521 *missing = PETSC_FALSE; 522 PetscFunctionReturn(0); 523 } 524 525 static PetscErrorCode MatScale_ScaLAPACK(Mat X,PetscScalar a) 526 { 527 Mat_ScaLAPACK *x = (Mat_ScaLAPACK*)X->data; 528 PetscBLASInt n,one=1; 529 530 PetscFunctionBegin; 531 n = x->lld*x->locc; 532 PetscStackCallBLAS("BLASscal",BLASscal_(&n,&a,x->loc,&one)); 533 PetscFunctionReturn(0); 534 } 535 536 static PetscErrorCode MatShift_ScaLAPACK(Mat X,PetscScalar alpha) 537 { 538 Mat_ScaLAPACK *x = (Mat_ScaLAPACK*)X->data; 539 PetscBLASInt i,n; 540 PetscScalar v; 541 542 PetscFunctionBegin; 543 n = PetscMin(x->M,x->N); 544 for (i=1;i<=n;i++) { 545 PetscStackCallBLAS("SCALAPACKelget",SCALAPACKelget_("-"," ",&v,x->loc,&i,&i,x->desc)); 546 v += alpha; 547 PetscStackCallBLAS("SCALAPACKelset",SCALAPACKelset_(x->loc,&i,&i,x->desc,&v)); 548 } 549 PetscFunctionReturn(0); 550 } 551 552 static PetscErrorCode MatAXPY_ScaLAPACK(Mat Y,PetscScalar alpha,Mat X,MatStructure str) 553 { 554 PetscErrorCode ierr; 555 Mat_ScaLAPACK *x = (Mat_ScaLAPACK*)X->data; 556 Mat_ScaLAPACK *y = (Mat_ScaLAPACK*)Y->data; 557 PetscBLASInt one=1; 558 PetscScalar beta=1.0; 559 560 PetscFunctionBegin; 561 MatScaLAPACKCheckDistribution(Y,1,X,3); 562 PetscStackCallBLAS("SCALAPACKmatadd",SCALAPACKmatadd_(&x->M,&x->N,&alpha,x->loc,&one,&one,x->desc,&beta,y->loc,&one,&one,y->desc)); 563 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 564 PetscFunctionReturn(0); 565 } 566 567 static PetscErrorCode MatCopy_ScaLAPACK(Mat A,Mat B,MatStructure str) 568 { 569 PetscErrorCode ierr; 570 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 571 Mat_ScaLAPACK *b = (Mat_ScaLAPACK*)B->data; 572 573 PetscFunctionBegin; 574 ierr = PetscArraycpy(b->loc,a->loc,a->lld*a->locc);CHKERRQ(ierr); 575 ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 576 PetscFunctionReturn(0); 577 } 578 579 static PetscErrorCode MatDuplicate_ScaLAPACK(Mat A,MatDuplicateOption op,Mat *B) 580 { 581 Mat Bs; 582 MPI_Comm comm; 583 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data,*b; 584 PetscErrorCode ierr; 585 586 PetscFunctionBegin; 587 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 588 ierr 
= MatCreate(comm,&Bs);CHKERRQ(ierr); 589 ierr = MatSetSizes(Bs,A->rmap->n,A->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 590 ierr = MatSetType(Bs,MATSCALAPACK);CHKERRQ(ierr); 591 b = (Mat_ScaLAPACK*)Bs->data; 592 b->M = a->M; 593 b->N = a->N; 594 b->mb = a->mb; 595 b->nb = a->nb; 596 b->rsrc = a->rsrc; 597 b->csrc = a->csrc; 598 ierr = MatSetUp(Bs);CHKERRQ(ierr); 599 *B = Bs; 600 if (op == MAT_COPY_VALUES) { 601 ierr = PetscArraycpy(b->loc,a->loc,a->lld*a->locc);CHKERRQ(ierr); 602 } 603 Bs->assembled = PETSC_TRUE; 604 PetscFunctionReturn(0); 605 } 606 607 static PetscErrorCode MatTranspose_ScaLAPACK(Mat A,MatReuse reuse,Mat *B) 608 { 609 PetscErrorCode ierr; 610 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data, *b; 611 Mat Bs = *B; 612 PetscBLASInt one=1; 613 PetscScalar sone=1.0,zero=0.0; 614 #if defined(PETSC_USE_COMPLEX) 615 PetscInt i,j; 616 #endif 617 618 PetscFunctionBegin; 619 if (reuse == MAT_INITIAL_MATRIX) { 620 ierr = MatCreate(PetscObjectComm((PetscObject)A),&Bs);CHKERRQ(ierr); 621 ierr = MatSetSizes(Bs,A->cmap->n,A->rmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 622 ierr = MatSetType(Bs,MATSCALAPACK);CHKERRQ(ierr); 623 ierr = MatSetUp(Bs);CHKERRQ(ierr); 624 *B = Bs; 625 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Only MAT_INITIAL_MATRIX supported"); 626 b = (Mat_ScaLAPACK*)Bs->data; 627 PetscStackCallBLAS("PBLAStran",PBLAStran_(&a->N,&a->M,&sone,a->loc,&one,&one,a->desc,&zero,b->loc,&one,&one,b->desc)); 628 #if defined(PETSC_USE_COMPLEX) 629 /* undo conjugation */ 630 for (i=0;i<b->locr;i++) for (j=0;j<b->locc;j++) b->loc[i+j*b->lld] = PetscConj(b->loc[i+j*b->lld]); 631 #endif 632 Bs->assembled = PETSC_TRUE; 633 PetscFunctionReturn(0); 634 } 635 636 static PetscErrorCode MatConjugate_ScaLAPACK(Mat A) 637 { 638 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 639 PetscInt i,j; 640 641 PetscFunctionBegin; 642 for (i=0;i<a->locr;i++) for (j=0;j<a->locc;j++) a->loc[i+j*a->lld] = PetscConj(a->loc[i+j*a->lld]); 643 PetscFunctionReturn(0); 644 } 645 646 static PetscErrorCode MatHermitianTranspose_ScaLAPACK(Mat A,MatReuse reuse,Mat *B) 647 { 648 PetscErrorCode ierr; 649 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data, *b; 650 Mat Bs = *B; 651 PetscBLASInt one=1; 652 PetscScalar sone=1.0,zero=0.0; 653 654 PetscFunctionBegin; 655 if (reuse == MAT_INITIAL_MATRIX) { 656 ierr = MatCreate(PetscObjectComm((PetscObject)A),&Bs);CHKERRQ(ierr); 657 ierr = MatSetSizes(Bs,A->cmap->n,A->rmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 658 ierr = MatSetType(Bs,MATSCALAPACK);CHKERRQ(ierr); 659 ierr = MatSetUp(Bs);CHKERRQ(ierr); 660 *B = Bs; 661 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Only MAT_INITIAL_MATRIX supported"); 662 b = (Mat_ScaLAPACK*)Bs->data; 663 PetscStackCallBLAS("PBLAStran",PBLAStran_(&a->N,&a->M,&sone,a->loc,&one,&one,a->desc,&zero,b->loc,&one,&one,b->desc)); 664 Bs->assembled = PETSC_TRUE; 665 PetscFunctionReturn(0); 666 } 667 668 static PetscErrorCode MatSolve_ScaLAPACK(Mat A,Vec B,Vec X) 669 { 670 PetscErrorCode ierr; 671 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 672 PetscScalar *x,*x2d; 673 const PetscInt *ranges; 674 PetscBLASInt xdesc[9],x2desc[9],mb,lszx,zero=0,one=1,xlld,nrhs=1,info; 675 676 PetscFunctionBegin; 677 ierr = VecCopy(B,X);CHKERRQ(ierr); 678 ierr = VecGetArray(X,&x);CHKERRQ(ierr); 679 680 /* create ScaLAPACK descriptor for a vector (1d block distribution) */ 681 ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr); 682 ierr = PetscBLASIntCast(ranges[1],&mb);CHKERRQ(ierr); /* x block size */ 683 xlld = PetscMax(1,A->rmap->n); 
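  /* the right-hand side arrives in PETSc's 1d row layout; it is described as an M x 1
     matrix on the ictxcol context, redistributed with p?gemr2d to the 2d block-cyclic
     layout of the factored matrix, solved there, and then redistributed back */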
684 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&xlld,&info)); 685 PetscCheckScaLapackInfo("descinit",info); 686 687 /* allocate 2d vector */ 688 lszx = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow); 689 ierr = PetscMalloc1(lszx,&x2d);CHKERRQ(ierr); 690 xlld = PetscMax(1,lszx); 691 692 /* create ScaLAPACK descriptor for a vector (2d block distribution) */ 693 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&xlld,&info)); 694 PetscCheckScaLapackInfo("descinit",info); 695 696 /* redistribute x as a column of a 2d matrix */ 697 PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxcol)); 698 699 /* call ScaLAPACK subroutine */ 700 switch (A->factortype) { 701 case MAT_FACTOR_LU: 702 PetscStackCallBLAS("SCALAPACKgetrs",SCALAPACKgetrs_("N",&a->M,&nrhs,a->loc,&one,&one,a->desc,a->pivots,x2d,&one,&one,x2desc,&info)); 703 PetscCheckScaLapackInfo("getrs",info); 704 break; 705 case MAT_FACTOR_CHOLESKY: 706 PetscStackCallBLAS("SCALAPACKpotrs",SCALAPACKpotrs_("L",&a->M,&nrhs,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&info)); 707 PetscCheckScaLapackInfo("potrs",info); 708 break; 709 default: 710 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unfactored Matrix or Unsupported MatFactorType"); 711 } 712 713 /* redistribute x from a column of a 2d matrix */ 714 PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,x2d,&one,&one,x2desc,x,&one,&one,xdesc,&a->grid->ictxcol)); 715 716 ierr = PetscFree(x2d);CHKERRQ(ierr); 717 ierr = VecRestoreArray(X,&x);CHKERRQ(ierr); 718 PetscFunctionReturn(0); 719 } 720 721 static PetscErrorCode MatSolveAdd_ScaLAPACK(Mat A,Vec B,Vec Y,Vec X) 722 { 723 PetscErrorCode ierr; 724 725 PetscFunctionBegin; 726 ierr = MatSolve_ScaLAPACK(A,B,X);CHKERRQ(ierr); 727 ierr = VecAXPY(X,1,Y);CHKERRQ(ierr); 728 PetscFunctionReturn(0); 729 } 730 731 static PetscErrorCode MatMatSolve_ScaLAPACK(Mat A,Mat B,Mat X) 732 { 733 PetscErrorCode ierr; 734 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data,*b,*x; 735 PetscBool flg1,flg2; 736 PetscBLASInt one=1,info; 737 738 PetscFunctionBegin; 739 ierr = PetscObjectTypeCompare((PetscObject)B,MATSCALAPACK,&flg1);CHKERRQ(ierr); 740 ierr = PetscObjectTypeCompare((PetscObject)X,MATSCALAPACK,&flg2);CHKERRQ(ierr); 741 if (!(flg1 && flg2)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Both B and X must be of type MATSCALAPACK"); 742 MatScaLAPACKCheckDistribution(B,1,X,2); 743 b = (Mat_ScaLAPACK*)B->data; 744 x = (Mat_ScaLAPACK*)X->data; 745 ierr = PetscArraycpy(x->loc,b->loc,b->lld*b->locc);CHKERRQ(ierr); 746 747 switch (A->factortype) { 748 case MAT_FACTOR_LU: 749 PetscStackCallBLAS("SCALAPACKgetrs",SCALAPACKgetrs_("N",&a->M,&x->N,a->loc,&one,&one,a->desc,a->pivots,x->loc,&one,&one,x->desc,&info)); 750 PetscCheckScaLapackInfo("getrs",info); 751 break; 752 case MAT_FACTOR_CHOLESKY: 753 PetscStackCallBLAS("SCALAPACKpotrs",SCALAPACKpotrs_("L",&a->M,&x->N,a->loc,&one,&one,a->desc,x->loc,&one,&one,x->desc,&info)); 754 PetscCheckScaLapackInfo("potrs",info); 755 break; 756 default: 757 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unfactored Matrix or Unsupported MatFactorType"); 758 } 759 PetscFunctionReturn(0); 760 } 761 762 static PetscErrorCode MatLUFactor_ScaLAPACK(Mat A,IS row,IS col,const MatFactorInfo *factorinfo) 763 { 764 PetscErrorCode ierr; 765 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 766 PetscBLASInt one=1,info; 767 768 PetscFunctionBegin; 
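  /* p?getrf requires a pivot array with at least LOCr(M)+MB local entries */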
769 if (!a->pivots) { 770 ierr = PetscMalloc1(a->locr+a->mb,&a->pivots);CHKERRQ(ierr); 771 ierr = PetscLogObjectMemory((PetscObject)A,a->locr*sizeof(PetscBLASInt));CHKERRQ(ierr); 772 } 773 PetscStackCallBLAS("SCALAPACKgetrf",SCALAPACKgetrf_(&a->M,&a->N,a->loc,&one,&one,a->desc,a->pivots,&info)); 774 PetscCheckScaLapackInfo("getrf",info); 775 A->factortype = MAT_FACTOR_LU; 776 A->assembled = PETSC_TRUE; 777 778 ierr = PetscFree(A->solvertype);CHKERRQ(ierr); 779 ierr = PetscStrallocpy(MATSOLVERSCALAPACK,&A->solvertype);CHKERRQ(ierr); 780 PetscFunctionReturn(0); 781 } 782 783 static PetscErrorCode MatLUFactorNumeric_ScaLAPACK(Mat F,Mat A,const MatFactorInfo *info) 784 { 785 PetscErrorCode ierr; 786 787 PetscFunctionBegin; 788 ierr = MatCopy(A,F,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 789 ierr = MatLUFactor_ScaLAPACK(F,0,0,info);CHKERRQ(ierr); 790 PetscFunctionReturn(0); 791 } 792 793 static PetscErrorCode MatLUFactorSymbolic_ScaLAPACK(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info) 794 { 795 PetscFunctionBegin; 796 /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */ 797 PetscFunctionReturn(0); 798 } 799 800 static PetscErrorCode MatCholeskyFactor_ScaLAPACK(Mat A,IS perm,const MatFactorInfo *factorinfo) 801 { 802 PetscErrorCode ierr; 803 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 804 PetscBLASInt one=1,info; 805 806 PetscFunctionBegin; 807 PetscStackCallBLAS("SCALAPACKpotrf",SCALAPACKpotrf_("L",&a->M,a->loc,&one,&one,a->desc,&info)); 808 PetscCheckScaLapackInfo("potrf",info); 809 A->factortype = MAT_FACTOR_CHOLESKY; 810 A->assembled = PETSC_TRUE; 811 812 ierr = PetscFree(A->solvertype);CHKERRQ(ierr); 813 ierr = PetscStrallocpy(MATSOLVERSCALAPACK,&A->solvertype);CHKERRQ(ierr); 814 PetscFunctionReturn(0); 815 } 816 817 static PetscErrorCode MatCholeskyFactorNumeric_ScaLAPACK(Mat F,Mat A,const MatFactorInfo *info) 818 { 819 PetscErrorCode ierr; 820 821 PetscFunctionBegin; 822 ierr = MatCopy(A,F,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 823 ierr = MatCholeskyFactor_ScaLAPACK(F,0,info);CHKERRQ(ierr); 824 PetscFunctionReturn(0); 825 } 826 827 static PetscErrorCode MatCholeskyFactorSymbolic_ScaLAPACK(Mat F,Mat A,IS perm,const MatFactorInfo *info) 828 { 829 PetscFunctionBegin; 830 /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. 
*/ 831 PetscFunctionReturn(0); 832 } 833 834 PetscErrorCode MatFactorGetSolverType_scalapack_scalapack(Mat A,MatSolverType *type) 835 { 836 PetscFunctionBegin; 837 *type = MATSOLVERSCALAPACK; 838 PetscFunctionReturn(0); 839 } 840 841 static PetscErrorCode MatGetFactor_scalapack_scalapack(Mat A,MatFactorType ftype,Mat *F) 842 { 843 Mat B; 844 PetscErrorCode ierr; 845 846 PetscFunctionBegin; 847 /* Create the factorization matrix */ 848 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 849 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 850 ierr = MatSetType(B,MATSCALAPACK);CHKERRQ(ierr); 851 ierr = MatSetUp(B);CHKERRQ(ierr); 852 B->factortype = ftype; 853 ierr = PetscFree(B->solvertype);CHKERRQ(ierr); 854 ierr = PetscStrallocpy(MATSOLVERSCALAPACK,&B->solvertype);CHKERRQ(ierr); 855 856 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_scalapack_scalapack);CHKERRQ(ierr); 857 *F = B; 858 PetscFunctionReturn(0); 859 } 860 861 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_ScaLAPACK(void) 862 { 863 PetscErrorCode ierr; 864 865 PetscFunctionBegin; 866 ierr = MatSolverTypeRegister(MATSOLVERSCALAPACK,MATSCALAPACK,MAT_FACTOR_LU,MatGetFactor_scalapack_scalapack);CHKERRQ(ierr); 867 ierr = MatSolverTypeRegister(MATSOLVERSCALAPACK,MATSCALAPACK,MAT_FACTOR_CHOLESKY,MatGetFactor_scalapack_scalapack);CHKERRQ(ierr); 868 PetscFunctionReturn(0); 869 } 870 871 static PetscErrorCode MatNorm_ScaLAPACK(Mat A,NormType type,PetscReal *nrm) 872 { 873 PetscErrorCode ierr; 874 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 875 PetscBLASInt one=1,lwork=0; 876 const char *ntype; 877 PetscScalar *work=NULL,dummy; 878 879 PetscFunctionBegin; 880 switch (type){ 881 case NORM_1: 882 ntype = "1"; 883 lwork = PetscMax(a->locr,a->locc); 884 break; 885 case NORM_FROBENIUS: 886 ntype = "F"; 887 work = &dummy; 888 break; 889 case NORM_INFINITY: 890 ntype = "I"; 891 lwork = PetscMax(a->locr,a->locc); 892 break; 893 default: 894 SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Unsupported norm type"); 895 } 896 if (lwork) { ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); } 897 *nrm = SCALAPACKlange_(ntype,&a->M,&a->N,a->loc,&one,&one,a->desc,work); 898 if (lwork) { ierr = PetscFree(work);CHKERRQ(ierr); } 899 PetscFunctionReturn(0); 900 } 901 902 static PetscErrorCode MatZeroEntries_ScaLAPACK(Mat A) 903 { 904 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 905 PetscErrorCode ierr; 906 907 PetscFunctionBegin; 908 ierr = PetscArrayzero(a->loc,a->lld*a->locc);CHKERRQ(ierr); 909 PetscFunctionReturn(0); 910 } 911 912 static PetscErrorCode MatGetOwnershipIS_ScaLAPACK(Mat A,IS *rows,IS *cols) 913 { 914 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 915 PetscErrorCode ierr; 916 PetscInt i,n,nb,isrc,nproc,iproc,*idx; 917 918 PetscFunctionBegin; 919 if (rows) { 920 n = a->locr; 921 nb = a->mb; 922 isrc = a->rsrc; 923 nproc = a->grid->nprow; 924 iproc = a->grid->myrow; 925 ierr = PetscMalloc1(n,&idx);CHKERRQ(ierr); 926 for (i=0;i<n;i++) idx[i] = nproc*nb*(i/nb) + i%nb + ((nproc+iproc-isrc)%nproc)*nb; 927 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx,PETSC_OWN_POINTER,rows);CHKERRQ(ierr); 928 } 929 if (cols) { 930 n = a->locc; 931 nb = a->nb; 932 isrc = a->csrc; 933 nproc = a->grid->npcol; 934 iproc = a->grid->mycol; 935 ierr = PetscMalloc1(n,&idx);CHKERRQ(ierr); 936 for (i=0;i<n;i++) idx[i] = nproc*nb*(i/nb) + i%nb + ((nproc+iproc-isrc)%nproc)*nb; 937 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx,PETSC_OWN_POINTER,cols);CHKERRQ(ierr); 938 } 939 
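  /* the index formula above maps local index i to its global index in the block-cyclic
     distribution: local block i/nb is placed nproc blocks apart globally, shifted by this
     process's distance from the source process; e.g. with nb=2, nproc=3, isrc=0, iproc=1
     the local indices 0,1,2,3 map to the global indices 2,3,8,9 */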
PetscFunctionReturn(0); 940 } 941 942 static PetscErrorCode MatConvert_ScaLAPACK_Dense(Mat A,MatType newtype,MatReuse reuse,Mat *B) 943 { 944 PetscErrorCode ierr; 945 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 946 Mat Bmpi; 947 MPI_Comm comm; 948 PetscInt i,M=A->rmap->N,N=A->cmap->N,m,n,rstart,rend,nz; 949 const PetscInt *ranges,*branges,*cwork; 950 const PetscScalar *vwork; 951 PetscBLASInt bdesc[9],bmb,zero=0,one=1,lld,info; 952 PetscScalar *barray; 953 PetscBool differ=PETSC_FALSE; 954 PetscMPIInt size; 955 956 PetscFunctionBegin; 957 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 958 ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr); 959 960 if (reuse == MAT_REUSE_MATRIX) { /* check if local sizes differ in A and B */ 961 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 962 ierr = PetscLayoutGetRanges((*B)->rmap,&branges);CHKERRQ(ierr); 963 for (i=0;i<size;i++) if (ranges[i+1]!=branges[i+1]) { differ=PETSC_TRUE; break; } 964 } 965 966 if (reuse == MAT_REUSE_MATRIX && differ) { /* special case, use auxiliary dense matrix */ 967 ierr = MatCreate(comm,&Bmpi);CHKERRQ(ierr); 968 m = PETSC_DECIDE; 969 ierr = PetscSplitOwnershipEqual(comm,&m,&M);CHKERRQ(ierr); 970 n = PETSC_DECIDE; 971 ierr = PetscSplitOwnershipEqual(comm,&n,&N);CHKERRQ(ierr); 972 ierr = MatSetSizes(Bmpi,m,n,M,N);CHKERRQ(ierr); 973 ierr = MatSetType(Bmpi,MATDENSE);CHKERRQ(ierr); 974 ierr = MatSetUp(Bmpi);CHKERRQ(ierr); 975 976 /* create ScaLAPACK descriptor for B (1d block distribution) */ 977 ierr = PetscBLASIntCast(ranges[1],&bmb);CHKERRQ(ierr); /* row block size */ 978 lld = PetscMax(A->rmap->n,1); /* local leading dimension */ 979 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(bdesc,&a->M,&a->N,&bmb,&a->N,&zero,&zero,&a->grid->ictxcol,&lld,&info)); 980 PetscCheckScaLapackInfo("descinit",info); 981 982 /* redistribute matrix */ 983 ierr = MatDenseGetArray(Bmpi,&barray);CHKERRQ(ierr); 984 PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&a->N,a->loc,&one,&one,a->desc,barray,&one,&one,bdesc,&a->grid->ictxcol)); 985 ierr = MatDenseRestoreArray(Bmpi,&barray);CHKERRQ(ierr); 986 ierr = MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 987 ierr = MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 988 989 /* transfer rows of auxiliary matrix to the final matrix B */ 990 ierr = MatGetOwnershipRange(Bmpi,&rstart,&rend);CHKERRQ(ierr); 991 for (i=rstart;i<rend;i++) { 992 ierr = MatGetRow(Bmpi,i,&nz,&cwork,&vwork);CHKERRQ(ierr); 993 ierr = MatSetValues(*B,1,&i,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 994 ierr = MatRestoreRow(Bmpi,i,&nz,&cwork,&vwork);CHKERRQ(ierr); 995 } 996 ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 997 ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 998 ierr = MatDestroy(&Bmpi);CHKERRQ(ierr); 999 1000 } else { /* normal cases */ 1001 1002 if (reuse == MAT_REUSE_MATRIX) Bmpi = *B; 1003 else { 1004 ierr = MatCreate(comm,&Bmpi);CHKERRQ(ierr); 1005 m = PETSC_DECIDE; 1006 ierr = PetscSplitOwnershipEqual(comm,&m,&M);CHKERRQ(ierr); 1007 n = PETSC_DECIDE; 1008 ierr = PetscSplitOwnershipEqual(comm,&n,&N);CHKERRQ(ierr); 1009 ierr = MatSetSizes(Bmpi,m,n,M,N);CHKERRQ(ierr); 1010 ierr = MatSetType(Bmpi,MATDENSE);CHKERRQ(ierr); 1011 ierr = MatSetUp(Bmpi);CHKERRQ(ierr); 1012 } 1013 1014 /* create ScaLAPACK descriptor for B (1d block distribution) */ 1015 ierr = PetscBLASIntCast(ranges[1],&bmb);CHKERRQ(ierr); /* row block size */ 1016 lld = PetscMax(A->rmap->n,1); /* local leading dimension */ 1017 
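    /* B's local array is described as a 1d block distribution: row block size bmb (the rows
       of the first process), column block size N (all columns local), on the single-process-column
       context ictxcol, so p?gemr2d can copy A's 2d block-cyclic data straight into it */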
PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(bdesc,&a->M,&a->N,&bmb,&a->N,&zero,&zero,&a->grid->ictxcol,&lld,&info)); 1018 PetscCheckScaLapackInfo("descinit",info); 1019 1020 /* redistribute matrix */ 1021 ierr = MatDenseGetArray(Bmpi,&barray);CHKERRQ(ierr); 1022 PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&a->N,a->loc,&one,&one,a->desc,barray,&one,&one,bdesc,&a->grid->ictxcol)); 1023 ierr = MatDenseRestoreArray(Bmpi,&barray);CHKERRQ(ierr); 1024 1025 ierr = MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1026 ierr = MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1027 if (reuse == MAT_INPLACE_MATRIX) { 1028 ierr = MatHeaderReplace(A,&Bmpi);CHKERRQ(ierr); 1029 } else *B = Bmpi; 1030 } 1031 PetscFunctionReturn(0); 1032 } 1033 1034 PETSC_INTERN PetscErrorCode MatConvert_Dense_ScaLAPACK(Mat A,MatType newtype,MatReuse reuse,Mat *B) 1035 { 1036 PetscErrorCode ierr; 1037 Mat_ScaLAPACK *b; 1038 Mat Bmpi; 1039 MPI_Comm comm; 1040 PetscInt M=A->rmap->N,N=A->cmap->N,m,n; 1041 const PetscInt *ranges; 1042 PetscBLASInt adesc[9],amb,zero=0,one=1,lld,info; 1043 PetscScalar *aarray; 1044 PetscInt lda; 1045 1046 PetscFunctionBegin; 1047 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1048 1049 if (reuse == MAT_REUSE_MATRIX) Bmpi = *B; 1050 else { 1051 ierr = MatCreate(comm,&Bmpi);CHKERRQ(ierr); 1052 m = PETSC_DECIDE; 1053 ierr = PetscSplitOwnershipEqual(comm,&m,&M);CHKERRQ(ierr); 1054 n = PETSC_DECIDE; 1055 ierr = PetscSplitOwnershipEqual(comm,&n,&N);CHKERRQ(ierr); 1056 ierr = MatSetSizes(Bmpi,m,n,M,N);CHKERRQ(ierr); 1057 ierr = MatSetType(Bmpi,MATSCALAPACK);CHKERRQ(ierr); 1058 ierr = MatSetUp(Bmpi);CHKERRQ(ierr); 1059 } 1060 b = (Mat_ScaLAPACK*)Bmpi->data; 1061 1062 /* create ScaLAPACK descriptor for A (1d block distribution) */ 1063 ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr); 1064 ierr = PetscBLASIntCast(ranges[1],&amb);CHKERRQ(ierr); /* row block size */ 1065 ierr = MatDenseGetLDA(A,&lda);CHKERRQ(ierr); 1066 lld = PetscMax(lda,1); /* local leading dimension */ 1067 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(adesc,&b->M,&b->N,&amb,&b->N,&zero,&zero,&b->grid->ictxcol,&lld,&info)); 1068 PetscCheckScaLapackInfo("descinit",info); 1069 1070 /* redistribute matrix */ 1071 ierr = MatDenseGetArray(A,&aarray);CHKERRQ(ierr); 1072 PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&b->M,&b->N,aarray,&one,&one,adesc,b->loc,&one,&one,b->desc,&b->grid->ictxcol)); 1073 ierr = MatDenseRestoreArray(A,&aarray);CHKERRQ(ierr); 1074 1075 ierr = MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1076 ierr = MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1077 if (reuse == MAT_INPLACE_MATRIX) { 1078 ierr = MatHeaderReplace(A,&Bmpi);CHKERRQ(ierr); 1079 } else *B = Bmpi; 1080 PetscFunctionReturn(0); 1081 } 1082 1083 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat A,MatType newtype,MatReuse reuse,Mat *newmat) 1084 { 1085 Mat mat_scal; 1086 PetscErrorCode ierr; 1087 PetscInt M=A->rmap->N,N=A->cmap->N,rstart=A->rmap->rstart,rend=A->rmap->rend,m,n,row,ncols; 1088 const PetscInt *cols; 1089 const PetscScalar *vals; 1090 1091 PetscFunctionBegin; 1092 if (reuse == MAT_REUSE_MATRIX) { 1093 mat_scal = *newmat; 1094 ierr = MatZeroEntries(mat_scal);CHKERRQ(ierr); 1095 } else { 1096 ierr = MatCreate(PetscObjectComm((PetscObject)A),&mat_scal);CHKERRQ(ierr); 1097 m = PETSC_DECIDE; 1098 ierr = PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&m,&M);CHKERRQ(ierr); 1099 n = PETSC_DECIDE; 1100 ierr = 
PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&n,&N);CHKERRQ(ierr); 1101 ierr = MatSetSizes(mat_scal,m,n,M,N);CHKERRQ(ierr); 1102 ierr = MatSetType(mat_scal,MATSCALAPACK);CHKERRQ(ierr); 1103 ierr = MatSetUp(mat_scal);CHKERRQ(ierr); 1104 } 1105 for (row=rstart;row<rend;row++) { 1106 ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr); 1107 ierr = MatSetValues(mat_scal,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 1108 ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr); 1109 } 1110 ierr = MatAssemblyBegin(mat_scal,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1111 ierr = MatAssemblyEnd(mat_scal,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1112 1113 if (reuse == MAT_INPLACE_MATRIX) { ierr = MatHeaderReplace(A,&mat_scal);CHKERRQ(ierr); } 1114 else *newmat = mat_scal; 1115 PetscFunctionReturn(0); 1116 } 1117 1118 PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 1119 { 1120 Mat mat_scal; 1121 PetscErrorCode ierr; 1122 PetscInt M=A->rmap->N,N=A->cmap->N,m,n,row,ncols,j,rstart=A->rmap->rstart,rend=A->rmap->rend; 1123 const PetscInt *cols; 1124 const PetscScalar *vals; 1125 PetscScalar v; 1126 1127 PetscFunctionBegin; 1128 if (reuse == MAT_REUSE_MATRIX) { 1129 mat_scal = *newmat; 1130 ierr = MatZeroEntries(mat_scal);CHKERRQ(ierr); 1131 } else { 1132 ierr = MatCreate(PetscObjectComm((PetscObject)A),&mat_scal);CHKERRQ(ierr); 1133 m = PETSC_DECIDE; 1134 ierr = PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&m,&M);CHKERRQ(ierr); 1135 n = PETSC_DECIDE; 1136 ierr = PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&n,&N);CHKERRQ(ierr); 1137 ierr = MatSetSizes(mat_scal,m,n,M,N);CHKERRQ(ierr); 1138 ierr = MatSetType(mat_scal,MATSCALAPACK);CHKERRQ(ierr); 1139 ierr = MatSetUp(mat_scal);CHKERRQ(ierr); 1140 } 1141 ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr); 1142 for (row=rstart;row<rend;row++) { 1143 ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr); 1144 ierr = MatSetValues(mat_scal,1,&row,ncols,cols,vals,ADD_VALUES);CHKERRQ(ierr); 1145 for (j=0;j<ncols;j++) { /* lower triangular part */ 1146 if (cols[j] == row) continue; 1147 v = A->hermitian ? 
PetscConj(vals[j]) : vals[j]; 1148 ierr = MatSetValues(mat_scal,1,&cols[j],1,&row,&v,ADD_VALUES);CHKERRQ(ierr); 1149 } 1150 ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr); 1151 } 1152 ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr); 1153 ierr = MatAssemblyBegin(mat_scal,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1154 ierr = MatAssemblyEnd(mat_scal,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1155 1156 if (reuse == MAT_INPLACE_MATRIX) { ierr = MatHeaderReplace(A,&mat_scal);CHKERRQ(ierr); } 1157 else *newmat = mat_scal; 1158 PetscFunctionReturn(0); 1159 } 1160 1161 static PetscErrorCode MatScaLAPACKSetPreallocation(Mat A) 1162 { 1163 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 1164 PetscErrorCode ierr; 1165 PetscInt sz=0; 1166 1167 PetscFunctionBegin; 1168 ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr); 1169 ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr); 1170 if (!a->lld) a->lld = a->locr; 1171 1172 ierr = PetscFree(a->loc);CHKERRQ(ierr); 1173 ierr = PetscIntMultError(a->lld,a->locc,&sz);CHKERRQ(ierr); 1174 ierr = PetscCalloc1(sz,&a->loc);CHKERRQ(ierr); 1175 ierr = PetscLogObjectMemory((PetscObject)A,sz*sizeof(PetscScalar));CHKERRQ(ierr); 1176 1177 A->preallocated = PETSC_TRUE; 1178 PetscFunctionReturn(0); 1179 } 1180 1181 static PetscErrorCode MatDestroy_ScaLAPACK(Mat A) 1182 { 1183 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 1184 PetscErrorCode ierr; 1185 Mat_ScaLAPACK_Grid *grid; 1186 PetscBool flg; 1187 MPI_Comm icomm; 1188 1189 PetscFunctionBegin; 1190 ierr = MatStashDestroy_Private(&A->stash);CHKERRQ(ierr); 1191 ierr = PetscFree(a->loc);CHKERRQ(ierr); 1192 ierr = PetscFree(a->pivots);CHKERRQ(ierr); 1193 ierr = PetscCommDuplicate(PetscObjectComm((PetscObject)A),&icomm,NULL);CHKERRQ(ierr); 1194 ierr = MPI_Comm_get_attr(icomm,Petsc_ScaLAPACK_keyval,(void**)&grid,(int*)&flg);CHKERRMPI(ierr); 1195 if (--grid->grid_refct == 0) { 1196 Cblacs_gridexit(grid->ictxt); 1197 Cblacs_gridexit(grid->ictxrow); 1198 Cblacs_gridexit(grid->ictxcol); 1199 ierr = PetscFree(grid);CHKERRQ(ierr); 1200 ierr = MPI_Comm_delete_attr(icomm,Petsc_ScaLAPACK_keyval);CHKERRMPI(ierr); 1201 } 1202 ierr = PetscCommDestroy(&icomm);CHKERRQ(ierr); 1203 ierr = PetscObjectComposeFunction((PetscObject)A,"MatGetOwnershipIS_C",NULL);CHKERRQ(ierr); 1204 ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL);CHKERRQ(ierr); 1205 ierr = PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKSetBlockSizes_C",NULL);CHKERRQ(ierr); 1206 ierr = PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKGetBlockSizes_C",NULL);CHKERRQ(ierr); 1207 ierr = PetscFree(A->data);CHKERRQ(ierr); 1208 PetscFunctionReturn(0); 1209 } 1210 1211 PETSC_STATIC_INLINE PetscErrorCode MatScaLAPACKCheckLayout(PetscLayout map) 1212 { 1213 PetscErrorCode ierr; 1214 const PetscInt *ranges; 1215 PetscMPIInt size; 1216 PetscInt i,n; 1217 1218 PetscFunctionBegin; 1219 ierr = MPI_Comm_size(map->comm,&size);CHKERRMPI(ierr); 1220 if (size>2) { 1221 ierr = PetscLayoutGetRanges(map,&ranges);CHKERRQ(ierr); 1222 n = ranges[1]-ranges[0]; 1223 for (i=1;i<size-1;i++) if (ranges[i+1]-ranges[i]!=n) break; 1224 if (i<size-1 && ranges[i+1]-ranges[i]!=0 && ranges[i+2]-ranges[i+1]!=0) SETERRQ(map->comm,PETSC_ERR_SUP,"MATSCALAPACK must have equal local sizes in all processes (except possibly the last one), consider using MatCreateScaLAPACK"); 1225 } 1226 PetscFunctionReturn(0); 1227 } 1228 1229 PetscErrorCode MatSetUp_ScaLAPACK(Mat A) 1230 { 1231 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 1232 PetscErrorCode ierr; 1233 PetscBLASInt info=0; 1234 1235 
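  /* numroc_ gives the number of rows/columns of the global matrix stored locally in the
     block-cyclic distribution; e.g. M=10, mb=4, nprow=2, rsrc=0 assigns blocks {0-3,8-9}
     (6 rows) to process row 0 and block {4-7} (4 rows) to process row 1 */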
PetscFunctionBegin; 1236 ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr); 1237 ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr); 1238 1239 /* check that the layout is as enforced by MatCreateScaLAPACK */ 1240 ierr = MatScaLAPACKCheckLayout(A->rmap);CHKERRQ(ierr); 1241 ierr = MatScaLAPACKCheckLayout(A->cmap);CHKERRQ(ierr); 1242 1243 /* compute local sizes */ 1244 ierr = PetscBLASIntCast(A->rmap->N,&a->M);CHKERRQ(ierr); 1245 ierr = PetscBLASIntCast(A->cmap->N,&a->N);CHKERRQ(ierr); 1246 a->locr = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow); 1247 a->locc = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol); 1248 a->lld = PetscMax(1,a->locr); 1249 1250 /* allocate local array */ 1251 ierr = MatScaLAPACKSetPreallocation(A);CHKERRQ(ierr); 1252 1253 /* set up ScaLAPACK descriptor */ 1254 PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(a->desc,&a->M,&a->N,&a->mb,&a->nb,&a->rsrc,&a->csrc,&a->grid->ictxt,&a->lld,&info)); 1255 PetscCheckScaLapackInfo("descinit",info); 1256 PetscFunctionReturn(0); 1257 } 1258 1259 PetscErrorCode MatAssemblyBegin_ScaLAPACK(Mat A,MatAssemblyType type) 1260 { 1261 PetscErrorCode ierr; 1262 PetscInt nstash,reallocs; 1263 1264 PetscFunctionBegin; 1265 if (A->nooffprocentries) PetscFunctionReturn(0); 1266 ierr = MatStashScatterBegin_Private(A,&A->stash,NULL);CHKERRQ(ierr); 1267 ierr = MatStashGetInfo_Private(&A->stash,&nstash,&reallocs);CHKERRQ(ierr); 1268 ierr = PetscInfo2(A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 1269 PetscFunctionReturn(0); 1270 } 1271 1272 PetscErrorCode MatAssemblyEnd_ScaLAPACK(Mat A,MatAssemblyType type) 1273 { 1274 PetscErrorCode ierr; 1275 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data; 1276 PetscMPIInt n; 1277 PetscInt i,flg,*row,*col; 1278 PetscScalar *val; 1279 PetscBLASInt gridx,gcidx,lridx,lcidx,rsrc,csrc; 1280 1281 PetscFunctionBegin; 1282 if (A->nooffprocentries) PetscFunctionReturn(0); 1283 while (1) { 1284 ierr = MatStashScatterGetMesg_Private(&A->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 1285 if (!flg) break; 1286 for (i=0;i<n;i++) { 1287 ierr = PetscBLASIntCast(row[i]+1,&gridx);CHKERRQ(ierr); 1288 ierr = PetscBLASIntCast(col[i]+1,&gcidx);CHKERRQ(ierr); 1289 PetscStackCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc)); 1290 if (rsrc!=a->grid->myrow || csrc!=a->grid->mycol) SETERRQ(PetscObjectComm((PetscObject)A),1,"Something went wrong, received value does not belong to this process"); 1291 switch (A->insertmode) { 1292 case INSERT_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] = val[i]; break; 1293 case ADD_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] += val[i]; break; 1294 default: SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for InsertMode %d",(int)A->insertmode); 1295 } 1296 } 1297 } 1298 ierr = MatStashScatterEnd_Private(&A->stash);CHKERRQ(ierr); 1299 PetscFunctionReturn(0); 1300 } 1301 1302 PetscErrorCode MatLoad_ScaLAPACK(Mat newMat,PetscViewer viewer) 1303 { 1304 PetscErrorCode ierr; 1305 Mat Adense,As; 1306 MPI_Comm comm; 1307 1308 PetscFunctionBegin; 1309 ierr = PetscObjectGetComm((PetscObject)newMat,&comm);CHKERRQ(ierr); 1310 ierr = MatCreate(comm,&Adense);CHKERRQ(ierr); 1311 ierr = MatSetType(Adense,MATDENSE);CHKERRQ(ierr); 1312 ierr = MatLoad(Adense,viewer);CHKERRQ(ierr); 1313 ierr = MatConvert(Adense,MATSCALAPACK,MAT_INITIAL_MATRIX,&As);CHKERRQ(ierr); 1314 ierr = MatDestroy(&Adense);CHKERRQ(ierr); 1315 ierr = 
MatHeaderReplace(newMat,&As);CHKERRQ(ierr); 1316 PetscFunctionReturn(0); 1317 } 1318 1319 /* -------------------------------------------------------------------*/ 1320 static struct _MatOps MatOps_Values = { 1321 MatSetValues_ScaLAPACK, 1322 0, 1323 0, 1324 MatMult_ScaLAPACK, 1325 /* 4*/ MatMultAdd_ScaLAPACK, 1326 MatMultTranspose_ScaLAPACK, 1327 MatMultTransposeAdd_ScaLAPACK, 1328 MatSolve_ScaLAPACK, 1329 MatSolveAdd_ScaLAPACK, 1330 0, 1331 /*10*/ 0, 1332 MatLUFactor_ScaLAPACK, 1333 MatCholeskyFactor_ScaLAPACK, 1334 0, 1335 MatTranspose_ScaLAPACK, 1336 /*15*/ MatGetInfo_ScaLAPACK, 1337 0, 1338 MatGetDiagonal_ScaLAPACK, 1339 MatDiagonalScale_ScaLAPACK, 1340 MatNorm_ScaLAPACK, 1341 /*20*/ MatAssemblyBegin_ScaLAPACK, 1342 MatAssemblyEnd_ScaLAPACK, 1343 MatSetOption_ScaLAPACK, 1344 MatZeroEntries_ScaLAPACK, 1345 /*24*/ 0, 1346 MatLUFactorSymbolic_ScaLAPACK, 1347 MatLUFactorNumeric_ScaLAPACK, 1348 MatCholeskyFactorSymbolic_ScaLAPACK, 1349 MatCholeskyFactorNumeric_ScaLAPACK, 1350 /*29*/ MatSetUp_ScaLAPACK, 1351 0, 1352 0, 1353 0, 1354 0, 1355 /*34*/ MatDuplicate_ScaLAPACK, 1356 0, 1357 0, 1358 0, 1359 0, 1360 /*39*/ MatAXPY_ScaLAPACK, 1361 0, 1362 0, 1363 0, 1364 MatCopy_ScaLAPACK, 1365 /*44*/ 0, 1366 MatScale_ScaLAPACK, 1367 MatShift_ScaLAPACK, 1368 0, 1369 0, 1370 /*49*/ 0, 1371 0, 1372 0, 1373 0, 1374 0, 1375 /*54*/ 0, 1376 0, 1377 0, 1378 0, 1379 0, 1380 /*59*/ 0, 1381 MatDestroy_ScaLAPACK, 1382 MatView_ScaLAPACK, 1383 0, 1384 0, 1385 /*64*/ 0, 1386 0, 1387 0, 1388 0, 1389 0, 1390 /*69*/ 0, 1391 0, 1392 MatConvert_ScaLAPACK_Dense, 1393 0, 1394 0, 1395 /*74*/ 0, 1396 0, 1397 0, 1398 0, 1399 0, 1400 /*79*/ 0, 1401 0, 1402 0, 1403 0, 1404 MatLoad_ScaLAPACK, 1405 /*84*/ 0, 1406 0, 1407 0, 1408 0, 1409 0, 1410 /*89*/ 0, 1411 0, 1412 MatMatMultNumeric_ScaLAPACK, 1413 0, 1414 0, 1415 /*94*/ 0, 1416 0, 1417 0, 1418 MatMatTransposeMultNumeric_ScaLAPACK, 1419 0, 1420 /*99*/ MatProductSetFromOptions_ScaLAPACK, 1421 0, 1422 0, 1423 MatConjugate_ScaLAPACK, 1424 0, 1425 /*104*/0, 1426 0, 1427 0, 1428 0, 1429 0, 1430 /*109*/MatMatSolve_ScaLAPACK, 1431 0, 1432 0, 1433 0, 1434 MatMissingDiagonal_ScaLAPACK, 1435 /*114*/0, 1436 0, 1437 0, 1438 0, 1439 0, 1440 /*119*/0, 1441 MatHermitianTranspose_ScaLAPACK, 1442 0, 1443 0, 1444 0, 1445 /*124*/0, 1446 0, 1447 0, 1448 0, 1449 0, 1450 /*129*/0, 1451 0, 1452 0, 1453 0, 1454 0, 1455 /*134*/0, 1456 0, 1457 0, 1458 0, 1459 0, 1460 0, 1461 /*140*/0, 1462 0, 1463 0, 1464 0, 1465 0, 1466 /*145*/0, 1467 0, 1468 0 1469 }; 1470 1471 static PetscErrorCode MatStashScatterBegin_ScaLAPACK(Mat mat,MatStash *stash,PetscInt *owners) 1472 { 1473 PetscInt *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2; 1474 PetscInt size=stash->size,nsends; 1475 PetscErrorCode ierr; 1476 PetscInt count,*sindices,**rindices,i,j,l; 1477 PetscScalar **rvalues,*svalues; 1478 MPI_Comm comm = stash->comm; 1479 MPI_Request *send_waits,*recv_waits,*recv_waits1,*recv_waits2; 1480 PetscMPIInt *sizes,*nlengths,nreceives; 1481 PetscInt *sp_idx,*sp_idy; 1482 PetscScalar *sp_val; 1483 PetscMatStashSpace space,space_next; 1484 PetscBLASInt gridx,gcidx,lridx,lcidx,rsrc,csrc; 1485 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)mat->data; 1486 1487 PetscFunctionBegin; 1488 { /* make sure all processors are either in INSERTMODE or ADDMODE */ 1489 InsertMode addv; 1490 ierr = MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1491 if (addv == (ADD_VALUES|INSERT_VALUES)) 
static PetscErrorCode MatStashScatterBegin_ScaLAPACK(Mat mat,MatStash *stash,PetscInt *owners)
{
  PetscInt           *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2;
  PetscInt           size=stash->size,nsends;
  PetscErrorCode     ierr;
  PetscInt           count,*sindices,**rindices,i,j,l;
  PetscScalar        **rvalues,*svalues;
  MPI_Comm           comm = stash->comm;
  MPI_Request        *send_waits,*recv_waits,*recv_waits1,*recv_waits2;
  PetscMPIInt        *sizes,*nlengths,nreceives;
  PetscInt           *sp_idx,*sp_idy;
  PetscScalar        *sp_val;
  PetscMatStashSpace space,space_next;
  PetscBLASInt       gridx,gcidx,lridx,lcidx,rsrc,csrc;
  Mat_ScaLAPACK      *a = (Mat_ScaLAPACK*)mat->data;

  PetscFunctionBegin;
  { /* make sure all processors are either in INSERTMODE or ADDMODE */
    InsertMode addv;
    ierr = MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted, others added");
    mat->insertmode = addv; /* in case this processor had no cache */
  }

  bs2 = stash->bs*stash->bs;

  /* first count number of contributors to each processor */
  ierr = PetscCalloc1(size,&nlengths);CHKERRQ(ierr);
  ierr = PetscMalloc1(stash->n+1,&owner);CHKERRQ(ierr);

  i     = j = 0;
  space = stash->space_head;
  while (space) {
    space_next = space->next;
    for (l=0; l<space->local_used; l++) {
      ierr = PetscBLASIntCast(space->idx[l]+1,&gridx);CHKERRQ(ierr);
      ierr = PetscBLASIntCast(space->idy[l]+1,&gcidx);CHKERRQ(ierr);
      PetscStackCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc));
      j = Cblacs_pnum(a->grid->ictxt,rsrc,csrc);
      nlengths[j]++; owner[i] = j;
      i++;
    }
    space = space_next;
  }

  /* Now check what procs get messages - and compute nsends. */
  ierr = PetscCalloc1(size,&sizes);CHKERRQ(ierr);
  for (i=0, nsends=0; i<size; i++) {
    if (nlengths[i]) {
      sizes[i] = 1; nsends++;
    }
  }

  {PetscMPIInt *onodes,*olengths;
   /* Determine the number of messages to expect, their lengths, and the from-ids */
   ierr = PetscGatherNumberOfMessages(comm,sizes,nlengths,&nreceives);CHKERRQ(ierr);
   ierr = PetscGatherMessageLengths(comm,nsends,nreceives,nlengths,&onodes,&olengths);CHKERRQ(ierr);
   /* since row,col are clubbed together - lengths are multiplied by 2 */
   for (i=0; i<nreceives; i++) olengths[i] *=2;
   ierr = PetscPostIrecvInt(comm,tag1,nreceives,onodes,olengths,&rindices,&recv_waits1);CHKERRQ(ierr);
   /* values are size 'bs2' lengths (and remove earlier factor 2) */
   for (i=0; i<nreceives; i++) olengths[i] = olengths[i]*bs2/2;
   ierr = PetscPostIrecvScalar(comm,tag2,nreceives,onodes,olengths,&rvalues,&recv_waits2);CHKERRQ(ierr);
   ierr = PetscFree(onodes);CHKERRQ(ierr);
   ierr = PetscFree(olengths);CHKERRQ(ierr);}

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc2(bs2*stash->n,&svalues,2*(stash->n+1),&sindices);CHKERRQ(ierr);
  ierr = PetscMalloc1(2*nsends,&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc2(size,&startv,size,&starti);CHKERRQ(ierr);
  /* use 2 sends the first with all_a, the next with all_i and all_j */
  startv[0] = 0; starti[0] = 0;
  for (i=1; i<size; i++) {
    startv[i] = startv[i-1] + nlengths[i-1];
    starti[i] = starti[i-1] + 2*nlengths[i-1];
  }

  i     = 0;
  space = stash->space_head;
  while (space) {
    space_next = space->next;
    sp_idx     = space->idx;
    sp_idy     = space->idy;
    sp_val     = space->val;
    for (l=0; l<space->local_used; l++) {
      j = owner[i];
      if (bs2 == 1) {
        svalues[startv[j]] = sp_val[l];
      } else {
        PetscInt    k;
        PetscScalar *buf1,*buf2;
        buf1 = svalues+bs2*startv[j];
        buf2 = space->val + bs2*l;
        for (k=0; k<bs2; k++) buf1[k] = buf2[k];
      }
      sindices[starti[j]]             = sp_idx[l];
      sindices[starti[j]+nlengths[j]] = sp_idy[l];
      startv[j]++;
      starti[j]++;
      i++;
    }
    space = space_next;
  }
  startv[0] = 0;
  for (i=1; i<size; i++) startv[i] = startv[i-1] + nlengths[i-1];

  for (i=0,count=0; i<size; i++) {
    if (sizes[i]) {
      ierr = MPI_Isend(sindices+2*startv[i],2*nlengths[i],MPIU_INT,i,tag1,comm,send_waits+count++);CHKERRMPI(ierr);
      ierr = MPI_Isend(svalues+bs2*startv[i],bs2*nlengths[i],MPIU_SCALAR,i,tag2,comm,send_waits+count++);CHKERRMPI(ierr);
    }
  }
#if defined(PETSC_USE_INFO)
  ierr = PetscInfo1(NULL,"No of messages: %d \n",nsends);CHKERRQ(ierr);
  for (i=0; i<size; i++) {
    if (sizes[i]) {
      ierr = PetscInfo2(NULL,"Mesg_to: %d: size: %d bytes\n",i,nlengths[i]*(bs2*sizeof(PetscScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);
    }
  }
#endif
  ierr = PetscFree(nlengths);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree2(startv,starti);CHKERRQ(ierr);
  ierr = PetscFree(sizes);CHKERRQ(ierr);

  /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */
  ierr = PetscMalloc1(2*nreceives,&recv_waits);CHKERRQ(ierr);

  for (i=0; i<nreceives; i++) {
    recv_waits[2*i]   = recv_waits1[i];
    recv_waits[2*i+1] = recv_waits2[i];
  }
  stash->recv_waits = recv_waits;

  ierr = PetscFree(recv_waits1);CHKERRQ(ierr);
  ierr = PetscFree(recv_waits2);CHKERRQ(ierr);

  stash->svalues         = svalues;
  stash->sindices        = sindices;
  stash->rvalues         = rvalues;
  stash->rindices        = rindices;
  stash->send_waits      = send_waits;
  stash->nsends          = nsends;
  stash->nrecvs          = nreceives;
  stash->reproduce_count = 0;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatScaLAPACKSetBlockSizes_ScaLAPACK(Mat A,PetscInt mb,PetscInt nb)
{
  PetscErrorCode ierr;
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;

  PetscFunctionBegin;
  if (A->preallocated) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Cannot change block sizes after MatSetUp");
  if (mb<1 && mb!=PETSC_DECIDE) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"mb %D must be at least 1",mb);
  if (nb<1 && nb!=PETSC_DECIDE) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"nb %D must be at least 1",nb);
  ierr = PetscBLASIntCast((mb==PETSC_DECIDE)?DEFAULT_BLOCKSIZE:mb,&a->mb);CHKERRQ(ierr);
  ierr = PetscBLASIntCast((nb==PETSC_DECIDE)?a->mb:nb,&a->nb);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   MatScaLAPACKSetBlockSizes - Sets the block sizes to be used for the distribution of
   the ScaLAPACK matrix

   Logically Collective on A

   Input Parameters:
+  A  - a MATSCALAPACK matrix
.  mb - the row block size
-  nb - the column block size

   Level: intermediate

.seealso: MatCreateScaLAPACK(), MatScaLAPACKGetBlockSizes()
@*/
PetscErrorCode MatScaLAPACKSetBlockSizes(Mat A,PetscInt mb,PetscInt nb)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  PetscValidLogicalCollectiveInt(A,mb,2);
  PetscValidLogicalCollectiveInt(A,nb,3);
  ierr = PetscTryMethod(A,"MatScaLAPACKSetBlockSizes_C",(Mat,PetscInt,PetscInt),(A,mb,nb));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

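/* A minimal usage sketch (not part of this translation unit); the 100x100 size and 32x32 blocks
   are illustrative. Local sizes are chosen with PetscSplitOwnershipEqual(), mirroring what
   MatCreateScaLAPACK() does below, and the block sizes are set before MatSetUp():

     Mat            B;
     PetscErrorCode ierr;
     PetscInt       m = PETSC_DECIDE,n = PETSC_DECIDE,M = 100,N = 100,mb,nb;

     ierr = PetscSplitOwnershipEqual(PETSC_COMM_WORLD,&m,&M);CHKERRQ(ierr);
     ierr = PetscSplitOwnershipEqual(PETSC_COMM_WORLD,&n,&N);CHKERRQ(ierr);
     ierr = MatCreate(PETSC_COMM_WORLD,&B);CHKERRQ(ierr);
     ierr = MatSetSizes(B,m,n,M,N);CHKERRQ(ierr);
     ierr = MatSetType(B,MATSCALAPACK);CHKERRQ(ierr);
     ierr = MatScaLAPACKSetBlockSizes(B,32,32);CHKERRQ(ierr);
     ierr = MatSetUp(B);CHKERRQ(ierr);
     ierr = MatScaLAPACKGetBlockSizes(B,&mb,&nb);CHKERRQ(ierr);   // mb == nb == 32
     ierr = MatDestroy(&B);CHKERRQ(ierr);
*/
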
static PetscErrorCode MatScaLAPACKGetBlockSizes_ScaLAPACK(Mat A,PetscInt *mb,PetscInt *nb)
{
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;

  PetscFunctionBegin;
  if (mb) *mb = a->mb;
  if (nb) *nb = a->nb;
  PetscFunctionReturn(0);
}

/*@
   MatScaLAPACKGetBlockSizes - Gets the block sizes used in the distribution of
   the ScaLAPACK matrix

   Not collective

   Input Parameter:
.  A - a MATSCALAPACK matrix

   Output Parameters:
+  mb - the row block size
-  nb - the column block size

   Level: intermediate

.seealso: MatCreateScaLAPACK(), MatScaLAPACKSetBlockSizes()
@*/
PetscErrorCode MatScaLAPACKGetBlockSizes(Mat A,PetscInt *mb,PetscInt *nb)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  ierr = PetscUseMethod(A,"MatScaLAPACKGetBlockSizes_C",(Mat,PetscInt*,PetscInt*),(A,mb,nb));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Ref(MatStash*,PetscMPIInt*,PetscInt**,PetscInt**,PetscScalar**,PetscInt*);
PETSC_INTERN PetscErrorCode MatStashScatterEnd_Ref(MatStash*);

/*MC
   MATSCALAPACK = "scalapack" - A matrix type for dense matrices using the ScaLAPACK package

   Use ./configure --download-scalapack to install PETSc to use ScaLAPACK

   Use -pc_type lu -pc_factor_mat_solver_type scalapack to use this direct solver

   Options Database Keys:
+  -mat_type scalapack - sets the matrix type to "scalapack" during a call to MatSetFromOptions()
.  -mat_scalapack_grid_height - sets Grid Height for 2D cyclic ordering of internal matrix
-  -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma)

   Level: beginner

.seealso: MATDENSE, MATELEMENTAL
M*/

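/* Example command line (the executable and problem are illustrative); solve a linear system with
   the ScaLAPACK direct solver on a 2x3 process grid using 32x32 distribution blocks:

     mpiexec -n 6 ./ex1 -mat_type scalapack -pc_type lu -pc_factor_mat_solver_type scalapack \
                        -mat_scalapack_grid_height 2 -mat_scalapack_block_sizes 32,32
*/
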
PETSC_EXTERN PetscErrorCode MatCreate_ScaLAPACK(Mat A)
{
  Mat_ScaLAPACK      *a;
  PetscErrorCode     ierr;
  PetscBool          flg,flg1;
  Mat_ScaLAPACK_Grid *grid;
  MPI_Comm           icomm;
  PetscBLASInt       nprow,npcol,myrow,mycol;
  PetscInt           optv1,k=2,array[2]={0,0};
  PetscMPIInt        size;

  PetscFunctionBegin;
  ierr = PetscMemcpy(A->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  A->insertmode = NOT_SET_VALUES;

  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)A),1,&A->stash);CHKERRQ(ierr);
  A->stash.ScatterBegin   = MatStashScatterBegin_ScaLAPACK;
  A->stash.ScatterGetMesg = MatStashScatterGetMesg_Ref;
  A->stash.ScatterEnd     = MatStashScatterEnd_Ref;
  A->stash.ScatterDestroy = NULL;

  ierr = PetscNewLog(A,&a);CHKERRQ(ierr);
  A->data = (void*)a;

  /* Grid needs to be shared between multiple Mats on the same communicator, implement by attribute caching on the MPI_Comm */
  if (Petsc_ScaLAPACK_keyval == MPI_KEYVAL_INVALID) {
    ierr = MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN,MPI_COMM_NULL_DELETE_FN,&Petsc_ScaLAPACK_keyval,(void*)0);CHKERRMPI(ierr);
    ierr = PetscRegisterFinalize(Petsc_ScaLAPACK_keyval_free);CHKERRQ(ierr);
  }
  ierr = PetscCommDuplicate(PetscObjectComm((PetscObject)A),&icomm,NULL);CHKERRQ(ierr);
  ierr = MPI_Comm_get_attr(icomm,Petsc_ScaLAPACK_keyval,(void**)&grid,(int*)&flg);CHKERRMPI(ierr);
  if (!flg) {
    ierr = PetscNewLog(A,&grid);CHKERRQ(ierr);

    ierr = MPI_Comm_size(icomm,&size);CHKERRMPI(ierr);
    grid->nprow = (PetscInt) (PetscSqrtReal((PetscReal)size) + 0.001);

    ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"ScaLAPACK Grid Options","Mat");CHKERRQ(ierr);
    ierr = PetscOptionsInt("-mat_scalapack_grid_height","Grid Height","None",grid->nprow,&optv1,&flg1);CHKERRQ(ierr);
    if (flg1) {
      if (size % optv1) SETERRQ2(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_INCOMP,"Grid Height %D must evenly divide CommSize %D",optv1,size);
      grid->nprow = optv1;
    }
    ierr = PetscOptionsEnd();CHKERRQ(ierr);

    if (size % grid->nprow) grid->nprow = 1;  /* cannot use a squarish grid, use a 1d grid */
    grid->npcol = size/grid->nprow;
    ierr = PetscBLASIntCast(grid->nprow,&nprow);CHKERRQ(ierr);
    ierr = PetscBLASIntCast(grid->npcol,&npcol);CHKERRQ(ierr);
    grid->ictxt = Csys2blacs_handle(icomm);
    Cblacs_gridinit(&grid->ictxt,"R",nprow,npcol);
    Cblacs_gridinfo(grid->ictxt,&nprow,&npcol,&myrow,&mycol);
    grid->grid_refct = 1;
    grid->nprow      = nprow;
    grid->npcol      = npcol;
    grid->myrow      = myrow;
    grid->mycol      = mycol;
    /* auxiliary 1d BLACS contexts for 1xsize and sizex1 grids */
    grid->ictxrow = Csys2blacs_handle(icomm);
    Cblacs_gridinit(&grid->ictxrow,"R",1,size);
    grid->ictxcol = Csys2blacs_handle(icomm);
    Cblacs_gridinit(&grid->ictxcol,"R",size,1);
    ierr = MPI_Comm_set_attr(icomm,Petsc_ScaLAPACK_keyval,(void*)grid);CHKERRMPI(ierr);

  } else grid->grid_refct++;
  ierr = PetscCommDestroy(&icomm);CHKERRQ(ierr);
  a->grid = grid;
  a->mb   = DEFAULT_BLOCKSIZE;
  a->nb   = DEFAULT_BLOCKSIZE;

  ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),NULL,"ScaLAPACK Options","Mat");CHKERRQ(ierr);
  ierr = PetscOptionsIntArray("-mat_scalapack_block_sizes","Size of the blocks to use (one or two comma-separated integers)","MatCreateScaLAPACK",array,&k,&flg);CHKERRQ(ierr);
  if (flg) {
    a->mb = array[0];
    a->nb = (k>1)? array[1]: a->mb;
  }
  ierr = PetscOptionsEnd();CHKERRQ(ierr);

  ierr = PetscObjectComposeFunction((PetscObject)A,"MatGetOwnershipIS_C",MatGetOwnershipIS_ScaLAPACK);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKSetBlockSizes_C",MatScaLAPACKSetBlockSizes_ScaLAPACK);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKGetBlockSizes_C",MatScaLAPACKGetBlockSizes_ScaLAPACK);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)A,MATSCALAPACK);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   MatCreateScaLAPACK - Creates a dense parallel matrix in ScaLAPACK format
   (2D block cyclic distribution).

   Collective

   Input Parameters:
+  comm - MPI communicator
.  mb   - row block size (or PETSC_DECIDE to have it set)
.  nb   - column block size (or PETSC_DECIDE to have it set)
.  M    - number of global rows
.  N    - number of global columns
.  rsrc - coordinate of process that owns the first row of the distributed matrix
-  csrc - coordinate of process that owns the first column of the distributed matrix

   Output Parameter:
.  A - the matrix

   Options Database Keys:
.  -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma)

   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]

   Notes:
   If PETSC_DECIDE is used for the block sizes, then an appropriate value is chosen.

   Storage Information:
   Storage is completely managed by ScaLAPACK, so this requires PETSc to be
   configured with ScaLAPACK. In particular, PETSc's local sizes lose
   significance and are thus ignored. The block sizes refer to the values
   used for the 2D block-cyclic distribution and do not have the same meaning as in BAIJ.

   Level: intermediate

.seealso: MatCreate(), MatCreateDense(), MatSetValues()
@*/
PetscErrorCode MatCreateScaLAPACK(MPI_Comm comm,PetscInt mb,PetscInt nb,PetscInt M,PetscInt N,PetscInt rsrc,PetscInt csrc,Mat *A)
{
  PetscErrorCode ierr;
  Mat_ScaLAPACK  *a;
  PetscInt       m,n;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetType(*A,MATSCALAPACK);CHKERRQ(ierr);
  if (M==PETSC_DECIDE || N==PETSC_DECIDE) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot use PETSC_DECIDE for matrix dimensions");
  /* rows and columns are NOT distributed according to PetscSplitOwnership */
  m = PETSC_DECIDE;
  ierr = PetscSplitOwnershipEqual(comm,&m,&M);CHKERRQ(ierr);
  n = PETSC_DECIDE;
  ierr = PetscSplitOwnershipEqual(comm,&n,&N);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  a = (Mat_ScaLAPACK*)(*A)->data;
  ierr = PetscBLASIntCast(M,&a->M);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(N,&a->N);CHKERRQ(ierr);
  ierr = PetscBLASIntCast((mb==PETSC_DECIDE)?DEFAULT_BLOCKSIZE:mb,&a->mb);CHKERRQ(ierr);
  ierr = PetscBLASIntCast((nb==PETSC_DECIDE)?a->mb:nb,&a->nb);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(rsrc,&a->rsrc);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(csrc,&a->csrc);CHKERRQ(ierr);
  ierr = MatSetUp(*A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
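
/* A minimal usage sketch (not compiled here); the 1000x1000 size and default block sizes are
   illustrative. Entries may be set from any process and are routed to their owner at assembly:

     Mat            A;
     PetscErrorCode ierr;

     ierr = MatCreateScaLAPACK(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,1000,1000,0,0,&A);CHKERRQ(ierr);
     ierr = MatSetValue(A,0,0,1.0,INSERT_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatDestroy(&A);CHKERRQ(ierr);
*/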