1 #define PETSCMAT_DLL 2 3 /* 4 Basic functions for basic parallel dense matrices. 5 */ 6 7 8 #include "../src/mat/impls/dense/mpi/mpidense.h" /*I "petscmat.h" I*/ 9 #if defined(PETSC_HAVE_PLAPACK) 10 static PetscMPIInt Plapack_nprows,Plapack_npcols,Plapack_ierror,Plapack_nb_alg; 11 static MPI_Comm Plapack_comm_2d; 12 #endif 13 14 #undef __FUNCT__ 15 #define __FUNCT__ "MatDenseGetLocalMatrix" 16 /*@ 17 18 MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential 19 matrix that represents the operator. For sequential matrices it returns itself. 20 21 Input Parameter: 22 . A - the Seq or MPI dense matrix 23 24 Output Parameter: 25 . B - the inner matrix 26 27 Level: intermediate 28 29 @*/ 30 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B) 31 { 32 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 33 PetscErrorCode ierr; 34 PetscTruth flg; 35 36 PetscFunctionBegin; 37 ierr = PetscTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr); 38 if (flg) { 39 *B = mat->A; 40 } else { 41 *B = A; 42 } 43 PetscFunctionReturn(0); 44 } 45 46 #undef __FUNCT__ 47 #define __FUNCT__ "MatGetRow_MPIDense" 48 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 49 { 50 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 51 PetscErrorCode ierr; 52 PetscInt lrow,rstart = A->rmap->rstart,rend = A->rmap->rend; 53 54 PetscFunctionBegin; 55 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,"only local rows") 56 lrow = row - rstart; 57 ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt **)idx,(const PetscScalar **)v);CHKERRQ(ierr); 58 PetscFunctionReturn(0); 59 } 60 61 #undef __FUNCT__ 62 #define __FUNCT__ "MatRestoreRow_MPIDense" 63 PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 64 { 65 PetscErrorCode ierr; 66 67 PetscFunctionBegin; 68 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 69 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 70 PetscFunctionReturn(0); 71 } 72 73 EXTERN_C_BEGIN 74 #undef __FUNCT__ 75 #define __FUNCT__ "MatGetDiagonalBlock_MPIDense" 76 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B) 77 { 78 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 79 PetscErrorCode ierr; 80 PetscInt m = A->rmap->n,rstart = A->rmap->rstart; 81 PetscScalar *array; 82 MPI_Comm comm; 83 84 PetscFunctionBegin; 85 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_SUP,"Only square matrices supported."); 86 87 /* The reuse aspect is not implemented efficiently */ 88 if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);} 89 90 ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr); 91 ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr); 92 ierr = MatCreate(comm,B);CHKERRQ(ierr); 93 ierr = MatSetSizes(*B,m,m,m,m);CHKERRQ(ierr); 94 ierr = MatSetType(*B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr); 95 ierr = MatSeqDenseSetPreallocation(*B,array+m*rstart);CHKERRQ(ierr); 96 ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr); 97 ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 98 ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 99 100 *iscopy = PETSC_TRUE; 101 PetscFunctionReturn(0); 102 } 103 EXTERN_C_END 104 105 #undef __FUNCT__ 106 #define __FUNCT__ "MatSetValues_MPIDense" 107 PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv) 108 { 109 Mat_MPIDense *A = (Mat_MPIDense*)mat->data; 110 PetscErrorCode ierr; 111 PetscInt 
i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row; 112 PetscTruth roworiented = A->roworiented; 113 114 PetscFunctionBegin; 115 for (i=0; i<m; i++) { 116 if (idxm[i] < 0) continue; 117 if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 118 if (idxm[i] >= rstart && idxm[i] < rend) { 119 row = idxm[i] - rstart; 120 if (roworiented) { 121 ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr); 122 } else { 123 for (j=0; j<n; j++) { 124 if (idxn[j] < 0) continue; 125 if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 126 ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr); 127 } 128 } 129 } else { 130 if (!A->donotstash) { 131 if (roworiented) { 132 ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);CHKERRQ(ierr); 133 } else { 134 ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);CHKERRQ(ierr); 135 } 136 } 137 } 138 } 139 PetscFunctionReturn(0); 140 } 141 142 #undef __FUNCT__ 143 #define __FUNCT__ "MatGetValues_MPIDense" 144 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 145 { 146 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 147 PetscErrorCode ierr; 148 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row; 149 150 PetscFunctionBegin; 151 for (i=0; i<m; i++) { 152 if (idxm[i] < 0) continue; /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 153 if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 154 if (idxm[i] >= rstart && idxm[i] < rend) { 155 row = idxm[i] - rstart; 156 for (j=0; j<n; j++) { 157 if (idxn[j] < 0) continue; /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 158 if (idxn[j] >= mat->cmap->N) { 159 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 160 } 161 ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr); 162 } 163 } else { 164 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 165 } 166 } 167 PetscFunctionReturn(0); 168 } 169 170 #undef __FUNCT__ 171 #define __FUNCT__ "MatGetArray_MPIDense" 172 PetscErrorCode MatGetArray_MPIDense(Mat A,PetscScalar *array[]) 173 { 174 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 175 PetscErrorCode ierr; 176 177 PetscFunctionBegin; 178 ierr = MatGetArray(a->A,array);CHKERRQ(ierr); 179 PetscFunctionReturn(0); 180 } 181 182 #undef __FUNCT__ 183 #define __FUNCT__ "MatGetSubMatrix_MPIDense" 184 static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,PetscInt cs,MatReuse scall,Mat *B) 185 { 186 Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd; 187 Mat_SeqDense *lmat = (Mat_SeqDense*)mat->A->data; 188 PetscErrorCode ierr; 189 PetscInt i,j,rstart,rend,nrows,ncols,nlrows,nlcols; 190 const PetscInt *irow,*icol; 191 PetscScalar *av,*bv,*v = lmat->v; 192 Mat newmat; 193 194 PetscFunctionBegin; 195 ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr); 196 ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr); 197 ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr); 198 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 199 200 /* No parallel redistribution currently supported! Should really check each index set 201 to comfirm that it is OK. ... Currently supports only submatrix same partitioning as 202 original matrix! 
*/

  ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    ierr = MatCreate(((PetscObject)A)->comm,&newmat);CHKERRQ(ierr);
    ierr = MatSetSizes(newmat,nrows,cs,PETSC_DECIDE,ncols);CHKERRQ(ierr);
    ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr);
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense*)newmat->data;
  bv      = ((Mat_SeqDense*)newmatd->A->data)->v;

  for (i=0; i<ncols; i++) {
    av = v + nlrows*icol[i];
    for (j=0; j<nrows; j++) {
      *bv++ = av[irow[j] - rstart];
    }
  }

  /* Assemble the matrices so that the correct flags are set */
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* Free work space */
  ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
  *B = newmat;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreArray_MPIDense"
PetscErrorCode MatRestoreArray_MPIDense(Mat A,PetscScalar *array[])
{
  PetscFunctionBegin;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIDense"
PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  MPI_Comm       comm = ((PetscObject)mat)->comm;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIDense"
PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,*row,*col,flg,j,rstart,ncols;
  PetscMPIInt    n;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  PetscFunctionBegin;
  /* wait on receives */
  while (1) {
    ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
    if (!flg) break;

    for (i=0; i<n;) {
      /* Now identify the consecutive vals belonging to the same row */
      for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
      if (j < n) ncols = j-i;
      else       ncols = n-i;
      /* Now assemble all these values with a single function call */
      ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
      i = j;
    }
  }
  ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
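/*
   The two assembly routines above follow the usual PETSc two-phase assembly:
   values destined for off-process rows are collected in mat->stash by
   MatSetValues_MPIDense(), communicated in MatAssemblyBegin_MPIDense(), and
   drained into the local MATSEQDENSE block in MatAssemblyEnd_MPIDense().
   A minimal caller-side sketch (assuming mat was created as MATMPIDENSE and
   i, j, v are a global row index, a global column index, and a value):

      ierr = MatSetValues(mat,1,&i,1,&j,&v,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/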

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIDense"
PetscErrorCode MatZeroEntries_MPIDense(Mat A)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *l = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* the code does not do the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   mdn->A and mdn->B directly and not through the MatZeroRows()
   routine.
*/
#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIDense"
PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
{
  Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*owners = A->rmap->range;
  PetscInt       *nprocs,j,idx,nsends;
  PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt       *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source;
  PetscInt       *lens,*lrows,*values;
  PetscMPIInt    n,imdex,rank = l->rank,size = l->size;
  MPI_Comm       comm = ((PetscObject)A)->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  PetscTruth     found;

  PetscFunctionBegin;
  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  for (i=0; i<N; i++) {
    idx   = rows[i];
    found = PETSC_FALSE;
    for (j=0; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length*/
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
        the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr =
MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 393 } 394 } 395 ierr = PetscFree(starts);CHKERRQ(ierr); 396 397 base = owners[rank]; 398 399 /* wait on receives */ 400 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 401 source = lens + nrecvs; 402 count = nrecvs; slen = 0; 403 while (count) { 404 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 405 /* unpack receives into our local space */ 406 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 407 source[imdex] = recv_status.MPI_SOURCE; 408 lens[imdex] = n; 409 slen += n; 410 count--; 411 } 412 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 413 414 /* move the data into the send scatter */ 415 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 416 count = 0; 417 for (i=0; i<nrecvs; i++) { 418 values = rvalues + i*nmax; 419 for (j=0; j<lens[i]; j++) { 420 lrows[count++] = values[j] - base; 421 } 422 } 423 ierr = PetscFree(rvalues);CHKERRQ(ierr); 424 ierr = PetscFree(lens);CHKERRQ(ierr); 425 ierr = PetscFree(owner);CHKERRQ(ierr); 426 ierr = PetscFree(nprocs);CHKERRQ(ierr); 427 428 /* actually zap the local rows */ 429 ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr); 430 ierr = PetscFree(lrows);CHKERRQ(ierr); 431 432 /* wait on sends */ 433 if (nsends) { 434 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 435 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 436 ierr = PetscFree(send_status);CHKERRQ(ierr); 437 } 438 ierr = PetscFree(send_waits);CHKERRQ(ierr); 439 ierr = PetscFree(svalues);CHKERRQ(ierr); 440 441 PetscFunctionReturn(0); 442 } 443 444 #undef __FUNCT__ 445 #define __FUNCT__ "MatMult_MPIDense" 446 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy) 447 { 448 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 449 PetscErrorCode ierr; 450 451 PetscFunctionBegin; 452 ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 453 ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 454 ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr); 455 PetscFunctionReturn(0); 456 } 457 458 #undef __FUNCT__ 459 #define __FUNCT__ "MatMultAdd_MPIDense" 460 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz) 461 { 462 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 463 PetscErrorCode ierr; 464 465 PetscFunctionBegin; 466 ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 467 ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 468 ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr); 469 PetscFunctionReturn(0); 470 } 471 472 #undef __FUNCT__ 473 #define __FUNCT__ "MatMultTranspose_MPIDense" 474 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy) 475 { 476 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 477 PetscErrorCode ierr; 478 PetscScalar zero = 0.0; 479 480 PetscFunctionBegin; 481 ierr = VecSet(yy,zero);CHKERRQ(ierr); 482 ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr); 483 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 484 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 485 PetscFunctionReturn(0); 486 } 487 488 #undef __FUNCT__ 489 #define __FUNCT__ "MatMultTransposeAdd_MPIDense" 490 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz) 491 { 492 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 493 
PetscErrorCode ierr; 494 495 PetscFunctionBegin; 496 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 497 ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr); 498 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 499 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 500 PetscFunctionReturn(0); 501 } 502 503 #undef __FUNCT__ 504 #define __FUNCT__ "MatGetDiagonal_MPIDense" 505 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v) 506 { 507 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 508 Mat_SeqDense *aloc = (Mat_SeqDense*)a->A->data; 509 PetscErrorCode ierr; 510 PetscInt len,i,n,m = A->rmap->n,radd; 511 PetscScalar *x,zero = 0.0; 512 513 PetscFunctionBegin; 514 ierr = VecSet(v,zero);CHKERRQ(ierr); 515 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 516 ierr = VecGetSize(v,&n);CHKERRQ(ierr); 517 if (n != A->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec"); 518 len = PetscMin(a->A->rmap->n,a->A->cmap->n); 519 radd = A->rmap->rstart*m; 520 for (i=0; i<len; i++) { 521 x[i] = aloc->v[radd + i*m + i]; 522 } 523 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 524 PetscFunctionReturn(0); 525 } 526 527 #undef __FUNCT__ 528 #define __FUNCT__ "MatDestroy_MPIDense" 529 PetscErrorCode MatDestroy_MPIDense(Mat mat) 530 { 531 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 532 PetscErrorCode ierr; 533 #if defined(PETSC_HAVE_PLAPACK) 534 Mat_Plapack *lu=(Mat_Plapack*)(mat->spptr); 535 #endif 536 537 PetscFunctionBegin; 538 539 #if defined(PETSC_USE_LOG) 540 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 541 #endif 542 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 543 ierr = MatDestroy(mdn->A);CHKERRQ(ierr); 544 if (mdn->lvec) {ierr = VecDestroy(mdn->lvec);CHKERRQ(ierr);} 545 if (mdn->Mvctx) {ierr = VecScatterDestroy(mdn->Mvctx);CHKERRQ(ierr);} 546 #if defined(PETSC_HAVE_PLAPACK) 547 if (lu) { 548 ierr = PLA_Obj_free(&lu->A);CHKERRQ(ierr); 549 ierr = PLA_Obj_free (&lu->pivots);CHKERRQ(ierr); 550 ierr = PLA_Temp_free(&lu->templ);CHKERRQ(ierr); 551 552 if (lu->is_pla) { 553 ierr = ISDestroy(lu->is_pla);CHKERRQ(ierr); 554 ierr = ISDestroy(lu->is_petsc);CHKERRQ(ierr); 555 ierr = VecScatterDestroy(lu->ctx);CHKERRQ(ierr); 556 } 557 } 558 #endif 559 560 ierr = PetscFree(mdn);CHKERRQ(ierr); 561 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 562 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 563 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 564 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr); 565 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr); 566 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr); 567 PetscFunctionReturn(0); 568 } 569 570 #undef __FUNCT__ 571 #define __FUNCT__ "MatView_MPIDense_Binary" 572 static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer) 573 { 574 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 575 PetscErrorCode ierr; 576 PetscViewerFormat format; 577 int fd; 578 PetscInt header[4],mmax,N = mat->cmap->N,i,j,m,k; 579 PetscMPIInt rank,tag = ((PetscObject)viewer)->tag,size; 580 PetscScalar *work,*v,*vv; 581 Mat_SeqDense *a = (Mat_SeqDense*)mdn->A->data; 582 MPI_Status status; 583 584 PetscFunctionBegin; 585 if 
(mdn->size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
    ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);

    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_NATIVE) {

      if (!rank) {
        /* store the matrix as a dense matrix */
        header[0] = MAT_FILE_COOKIE;
        header[1] = mat->rmap->N;
        header[2] = N;
        header[3] = MATRIX_BINARY_FORMAT_DENSE;
        ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);

        /* get largest work array needed for transposing array */
        mmax = mat->rmap->n;
        for (i=1; i<size; i++) {
          mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
        }
        ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&work);CHKERRQ(ierr);

        /* write out local array, by rows */
        m = mat->rmap->n;
        v = a->v;
        for (j=0; j<N; j++) {
          for (i=0; i<m; i++) {
            work[j + i*N] = *v++;
          }
        }
        ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
        /* get largest work array to receive messages from other processes, excludes process zero */
        mmax = 0;
        for (i=1; i<size; i++) {
          mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
        }
        ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&vv);CHKERRQ(ierr);
        for (k = 1; k < size; k++) {
          v = vv;
          m = mat->rmap->range[k+1] - mat->rmap->range[k];
          ierr = MPI_Recv(v,m*N,MPIU_SCALAR,k,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);

          for (j = 0; j < N; j++) {
            for (i = 0; i < m; i++) {
              work[j + i*N] = *v++;
            }
          }
          ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
        }
        ierr = PetscFree(work);CHKERRQ(ierr);
        ierr = PetscFree(vv);CHKERRQ(ierr);
      } else {
        ierr = MPI_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerSetFormat(viewer,PETSC_VIEWER_NATIVE)");
    }
  }
  PetscFunctionReturn(0);
}
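/*
   The binary viewer above only supports the native format; a minimal usage
   sketch (assuming mat is an assembled MATMPIDENSE matrix and viewer is a
   binary viewer opened with PetscViewerBinaryOpen()):

      ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_NATIVE);CHKERRQ(ierr);
      ierr = MatView(mat,viewer);CHKERRQ(ierr);
*/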

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense          *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode        ierr;
  PetscMPIInt           size = mdn->size,rank = mdn->rank;
  const PetscViewerType vtype;
  PetscTruth            iascii,isdraw;
  PetscViewer           sviewer;
  PetscViewerFormat     format;
#if defined(PETSC_HAVE_PLAPACK)
  Mat_Plapack           *lu = (Mat_Plapack*)(mat->spptr);
#endif

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,
                                                (PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
#if defined(PETSC_HAVE_PLAPACK)
      ierr = PetscViewerASCIIPrintf(viewer,"PLAPACK run parameters:\n");CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  Processor mesh: nprows %d, npcols %d\n",lu->Plapack_nprows, lu->Plapack_npcols);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  Distr. block size nb: %d \n",lu->nb);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  Error checking: %d\n",lu->Plapack_ierror);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  Algorithmic block size: %d\n",lu->Plapack_nb_alg);CHKERRQ(ierr);
#endif
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;

    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
    PetscInt    *cols;
    PetscScalar *vals;

    ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
    ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    A->insertmode = INSERT_VALUES;
    row = mat->rmap->rstart; m = mdn->A->rmap->n;
    for (i=0; i<m; i++) {
      ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      row++;
    }

    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense"
PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscTruth     iascii,isbinary,isdraw,issocket;

  PetscFunctionBegin;

  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);

  if (iascii || issocket || isdraw) {
    ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else if (isbinary) {
    ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not
supported by MPI dense matrix",((PetscObject)viewer)->type_name); 761 } 762 PetscFunctionReturn(0); 763 } 764 765 #undef __FUNCT__ 766 #define __FUNCT__ "MatGetInfo_MPIDense" 767 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info) 768 { 769 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 770 Mat mdn = mat->A; 771 PetscErrorCode ierr; 772 PetscReal isend[5],irecv[5]; 773 774 PetscFunctionBegin; 775 info->block_size = 1.0; 776 ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr); 777 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 778 isend[3] = info->memory; isend[4] = info->mallocs; 779 if (flag == MAT_LOCAL) { 780 info->nz_used = isend[0]; 781 info->nz_allocated = isend[1]; 782 info->nz_unneeded = isend[2]; 783 info->memory = isend[3]; 784 info->mallocs = isend[4]; 785 } else if (flag == MAT_GLOBAL_MAX) { 786 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)A)->comm);CHKERRQ(ierr); 787 info->nz_used = irecv[0]; 788 info->nz_allocated = irecv[1]; 789 info->nz_unneeded = irecv[2]; 790 info->memory = irecv[3]; 791 info->mallocs = irecv[4]; 792 } else if (flag == MAT_GLOBAL_SUM) { 793 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr); 794 info->nz_used = irecv[0]; 795 info->nz_allocated = irecv[1]; 796 info->nz_unneeded = irecv[2]; 797 info->memory = irecv[3]; 798 info->mallocs = irecv[4]; 799 } 800 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 801 info->fill_ratio_needed = 0; 802 info->factor_mallocs = 0; 803 PetscFunctionReturn(0); 804 } 805 806 #undef __FUNCT__ 807 #define __FUNCT__ "MatSetOption_MPIDense" 808 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscTruth flg) 809 { 810 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 811 PetscErrorCode ierr; 812 813 PetscFunctionBegin; 814 switch (op) { 815 case MAT_NEW_NONZERO_LOCATIONS: 816 case MAT_NEW_NONZERO_LOCATION_ERR: 817 case MAT_NEW_NONZERO_ALLOCATION_ERR: 818 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 819 break; 820 case MAT_ROW_ORIENTED: 821 a->roworiented = flg; 822 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 823 break; 824 case MAT_NEW_DIAGONALS: 825 case MAT_USE_HASH_TABLE: 826 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 827 break; 828 case MAT_IGNORE_OFF_PROC_ENTRIES: 829 a->donotstash = flg; 830 break; 831 case MAT_SYMMETRIC: 832 case MAT_STRUCTURALLY_SYMMETRIC: 833 case MAT_HERMITIAN: 834 case MAT_SYMMETRY_ETERNAL: 835 case MAT_IGNORE_LOWER_TRIANGULAR: 836 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 837 break; 838 default: 839 SETERRQ1(PETSC_ERR_SUP,"unknown option %s",MatOptions[op]); 840 } 841 PetscFunctionReturn(0); 842 } 843 844 845 #undef __FUNCT__ 846 #define __FUNCT__ "MatDiagonalScale_MPIDense" 847 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr) 848 { 849 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 850 Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data; 851 PetscScalar *l,*r,x,*v; 852 PetscErrorCode ierr; 853 PetscInt i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n; 854 855 PetscFunctionBegin; 856 ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr); 857 if (ll) { 858 ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr); 859 if (s2a != s2) SETERRQ2(PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2); 860 ierr = VecGetArray(ll,&l);CHKERRQ(ierr); 861 for (i=0; i<m; i++) { 862 x = l[i]; 863 v = mat->v + i; 864 for (j=0; j<n; j++) { (*v) *= x; v+= m;} 865 } 866 ierr = 
VecRestoreArray(ll,&l);CHKERRQ(ierr); 867 ierr = PetscLogFlops(n*m);CHKERRQ(ierr); 868 } 869 if (rr) { 870 ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr); 871 if (s3a != s3) SETERRQ2(PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3); 872 ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 873 ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 874 ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr); 875 for (i=0; i<n; i++) { 876 x = r[i]; 877 v = mat->v + i*m; 878 for (j=0; j<m; j++) { (*v++) *= x;} 879 } 880 ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr); 881 ierr = PetscLogFlops(n*m);CHKERRQ(ierr); 882 } 883 PetscFunctionReturn(0); 884 } 885 886 #undef __FUNCT__ 887 #define __FUNCT__ "MatNorm_MPIDense" 888 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm) 889 { 890 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 891 Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data; 892 PetscErrorCode ierr; 893 PetscInt i,j; 894 PetscReal sum = 0.0; 895 PetscScalar *v = mat->v; 896 897 PetscFunctionBegin; 898 if (mdn->size == 1) { 899 ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr); 900 } else { 901 if (type == NORM_FROBENIUS) { 902 for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) { 903 #if defined(PETSC_USE_COMPLEX) 904 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 905 #else 906 sum += (*v)*(*v); v++; 907 #endif 908 } 909 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr); 910 *nrm = sqrt(*nrm); 911 ierr = PetscLogFlops(2*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr); 912 } else if (type == NORM_1) { 913 PetscReal *tmp,*tmp2; 914 ierr = PetscMalloc(2*A->cmap->N*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 915 tmp2 = tmp + A->cmap->N; 916 ierr = PetscMemzero(tmp,2*A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 917 *nrm = 0.0; 918 v = mat->v; 919 for (j=0; j<mdn->A->cmap->n; j++) { 920 for (i=0; i<mdn->A->rmap->n; i++) { 921 tmp[j] += PetscAbsScalar(*v); v++; 922 } 923 } 924 ierr = MPI_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr); 925 for (j=0; j<A->cmap->N; j++) { 926 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 927 } 928 ierr = PetscFree(tmp);CHKERRQ(ierr); 929 ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr); 930 } else if (type == NORM_INFINITY) { /* max row norm */ 931 PetscReal ntemp; 932 ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr); 933 ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPI_MAX,((PetscObject)A)->comm);CHKERRQ(ierr); 934 } else { 935 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 936 } 937 } 938 PetscFunctionReturn(0); 939 } 940 941 #undef __FUNCT__ 942 #define __FUNCT__ "MatTranspose_MPIDense" 943 PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout) 944 { 945 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 946 Mat_SeqDense *Aloc = (Mat_SeqDense*)a->A->data; 947 Mat B; 948 PetscInt M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart; 949 PetscErrorCode ierr; 950 PetscInt j,i; 951 PetscScalar *v; 952 953 PetscFunctionBegin; 954 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PETSC_ERR_SUP,"Supports square matrix only in-place"); 955 if (reuse == MAT_INITIAL_MATRIX || A == *matout) { 956 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 957 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 958 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 959 ierr = MatMPIDenseSetPreallocation(B,PETSC_NULL);CHKERRQ(ierr); 960 } 
else { 961 B = *matout; 962 } 963 964 m = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v; 965 ierr = PetscMalloc(m*sizeof(PetscInt),&rwork);CHKERRQ(ierr); 966 for (i=0; i<m; i++) rwork[i] = rstart + i; 967 for (j=0; j<n; j++) { 968 ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr); 969 v += m; 970 } 971 ierr = PetscFree(rwork);CHKERRQ(ierr); 972 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 973 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 974 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 975 *matout = B; 976 } else { 977 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 978 } 979 PetscFunctionReturn(0); 980 } 981 982 #include "petscblaslapack.h" 983 #undef __FUNCT__ 984 #define __FUNCT__ "MatScale_MPIDense" 985 PetscErrorCode MatScale_MPIDense(Mat inA,PetscScalar alpha) 986 { 987 Mat_MPIDense *A = (Mat_MPIDense*)inA->data; 988 Mat_SeqDense *a = (Mat_SeqDense*)A->A->data; 989 PetscScalar oalpha = alpha; 990 PetscErrorCode ierr; 991 PetscBLASInt one = 1,nz = PetscBLASIntCast(inA->rmap->n*inA->cmap->N); 992 993 PetscFunctionBegin; 994 BLASscal_(&nz,&oalpha,a->v,&one); 995 ierr = PetscLogFlops(nz);CHKERRQ(ierr); 996 PetscFunctionReturn(0); 997 } 998 999 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *); 1000 1001 #undef __FUNCT__ 1002 #define __FUNCT__ "MatSetUpPreallocation_MPIDense" 1003 PetscErrorCode MatSetUpPreallocation_MPIDense(Mat A) 1004 { 1005 PetscErrorCode ierr; 1006 1007 PetscFunctionBegin; 1008 ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr); 1009 PetscFunctionReturn(0); 1010 } 1011 1012 #if defined(PETSC_HAVE_PLAPACK) 1013 1014 #undef __FUNCT__ 1015 #define __FUNCT__ "MatMPIDenseCopyToPlapack" 1016 PetscErrorCode MatMPIDenseCopyToPlapack(Mat A,Mat F) 1017 { 1018 Mat_Plapack *lu = (Mat_Plapack*)(F)->spptr; 1019 PetscErrorCode ierr; 1020 PetscInt M=A->cmap->N,m=A->rmap->n,rstart; 1021 PetscScalar *array; 1022 PetscReal one = 1.0; 1023 1024 PetscFunctionBegin; 1025 /* Copy A into F->lu->A */ 1026 ierr = PLA_Obj_set_to_zero(lu->A);CHKERRQ(ierr); 1027 ierr = PLA_API_begin();CHKERRQ(ierr); 1028 ierr = PLA_Obj_API_open(lu->A);CHKERRQ(ierr); 1029 ierr = MatGetOwnershipRange(A,&rstart,PETSC_NULL);CHKERRQ(ierr); 1030 ierr = MatGetArray(A,&array);CHKERRQ(ierr); 1031 ierr = PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0);CHKERRQ(ierr); 1032 ierr = MatRestoreArray(A,&array);CHKERRQ(ierr); 1033 ierr = PLA_Obj_API_close(lu->A);CHKERRQ(ierr); 1034 ierr = PLA_API_end();CHKERRQ(ierr); 1035 lu->rstart = rstart; 1036 PetscFunctionReturn(0); 1037 } 1038 1039 #undef __FUNCT__ 1040 #define __FUNCT__ "MatMPIDenseCopyFromPlapack" 1041 PetscErrorCode MatMPIDenseCopyFromPlapack(Mat F,Mat A) 1042 { 1043 Mat_Plapack *lu = (Mat_Plapack*)(F)->spptr; 1044 PetscErrorCode ierr; 1045 PetscInt M=A->cmap->N,m=A->rmap->n,rstart; 1046 PetscScalar *array; 1047 PetscReal one = 1.0; 1048 1049 PetscFunctionBegin; 1050 /* Copy F into A->lu->A */ 1051 ierr = MatZeroEntries(A);CHKERRQ(ierr); 1052 ierr = PLA_API_begin();CHKERRQ(ierr); 1053 ierr = PLA_Obj_API_open(lu->A);CHKERRQ(ierr); 1054 ierr = MatGetOwnershipRange(A,&rstart,PETSC_NULL);CHKERRQ(ierr); 1055 ierr = MatGetArray(A,&array);CHKERRQ(ierr); 1056 ierr = PLA_API_axpy_global_to_matrix(m,M, &one,lu->A,rstart,0,(void *)array,m);CHKERRQ(ierr); 1057 ierr = MatRestoreArray(A,&array);CHKERRQ(ierr); 1058 ierr = PLA_Obj_API_close(lu->A);CHKERRQ(ierr); 1059 ierr = PLA_API_end();CHKERRQ(ierr); 1060 lu->rstart = rstart; 1061 PetscFunctionReturn(0); 1062 } 1063 1064 #undef __FUNCT__ 
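/*
   MatMatMultNumeric_MPIDense_MPIDense below computes C = A*B by copying A and B
   into PLAPACK distributed matrices, calling PLA_Gemm(), and copying the product
   back. A caller-side sketch of the public interface it backs (assuming
   conforming MATMPIDENSE matrices A and B; see the symbolic routine further down
   for the current PLAPACK-related restriction):

      ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
*/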
#define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIDense"
PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
{
  PetscErrorCode ierr;
  Mat_Plapack    *luA = (Mat_Plapack*)A->spptr;
  Mat_Plapack    *luB = (Mat_Plapack*)B->spptr;
  Mat_Plapack    *luC = (Mat_Plapack*)C->spptr;
  PLA_Obj        alpha = NULL,beta = NULL;

  PetscFunctionBegin;
  ierr = MatMPIDenseCopyToPlapack(A,A);CHKERRQ(ierr);
  ierr = MatMPIDenseCopyToPlapack(B,B);CHKERRQ(ierr);

  /*
  ierr = PLA_Global_show("A = ",luA->A,"%g ","");CHKERRQ(ierr);
  ierr = PLA_Global_show("B = ",luB->A,"%g ","");CHKERRQ(ierr);
  */

  /* do the multiply in PLA */
  ierr = PLA_Create_constants_conf_to(luA->A,NULL,NULL,&alpha);CHKERRQ(ierr);
  ierr = PLA_Create_constants_conf_to(luC->A,NULL,&beta,NULL);CHKERRQ(ierr);
  CHKMEMQ;

  ierr = PLA_Gemm(PLA_NO_TRANSPOSE,PLA_NO_TRANSPOSE,alpha,luA->A,luB->A,beta,luC->A); /* CHKERRQ(ierr); */
  CHKMEMQ;
  ierr = PLA_Obj_free(&alpha);CHKERRQ(ierr);
  ierr = PLA_Obj_free(&beta);CHKERRQ(ierr);

  /*
  ierr = PLA_Global_show("C = ",luC->A,"%g ","");CHKERRQ(ierr);
  */
  ierr = MatMPIDenseCopyFromPlapack(C,C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIDense"
PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;
  PetscInt       m=A->rmap->n,n=B->cmap->n;
  Mat            Cmat;

  PetscFunctionBegin;
  if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
  SETERRQ(PETSC_ERR_LIB,"Due to apparent bugs in PLAPACK, this is not currently supported");
  ierr = MatCreate(((PetscObject)B)->comm,&Cmat);CHKERRQ(ierr);
  ierr = MatSetSizes(Cmat,m,n,A->rmap->N,B->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  *C = Cmat;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMatMult_MPIDense_MPIDense"
PetscErrorCode MatMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX){
    ierr = MatMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr);
  }
  ierr = MatMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSolve_MPIDense"
PetscErrorCode MatSolve_MPIDense(Mat A,Vec b,Vec x)
{
  MPI_Comm       comm = ((PetscObject)A)->comm;
  Mat_Plapack    *lu = (Mat_Plapack*)A->spptr;
  PetscErrorCode ierr;
  PetscInt       M=A->rmap->N,m=A->rmap->n,rstart,i,j,*idx_pla,*idx_petsc,loc_m,loc_stride;
  PetscScalar    *array;
  PetscReal      one = 1.0;
  PetscMPIInt    size,rank,r_rank,r_nproc,c_rank,c_nproc;
  PLA_Obj        v_pla = NULL;
  PetscScalar    *loc_buf;
  Vec            loc_x;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  /* Create PLAPACK vector objects, then copy b into PLAPACK b */
  PLA_Mvector_create(lu->datatype,M,1,lu->templ,PLA_ALIGN_FIRST,&v_pla);
  PLA_Obj_set_to_zero(v_pla);

  /* Copy b into rhs_pla */
  PLA_API_begin();
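  /* PLA_API_begin()/PLA_Obj_API_open() open PLAPACK's copy interface; the local
     piece of b is then added into the distributed PLAPACK vector v_pla with
     PLA_API_axpy_vector_to_global() and the interface is closed again below. */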
PLA_Obj_API_open(v_pla); 1161 ierr = VecGetArray(b,&array);CHKERRQ(ierr); 1162 PLA_API_axpy_vector_to_global(m,&one,(void *)array,1,v_pla,lu->rstart); 1163 ierr = VecRestoreArray(b,&array);CHKERRQ(ierr); 1164 PLA_Obj_API_close(v_pla); 1165 PLA_API_end(); 1166 1167 if (A->factor == MAT_FACTOR_LU){ 1168 /* Apply the permutations to the right hand sides */ 1169 PLA_Apply_pivots_to_rows (v_pla,lu->pivots); 1170 1171 /* Solve L y = b, overwriting b with y */ 1172 PLA_Trsv( PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_UNIT_DIAG,lu->A,v_pla ); 1173 1174 /* Solve U x = y (=b), overwriting b with x */ 1175 PLA_Trsv( PLA_UPPER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla ); 1176 } else { /*MAT_FACTOR_CHOLESKY */ 1177 PLA_Trsv( PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla); 1178 PLA_Trsv( PLA_LOWER_TRIANGULAR,(lu->datatype == MPI_DOUBLE ? PLA_TRANSPOSE : PLA_CONJUGATE_TRANSPOSE), 1179 PLA_NONUNIT_DIAG,lu->A,v_pla); 1180 } 1181 1182 /* Copy PLAPACK x into Petsc vector x */ 1183 PLA_Obj_local_length(v_pla, &loc_m); 1184 PLA_Obj_local_buffer(v_pla, (void**)&loc_buf); 1185 PLA_Obj_local_stride(v_pla, &loc_stride); 1186 /* 1187 PetscPrintf(PETSC_COMM_SELF," [%d] b - local_m %d local_stride %d, loc_buf: %g %g, nb: %d\n",rank,loc_m,loc_stride,loc_buf[0],loc_buf[(loc_m-1)*loc_stride],lu->nb); 1188 */ 1189 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,loc_m*loc_stride,loc_buf,&loc_x);CHKERRQ(ierr); 1190 if (!lu->pla_solved){ 1191 1192 PLA_Temp_comm_row_info(lu->templ,&lu->comm_2d,&r_rank,&r_nproc); 1193 PLA_Temp_comm_col_info(lu->templ,&lu->comm_2d,&c_rank,&c_nproc); 1194 /* printf(" [%d] rank: %d %d, nproc: %d %d\n",rank,r_rank,c_rank,r_nproc,c_nproc); */ 1195 1196 /* Create IS and cts for VecScatterring */ 1197 PLA_Obj_local_length(v_pla, &loc_m); 1198 PLA_Obj_local_stride(v_pla, &loc_stride); 1199 ierr = PetscMalloc((2*loc_m+1)*sizeof(PetscInt),&idx_pla);CHKERRQ(ierr); 1200 idx_petsc = idx_pla + loc_m; 1201 1202 rstart = (r_rank*c_nproc+c_rank)*lu->nb; 1203 for (i=0; i<loc_m; i+=lu->nb){ 1204 j = 0; 1205 while (j < lu->nb && i+j < loc_m){ 1206 idx_petsc[i+j] = rstart + j; j++; 1207 } 1208 rstart += size*lu->nb; 1209 } 1210 1211 for (i=0; i<loc_m; i++) idx_pla[i] = i*loc_stride; 1212 1213 ierr = ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_pla,&lu->is_pla);CHKERRQ(ierr); 1214 ierr = ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_petsc,&lu->is_petsc);CHKERRQ(ierr); 1215 ierr = PetscFree(idx_pla);CHKERRQ(ierr); 1216 ierr = VecScatterCreate(loc_x,lu->is_pla,x,lu->is_petsc,&lu->ctx);CHKERRQ(ierr); 1217 } 1218 ierr = VecScatterBegin(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1219 ierr = VecScatterEnd(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1220 1221 /* Free data */ 1222 ierr = VecDestroy(loc_x);CHKERRQ(ierr); 1223 PLA_Obj_free(&v_pla); 1224 1225 lu->pla_solved = PETSC_TRUE; 1226 PetscFunctionReturn(0); 1227 } 1228 1229 #undef __FUNCT__ 1230 #define __FUNCT__ "MatLUFactorNumeric_MPIDense" 1231 PetscErrorCode MatLUFactorNumeric_MPIDense(Mat F,Mat A,const MatFactorInfo *info) 1232 { 1233 Mat_Plapack *lu = (Mat_Plapack*)(F)->spptr; 1234 PetscErrorCode ierr; 1235 PetscInt M=A->rmap->N,m=A->rmap->n,rstart,rend; 1236 PetscInt info_pla=0; 1237 PetscScalar *array,one = 1.0; 1238 1239 PetscFunctionBegin; 1240 if (lu->mstruct == SAME_NONZERO_PATTERN){ 1241 PLA_Obj_free(&lu->A); 1242 PLA_Obj_free (&lu->pivots); 1243 } 1244 /* Create PLAPACK matrix object */ 1245 lu->A = NULL; lu->pivots = NULL; 1246 
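  /* Allocate a fresh PLAPACK matrix and pivot vector for this factorization;
     A is copied into lu->A through the PLA_API calls below before PLA_LU()
     overwrites it with the L and U factors. */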
PLA_Matrix_create(lu->datatype,M,M,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A); 1247 PLA_Obj_set_to_zero(lu->A); 1248 PLA_Mvector_create(MPI_INT,M,1,lu->templ,PLA_ALIGN_FIRST,&lu->pivots); 1249 1250 /* Copy A into lu->A */ 1251 PLA_API_begin(); 1252 PLA_Obj_API_open(lu->A); 1253 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 1254 ierr = MatGetArray(A,&array);CHKERRQ(ierr); 1255 PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0); 1256 ierr = MatRestoreArray(A,&array);CHKERRQ(ierr); 1257 PLA_Obj_API_close(lu->A); 1258 PLA_API_end(); 1259 1260 /* Factor P A -> L U overwriting lower triangular portion of A with L, upper, U */ 1261 info_pla = PLA_LU(lu->A,lu->pivots); 1262 if (info_pla != 0) 1263 SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot encountered at row %d from PLA_LU()",info_pla); 1264 1265 lu->CleanUpPlapack = PETSC_TRUE; 1266 lu->rstart = rstart; 1267 lu->mstruct = SAME_NONZERO_PATTERN; 1268 F->ops->solve = MatSolve_MPIDense; 1269 F->assembled = PETSC_TRUE; /* required by -ksp_view */ 1270 PetscFunctionReturn(0); 1271 } 1272 1273 #undef __FUNCT__ 1274 #define __FUNCT__ "MatCholeskyFactorNumeric_MPIDense" 1275 PetscErrorCode MatCholeskyFactorNumeric_MPIDense(Mat F,Mat A,const MatFactorInfo *info) 1276 { 1277 Mat_Plapack *lu = (Mat_Plapack*)F->spptr; 1278 PetscErrorCode ierr; 1279 PetscInt M=A->rmap->N,m=A->rmap->n,rstart,rend; 1280 PetscInt info_pla=0; 1281 PetscScalar *array,one = 1.0; 1282 1283 PetscFunctionBegin; 1284 if (lu->mstruct == SAME_NONZERO_PATTERN){ 1285 PLA_Obj_free(&lu->A); 1286 } 1287 /* Create PLAPACK matrix object */ 1288 lu->A = NULL; 1289 lu->pivots = NULL; 1290 PLA_Matrix_create(lu->datatype,M,M,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A); 1291 1292 /* Copy A into lu->A */ 1293 PLA_API_begin(); 1294 PLA_Obj_API_open(lu->A); 1295 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 1296 ierr = MatGetArray(A,&array);CHKERRQ(ierr); 1297 PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0); 1298 ierr = MatRestoreArray(A,&array);CHKERRQ(ierr); 1299 PLA_Obj_API_close(lu->A); 1300 PLA_API_end(); 1301 1302 /* Factor P A -> Chol */ 1303 info_pla = PLA_Chol(PLA_LOWER_TRIANGULAR,lu->A); 1304 if (info_pla != 0) 1305 SETERRQ1( PETSC_ERR_MAT_CH_ZRPVT,"Nonpositive definite matrix detected at row %d from PLA_Chol()",info_pla); 1306 1307 lu->CleanUpPlapack = PETSC_TRUE; 1308 lu->rstart = rstart; 1309 lu->mstruct = SAME_NONZERO_PATTERN; 1310 F->ops->solve = MatSolve_MPIDense; 1311 F->assembled = PETSC_TRUE; /* required by -ksp_view */ 1312 PetscFunctionReturn(0); 1313 } 1314 1315 #undef __FUNCT__ 1316 #define __FUNCT__ "MatFactorSymbolic_Plapack_Private" 1317 PetscErrorCode MatFactorSymbolic_Plapack_Private(Mat F,Mat A,const MatFactorInfo *info) 1318 { 1319 Mat B = F; 1320 Mat_Plapack *lu; 1321 PetscErrorCode ierr; 1322 PetscInt M=A->rmap->N; 1323 MPI_Comm comm=((PetscObject)A)->comm,comm_2d; 1324 PetscMPIInt size; 1325 PetscInt ierror; 1326 1327 PetscFunctionBegin; 1328 lu = (Mat_Plapack*)(B->spptr); 1329 1330 /* Set default Plapack parameters */ 1331 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1332 lu->nprows = 1; lu->npcols = size; 1333 ierror = 0; 1334 lu->nb = M/size; 1335 if (M - lu->nb*size) lu->nb++; /* without cyclic distribution */ 1336 1337 /* Set runtime options */ 1338 ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"PLAPACK Options","Mat");CHKERRQ(ierr); 1339 ierr = PetscOptionsInt("-mat_plapack_nprows","row dimension of 2D processor 
mesh","None",lu->nprows,&lu->nprows,PETSC_NULL);CHKERRQ(ierr); 1340 ierr = PetscOptionsInt("-mat_plapack_npcols","column dimension of 2D processor mesh","None",lu->npcols,&lu->npcols,PETSC_NULL);CHKERRQ(ierr); 1341 1342 ierr = PetscOptionsInt("-mat_plapack_nb","block size of template vector","None",lu->nb,&lu->nb,PETSC_NULL);CHKERRQ(ierr); 1343 ierr = PetscOptionsInt("-mat_plapack_ckerror","error checking flag","None",ierror,&ierror,PETSC_NULL);CHKERRQ(ierr); 1344 if (ierror){ 1345 PLA_Set_error_checking(ierror,PETSC_TRUE,PETSC_TRUE,PETSC_FALSE ); 1346 } else { 1347 PLA_Set_error_checking(ierror,PETSC_FALSE,PETSC_FALSE,PETSC_FALSE ); 1348 } 1349 lu->ierror = ierror; 1350 1351 lu->nb_alg = 0; 1352 ierr = PetscOptionsInt("-mat_plapack_nb_alg","algorithmic block size","None",lu->nb_alg,&lu->nb_alg,PETSC_NULL);CHKERRQ(ierr); 1353 if (lu->nb_alg){ 1354 pla_Environ_set_nb_alg (PLA_OP_ALL_ALG,lu->nb_alg); 1355 } 1356 PetscOptionsEnd(); 1357 1358 1359 /* Create a 2D communicator */ 1360 PLA_Comm_1D_to_2D(comm,lu->nprows,lu->npcols,&comm_2d); 1361 lu->comm_2d = comm_2d; 1362 1363 /* Create object distribution template */ 1364 lu->templ = NULL; 1365 PLA_Temp_create(lu->nb, 0, &lu->templ); 1366 1367 /* Use suggested nb_alg if it is not provided by user */ 1368 if (lu->nb_alg == 0){ 1369 PLA_Environ_nb_alg(PLA_OP_PAN_PAN,lu->templ,&lu->nb_alg); 1370 pla_Environ_set_nb_alg(PLA_OP_ALL_ALG,lu->nb_alg); 1371 } 1372 1373 /* Set the datatype */ 1374 #if defined(PETSC_USE_COMPLEX) 1375 lu->datatype = MPI_DOUBLE_COMPLEX; 1376 #else 1377 lu->datatype = MPI_DOUBLE; 1378 #endif 1379 1380 lu->pla_solved = PETSC_FALSE; /* MatSolve_Plapack() is called yet */ 1381 lu->mstruct = DIFFERENT_NONZERO_PATTERN; 1382 lu->CleanUpPlapack = PETSC_TRUE; 1383 PetscFunctionReturn(0); 1384 } 1385 1386 /* Note the Petsc perm permutation is ignored */ 1387 #undef __FUNCT__ 1388 #define __FUNCT__ "MatCholeskyFactorSymbolic_MPIDense" 1389 PetscErrorCode MatCholeskyFactorSymbolic_MPIDense(Mat F,Mat A,IS perm,const MatFactorInfo *info) 1390 { 1391 PetscErrorCode ierr; 1392 PetscTruth issymmetric,set; 1393 1394 PetscFunctionBegin; 1395 ierr = MatIsSymmetricKnown(A,&set,&issymmetric); CHKERRQ(ierr); 1396 if (!set || !issymmetric) SETERRQ(PETSC_ERR_USER,"Matrix must be set as MAT_SYMMETRIC for CholeskyFactor()"); 1397 ierr = MatFactorSymbolic_Plapack_Private(F,A,info);CHKERRQ(ierr); 1398 F->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_MPIDense; 1399 PetscFunctionReturn(0); 1400 } 1401 1402 /* Note the Petsc r and c permutations are ignored */ 1403 #undef __FUNCT__ 1404 #define __FUNCT__ "MatLUFactorSymbolic_MPIDense" 1405 PetscErrorCode MatLUFactorSymbolic_MPIDense(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info) 1406 { 1407 PetscErrorCode ierr; 1408 PetscInt M = A->rmap->N; 1409 Mat_Plapack *lu; 1410 1411 PetscFunctionBegin; 1412 ierr = MatFactorSymbolic_Plapack_Private(F,A,info);CHKERRQ(ierr); 1413 lu = (Mat_Plapack*)F->spptr; 1414 ierr = PLA_Mvector_create(MPI_INT,M,1,lu->templ,PLA_ALIGN_FIRST,&lu->pivots);CHKERRQ(ierr); 1415 F->ops->lufactornumeric = MatLUFactorNumeric_MPIDense; 1416 PetscFunctionReturn(0); 1417 } 1418 1419 #undef __FUNCT__ 1420 #define __FUNCT__ "MatGetFactor_mpidense_petsc" 1421 PetscErrorCode MatGetFactor_mpidense_petsc(Mat A,MatFactorType ftype,Mat *F) 1422 { 1423 PetscErrorCode ierr; 1424 Mat_Plapack *lu; 1425 PetscMPIInt size; 1426 PetscInt M=A->rmap->N; 1427 1428 PetscFunctionBegin; 1429 /* Create the factorization matrix */ 1430 ierr = MatCreate(((PetscObject)A)->comm,F);CHKERRQ(ierr); 1431 ierr = 

#undef __FUNCT__
#define __FUNCT__ "MatGetFactor_mpidense_petsc"
PetscErrorCode MatGetFactor_mpidense_petsc(Mat A,MatFactorType ftype,Mat *F)
{
  PetscErrorCode ierr;
  Mat_Plapack    *lu;
  PetscMPIInt    size;
  PetscInt       M=A->rmap->N;

  PetscFunctionBegin;
  /* Create the factorization matrix */
  ierr = MatCreate(((PetscObject)A)->comm,F);CHKERRQ(ierr);
  ierr = MatSetSizes(*F,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(*F,((PetscObject)A)->type_name);CHKERRQ(ierr);
  ierr = PetscNewLog(*F,Mat_Plapack,&lu);CHKERRQ(ierr);
  (*F)->spptr = (void*)lu;

  /* Set default Plapack parameters */
  ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
  lu->nb = M/size;
  if (M - lu->nb*size) lu->nb++; /* without cyclic distribution */

  /* Set runtime options */
  ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"PLAPACK Options","Mat");CHKERRQ(ierr);
  ierr = PetscOptionsInt("-mat_plapack_nb","block size of template vector","None",lu->nb,&lu->nb,PETSC_NULL);CHKERRQ(ierr);
  PetscOptionsEnd();

  /* Create object distribution template */
  lu->templ = NULL;
  ierr = PLA_Temp_create(lu->nb, 0, &lu->templ);CHKERRQ(ierr);

  /* Set the datatype */
#if defined(PETSC_USE_COMPLEX)
  lu->datatype = MPI_DOUBLE_COMPLEX;
#else
  lu->datatype = MPI_DOUBLE;
#endif

  ierr = PLA_Matrix_create(lu->datatype,M,A->cmap->N,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A);CHKERRQ(ierr);

  lu->pla_solved = PETSC_FALSE; /* MatSolve_Plapack() has not yet been called */

  if (ftype == MAT_FACTOR_LU) {
    (*F)->ops->lufactorsymbolic = MatLUFactorSymbolic_MPIDense;
  } else if (ftype == MAT_FACTOR_CHOLESKY) {
    (*F)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MPIDense;
  } else SETERRQ(PETSC_ERR_SUP,"No incomplete factorizations for dense matrices");
  (*F)->factor = ftype;
  PetscFunctionReturn(0);
}
#endif

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
       MatGetRow_MPIDense,
       MatRestoreRow_MPIDense,
       MatMult_MPIDense,
/* 4*/ MatMultAdd_MPIDense,
       MatMultTranspose_MPIDense,
       MatMultTransposeAdd_MPIDense,
       0,
       0,
       0,
/*10*/ 0,
       0,
       0,
       0,
       MatTranspose_MPIDense,
/*15*/ MatGetInfo_MPIDense,
       MatEqual_MPIDense,
       MatGetDiagonal_MPIDense,
       MatDiagonalScale_MPIDense,
       MatNorm_MPIDense,
/*20*/ MatAssemblyBegin_MPIDense,
       MatAssemblyEnd_MPIDense,
       0,
       MatSetOption_MPIDense,
       MatZeroEntries_MPIDense,
/*25*/ MatZeroRows_MPIDense,
       0,
       0,
       0,
       0,
/*30*/ MatSetUpPreallocation_MPIDense,
       0,
       0,
       MatGetArray_MPIDense,
       MatRestoreArray_MPIDense,
/*35*/ MatDuplicate_MPIDense,
       0,
       0,
       0,
       0,
/*40*/ 0,
       MatGetSubMatrices_MPIDense,
       0,
       MatGetValues_MPIDense,
       0,
/*45*/ 0,
       MatScale_MPIDense,
       0,
       0,
       0,
/*50*/ 0,
       0,
       0,
       0,
       0,
/*55*/ 0,
       0,
       0,
       0,
       0,
/*60*/ MatGetSubMatrix_MPIDense,
       MatDestroy_MPIDense,
       MatView_MPIDense,
       0,
       0,
/*65*/ 0,
       0,
       0,
       0,
       0,
/*70*/ 0,
       0,
       0,
       0,
       0,
/*75*/ 0,
       0,
       0,
       0,
       0,
/*80*/ 0,
       0,
       0,
       0,
/*84*/ MatLoad_MPIDense,
       0,
       0,
       0,
       0,
       0,
/*90*/
#if defined(PETSC_HAVE_PLAPACK)
       MatMatMult_MPIDense_MPIDense,
       MatMatMultSymbolic_MPIDense_MPIDense,
       MatMatMultNumeric_MPIDense_MPIDense,
#else
       0,
       0,
       0,
#endif
       0,
/*95*/ 0,
       0,
       0,
       0};

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense"
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  mat->preallocated = PETSC_TRUE;
  /* Note: For now, when data is specified above, this assumes the user correctly
     allocates the local dense storage space. We should add error checking. */

  a    = (Mat_MPIDense*)mat->data;
  ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
  ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
  ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*MC
   MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.

   Options Database Keys:
. -mat_type mpidense - sets the matrix type to "mpidense" during a call to MatSetFromOptions()

   Level: beginner

   MATMPIDENSE matrices may use direct solvers (LU and Cholesky)
   for parallel dense matrices via the external package PLAPACK, if PLAPACK is installed
   (run config/configure.py with the option --download-plapack)

   Options Database Keys:
+ -mat_plapack_nprows <n> - number of rows in processor partition
. -mat_plapack_npcols <n> - number of columns in processor partition
. -mat_plapack_nb <n> - block size of template vector
. -mat_plapack_nb_alg <n> - algorithmic block size
- -mat_plapack_ckerror <n> - error checking flag

.seealso: MatCreateMPIDense(), MATDENSE, MATSEQDENSE
M*/
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIDense"
PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr      = PetscNewLog(mat,Mat_MPIDense,&a);CHKERRQ(ierr);
  mat->data = (void*)a;
  ierr      = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->mapping = 0;

  mat->insertmode = NOT_SET_VALUES;
  ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&a->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(((PetscObject)mat)->comm,&a->size);CHKERRQ(ierr);

  mat->rmap->bs = mat->cmap->bs = 1;
  ierr = PetscMapSetUp(mat->rmap);CHKERRQ(ierr);
  ierr = PetscMapSetUp(mat->cmap);CHKERRQ(ierr);
  a->nvec = mat->cmap->n;

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;
  ierr = MatStashCreate_Private(((PetscObject)mat)->comm,1,&mat->stash);CHKERRQ(ierr);

  /* stuff used for matrix vector multiply */
  a->lvec        = 0;
  a->Mvctx       = 0;
  a->roworiented = PETSC_TRUE;

  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
                                           "MatGetDiagonalBlock_MPIDense",
                                           MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C",
                                           "MatMPIDenseSetPreallocation_MPIDense",
                                           MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",
                                           "MatMatMult_MPIAIJ_MPIDense",
                                           MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",
                                           "MatMatMultSymbolic_MPIAIJ_MPIDense",
                                           MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",
                                           "MatMatMultNumeric_MPIAIJ_MPIDense",
                                           MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
#if defined(PETSC_HAVE_PLAPACK)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetFactor_mpidense_petsc_C",
                                           "MatGetFactor_mpidense_petsc",
                                           MatGetFactor_mpidense_petsc);CHKERRQ(ierr);
  ierr = PetscPLAPACKInitializePackage(((PetscObject)mat)->comm);CHKERRQ(ierr);
#endif
  ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
EXTERN_C_END

/*MC
   MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.

   This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
   and MATMPIDENSE otherwise.

   Options Database Keys:
. -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPIDense(), MATSEQDENSE, MATMPIDENSE
M*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_Dense"
PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_Dense(Mat A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
  if (size == 1) {
    ierr = MatSetType(A,MATSEQDENSE);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMPIDenseSetPreallocation"
/*@C
   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

   Not collective

   Input Parameters:
+  A - the matrix
-  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
          to control all matrix memory allocation.

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL.

   Level: intermediate

.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data)
{
  PetscErrorCode ierr,(*f)(Mat,PetscScalar *);

  PetscFunctionBegin;
  ierr = PetscObjectQueryFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(mat,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
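/*
   Hedged usage sketch for the MATMPIDENSE type and the preallocation routine above.
   This helper is not part of the original source; it is guarded by the hypothetical
   macro PETSC_MPIDENSE_USAGE_EXAMPLES so it is never compiled into the library, and
   the matrix size and values are arbitrary illustrations.
*/
#if defined(PETSC_MPIDENSE_USAGE_EXAMPLES)
static PetscErrorCode MatMPIDenseCreateExample(MPI_Comm comm,Mat *A)
{
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,rend,N = 10;
  PetscScalar    v;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,PETSC_DECIDE,PETSC_DECIDE,N,N);CHKERRQ(ierr);
  ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(*A,PETSC_NULL);CHKERRQ(ierr);  /* PETSc allocates the local storage */
  ierr = MatGetOwnershipRange(*A,&rstart,&rend);CHKERRQ(ierr);
  for (i=rstart; i<rend; i++) {                                     /* fill the locally owned rows */
    for (j=0; j<N; j++) {
      v    = (PetscScalar)(i + j);
      ierr = MatSetValues(*A,1,&i,1,&j,&v,INSERT_VALUES);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(*A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif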
#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIDense"
/*@C
   MatCreateMPIDense - Creates a parallel matrix in dense format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
-  data - optional location of matrix data.  Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users)
          for PETSc to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users).

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Level: intermediate

.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
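/*
   Hedged sketch (not part of the original source) of calling MatCreateMPIDense() with a
   user-supplied data array: the local block holds m (local rows) by N (global columns)
   scalars in column-major order, and the caller keeps ownership of the array for the
   lifetime of the matrix.  Guarded by the hypothetical macro PETSC_MPIDENSE_USAGE_EXAMPLES.
*/
#if defined(PETSC_MPIDENSE_USAGE_EXAMPLES)
static PetscErrorCode MatMPIDenseUserArrayExample(MPI_Comm comm,PetscInt m,PetscInt N,PetscScalar **data,Mat *A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* allocate and zero the m x N local block (column-major, leading dimension m) */
  ierr = PetscMalloc(m*N*sizeof(PetscScalar),data);CHKERRQ(ierr);
  ierr = PetscMemzero(*data,m*N*sizeof(PetscScalar));CHKERRQ(ierr);

  /* local row count m and global column count N are given; the other sizes are computed */
  ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,PETSC_DECIDE,N,*data,A);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif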
#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIDense"
static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat = 0;
  ierr = MatCreate(((PetscObject)A)->comm,&mat);CHKERRQ(ierr);
  ierr = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
  a    = (Mat_MPIDense*)mat->data;
  ierr = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->factor       = A->factor;
  mat->assembled    = PETSC_TRUE;
  mat->preallocated = PETSC_TRUE;

  mat->rmap->rstart = A->rmap->rstart;
  mat->rmap->rend   = A->rmap->rend;
  a->size           = oldmat->size;
  a->rank           = oldmat->rank;
  mat->insertmode   = NOT_SET_VALUES;
  a->nvec           = oldmat->nvec;
  a->donotstash     = oldmat->donotstash;

  ierr = PetscMemcpy(mat->rmap->range,A->rmap->range,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemcpy(mat->cmap->range,A->cmap->range,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = MatStashCreate_Private(((PetscObject)A)->comm,1,&mat->stash);CHKERRQ(ierr);

  ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);

  *newmat = mat;
  PetscFunctionReturn(0);
}

#include "petscsys.h"

#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIDense_DenseInFile"
PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,const MatType type,Mat *newmat)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  PetscInt       *rowners,i,m,nz,j;
  PetscScalar    *array,*vals,*vals_ptr;
  MPI_Status     status;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* determine ownership of all rows */
  m    = M/size + ((M % size) > rank);
  ierr = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }

  ierr = MatCreate(comm,newmat);CHKERRQ(ierr);
  ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr);

  if (!rank) {
    ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into the matrix row by row; the file is stored by rows while the local
       array is stored by columns, which is why we cannot read directly into the array */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }

    /* read in the other processors' rows and ship them out */
    for (i=1; i<size; i++) {
      nz   = (rowners[i+1] - rowners[i])*N;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(*newmat))->tag,comm);CHKERRQ(ierr);
    }
  } else {
    /* receive our numeric values from process 0 */
    ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
    ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(*newmat))->tag,comm,&status);CHKERRQ(ierr);

    /* insert into the matrix row by row (see the note above) */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }
  }
  ierr = MatRestoreArray(*newmat,&array);CHKERRQ(ierr); /* pair the MatGetArray() above */
  ierr = PetscFree(rowners);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
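/*
   The loader above fills the local dense block through MatGetArray().  Below is a hedged
   sketch (not part of the original source) of the same access pattern from user code; it
   assumes the default layout used in this file, a column-major local block whose leading
   dimension equals the number of local rows.  Guarded by the hypothetical macro
   PETSC_MPIDENSE_USAGE_EXAMPLES.
*/
#if defined(PETSC_MPIDENSE_USAGE_EXAMPLES)
static PetscErrorCode MatMPIDenseArrayAccessExample(Mat A)
{
  PetscErrorCode ierr;
  PetscScalar    *array;
  PetscInt       i,j,m,N,rstart,rend;

  PetscFunctionBegin;
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  ierr = MatGetSize(A,PETSC_NULL,&N);CHKERRQ(ierr);
  m    = rend - rstart;
  ierr = MatGetArray(A,&array);CHKERRQ(ierr);
  for (i=rstart; i<rend; i++) {
    for (j=0; j<N; j++) {
      array[(i-rstart) + j*m] = 0.0;  /* global entry (i,j) of the local block */
    }
  }
  ierr = MatRestoreArray(A,&array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif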
#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIDense"
PetscErrorCode MatLoad_MPIDense(PetscViewer viewer,const MatType type,Mat *newmat)
{
  Mat            A;
  PetscScalar    *vals,*svals;
  MPI_Comm       comm = ((PetscObject)viewer)->comm;
  MPI_Status     status;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,maxnz;
  PetscInt       header[4],*rowlengths = 0,M,N,*cols;
  PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
  PetscInt       i,nz,j,rstart,rend;
  int            fd;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
  }

  ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2]; nz = header[3];

  /*
     Handle case where matrix is stored on disk as a dense matrix
  */
  if (nz == MATRIX_BINARY_FORMAT_DENSE) {
    ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,type,newmat);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* determine ownership of all rows */
  m    = PetscMPIIntCast(M/size + ((M % size) > rank));
  ierr = PetscMalloc((size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&ourlens);CHKERRQ(ierr);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    ierr = PetscMalloc(M*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
    for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]; j<rowners[i+1]; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
    nz   = procsnz[0];
    ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in everyone else's column indices and ship them off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&mycols);CHKERRQ(ierr);

    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  /* loop over local rows, determining number of off diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
  jj   = 0;
  for (i=0; i<m; i++) {
    for (j=0; j<ourlens[i]; j++) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for (i=0; i<m; i++) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreate(comm,newmat);CHKERRQ(ierr);
  ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
  A = *newmat;
  for (i=0; i<m; i++) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors' values and ship them out */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree(ourlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
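/*
   Hedged usage sketch (not part of the original source) for loading a parallel dense
   matrix from a PETSc binary file through the MatLoad path implemented above.  The
   file name is an illustrative assumption; the helper is guarded by the hypothetical
   macro PETSC_MPIDENSE_USAGE_EXAMPLES so it is never built into the library.
*/
#if defined(PETSC_MPIDENSE_USAGE_EXAMPLES)
static PetscErrorCode MatMPIDenseLoadExample(MPI_Comm comm,Mat *A)
{
  PetscErrorCode ierr;
  PetscViewer    viewer;

  PetscFunctionBegin;
  ierr = PetscViewerBinaryOpen(comm,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
  ierr = MatLoad(viewer,MATMPIDENSE,A);CHKERRQ(ierr);  /* dispatches to MatLoad_MPIDense() */
  ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif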
#undef __FUNCT__
#define __FUNCT__ "MatEqual_MPIDense"
PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscTruth *flag)
{
  Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
  Mat            a,b;
  PetscTruth     flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a    = matA->A;
  b    = matB->A;
  ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
  ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_PLAPACK)

#undef __FUNCT__
#define __FUNCT__ "PetscPLAPACKFinalizePackage"
/*@C
  PetscPLAPACKFinalizePackage - This function destroys everything in the Petsc interface to PLAPACK.

  Level: developer

.keywords: Petsc, destroy, package, PLAPACK
.seealso: PetscFinalize()
@*/
PetscErrorCode PETSC_DLLEXPORT PetscPLAPACKFinalizePackage(void)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PLA_Finalize();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
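/*
   Illustrative command line (an assumption; the executable name and values are made up,
   but the option names match those registered in PetscPLAPACKInitializePackage() below)
   for controlling the PLAPACK process mesh and error checking:

     mpiexec -n 8 ./app -mat_type mpidense -plapack_nprows 2 -plapack_npcols 4 -plapack_ckerror 1
*/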
#undef __FUNCT__
#define __FUNCT__ "PetscPLAPACKInitializePackage"
/*@C
  PetscPLAPACKInitializePackage - This function initializes everything in the Petsc interface to PLAPACK.
  It is called from MatCreate_MPIDense() the first time an MPI dense matrix is created.

  Input Parameter:
.  comm - the communicator the matrix lives on

  Level: developer

  Notes: PLAPACK does not have a good fit with MPI communicators; all (parallel) PLAPACK objects have to live in the
  same communicator (because there is some global state that is initialized and used for all matrices).  In addition,
  if PLAPACK is initialized on subcommunicators of MPI_COMM_WORLD (that is, the initial matrices are created on
  subcommunicators), those subcommunicators cannot overlap.

.keywords: Petsc, initialize, package, PLAPACK
.seealso: PetscInitializePackage(), PetscInitialize()
@*/
PetscErrorCode PETSC_DLLEXPORT PetscPLAPACKInitializePackage(MPI_Comm comm)
{
  PetscMPIInt    size;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!PLA_Initialized(PETSC_NULL)) {

    ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
    Plapack_nprows = 1;
    Plapack_npcols = size;

    ierr = PetscOptionsBegin(comm,PETSC_NULL,"PLAPACK Options","Mat");CHKERRQ(ierr);
    ierr = PetscOptionsInt("-plapack_nprows","row dimension of 2D processor mesh","None",Plapack_nprows,&Plapack_nprows,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscOptionsInt("-plapack_npcols","column dimension of 2D processor mesh","None",Plapack_npcols,&Plapack_npcols,PETSC_NULL);CHKERRQ(ierr);
#if defined(PETSC_USE_DEBUG)
    Plapack_ierror = 3;
#else
    Plapack_ierror = 0;
#endif
    ierr = PetscOptionsInt("-plapack_ckerror","error checking flag","None",Plapack_ierror,&Plapack_ierror,PETSC_NULL);CHKERRQ(ierr);
    if (Plapack_ierror) {
      ierr = PLA_Set_error_checking(Plapack_ierror,PETSC_TRUE,PETSC_TRUE,PETSC_FALSE);CHKERRQ(ierr);
    } else {
      ierr = PLA_Set_error_checking(Plapack_ierror,PETSC_FALSE,PETSC_FALSE,PETSC_FALSE);CHKERRQ(ierr);
    }

    Plapack_nb_alg = 0;
    ierr = PetscOptionsInt("-plapack_nb_alg","algorithmic block size","None",Plapack_nb_alg,&Plapack_nb_alg,PETSC_NULL);CHKERRQ(ierr);
    if (Plapack_nb_alg) {
      ierr = pla_Environ_set_nb_alg(PLA_OP_ALL_ALG,Plapack_nb_alg);CHKERRQ(ierr);
    }
    PetscOptionsEnd();

    ierr = PLA_Comm_1D_to_2D(comm,Plapack_nprows,Plapack_npcols,&Plapack_comm_2d);CHKERRQ(ierr);
    ierr = PLA_Init(Plapack_comm_2d);CHKERRQ(ierr);
    ierr = PetscRegisterFinalize(PetscPLAPACKFinalizePackage);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#endif
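/*
   Illustrative, unverified sketch (not from the original source) of the factor-and-solve
   sequence that the factorization hooks in this file serve, written against the PETSc
   3.0-era calling sequence.  The solver-package name and the exact argument lists should
   be checked against the installed PETSc version before use.

     Mat           F;
     MatFactorInfo info;
     ierr = MatFactorInfoInitialize(&info);CHKERRQ(ierr);
     ierr = MatGetFactor(A,MAT_SOLVER_PETSC,MAT_FACTOR_LU,&F);CHKERRQ(ierr);
     ierr = MatLUFactorSymbolic(F,A,PETSC_NULL,PETSC_NULL,&info);CHKERRQ(ierr);
     ierr = MatLUFactorNumeric(F,A,&info);CHKERRQ(ierr);
     ierr = MatSolve(F,b,x);CHKERRQ(ierr);
     ierr = MatDestroy(F);CHKERRQ(ierr);
*/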