1 #define PETSCMAT_DLL 2 3 /* 4 Basic functions for basic parallel dense matrices. 5 */ 6 7 8 #include "src/mat/impls/dense/mpi/mpidense.h" /*I "petscmat.h" I*/ 9 #if defined(PETSC_HAVE_PLAPACK) 10 static PetscMPIInt Plapack_nprows,Plapack_npcols,Plapack_ierror,Plapack_nb_alg; 11 static MPI_Comm Plapack_comm_2d; 12 #endif 13 14 #undef __FUNCT__ 15 #define __FUNCT__ "MatDenseGetLocalMatrix" 16 /*@ 17 18 MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential 19 matrix that represents the operator. For sequential matrices it returns itself. 20 21 Input Parameter: 22 . A - the Seq or MPI dense matrix 23 24 Output Parameter: 25 . B - the inner matrix 26 27 Level: intermediate 28 29 @*/ 30 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B) 31 { 32 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 33 PetscErrorCode ierr; 34 PetscTruth flg; 35 36 PetscFunctionBegin; 37 ierr = PetscTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr); 38 if (flg) { 39 *B = mat->A; 40 } else { 41 *B = A; 42 } 43 PetscFunctionReturn(0); 44 } 45 46 #undef __FUNCT__ 47 #define __FUNCT__ "MatGetRow_MPIDense" 48 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 49 { 50 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 51 PetscErrorCode ierr; 52 PetscInt lrow,rstart = A->rmap.rstart,rend = A->rmap.rend; 53 54 PetscFunctionBegin; 55 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,"only local rows") 56 lrow = row - rstart; 57 ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt **)idx,(const PetscScalar **)v);CHKERRQ(ierr); 58 PetscFunctionReturn(0); 59 } 60 61 #undef __FUNCT__ 62 #define __FUNCT__ "MatRestoreRow_MPIDense" 63 PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 64 { 65 PetscErrorCode ierr; 66 67 PetscFunctionBegin; 68 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 69 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 70 PetscFunctionReturn(0); 71 } 72 73 EXTERN_C_BEGIN 74 #undef __FUNCT__ 75 #define __FUNCT__ "MatGetDiagonalBlock_MPIDense" 76 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B) 77 { 78 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 79 PetscErrorCode ierr; 80 PetscInt m = A->rmap.n,rstart = A->rmap.rstart; 81 PetscScalar *array; 82 MPI_Comm comm; 83 84 PetscFunctionBegin; 85 if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Only square matrices supported."); 86 87 /* The reuse aspect is not implemented efficiently */ 88 if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);} 89 90 ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr); 91 ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr); 92 ierr = MatCreate(comm,B);CHKERRQ(ierr); 93 ierr = MatSetSizes(*B,m,m,m,m);CHKERRQ(ierr); 94 ierr = MatSetType(*B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr); 95 ierr = MatSeqDenseSetPreallocation(*B,array+m*rstart);CHKERRQ(ierr); 96 ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr); 97 ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 98 ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 99 100 *iscopy = PETSC_TRUE; 101 PetscFunctionReturn(0); 102 } 103 EXTERN_C_END 104 105 #undef __FUNCT__ 106 #define __FUNCT__ "MatSetValues_MPIDense" 107 PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv) 108 { 109 Mat_MPIDense *A = (Mat_MPIDense*)mat->data; 110 PetscErrorCode ierr; 111 PetscInt i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend,row; 112 PetscTruth roworiented = A->roworiented; 113 114 PetscFunctionBegin; 115 for (i=0; i<m; i++) { 116 if (idxm[i] < 0) continue; 117 if (idxm[i] >= mat->rmap.N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 118 if (idxm[i] >= rstart && idxm[i] < rend) { 119 row = idxm[i] - rstart; 120 if (roworiented) { 121 ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr); 122 } else { 123 for (j=0; j<n; j++) { 124 if (idxn[j] < 0) continue; 125 if (idxn[j] >= mat->cmap.N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 126 ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr); 127 } 128 } 129 } else { 130 if (!A->donotstash) { 131 if (roworiented) { 132 ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);CHKERRQ(ierr); 133 } else { 134 ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);CHKERRQ(ierr); 135 } 136 } 137 } 138 } 139 PetscFunctionReturn(0); 140 } 141 142 #undef __FUNCT__ 143 #define __FUNCT__ "MatGetValues_MPIDense" 144 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 145 { 146 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 147 PetscErrorCode ierr; 148 PetscInt i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend,row; 149 150 PetscFunctionBegin; 151 for (i=0; i<m; i++) { 152 if (idxm[i] < 0) continue; /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 153 if (idxm[i] >= mat->rmap.N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 154 if (idxm[i] >= rstart && idxm[i] < rend) { 155 row = idxm[i] - rstart; 156 for (j=0; j<n; j++) { 157 if (idxn[j] < 0) continue; /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 158 if (idxn[j] >= mat->cmap.N) { 159 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 160 } 161 ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr); 162 } 163 } else { 164 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 165 } 166 } 167 PetscFunctionReturn(0); 168 } 169 170 #undef __FUNCT__ 171 #define __FUNCT__ "MatGetArray_MPIDense" 172 PetscErrorCode MatGetArray_MPIDense(Mat A,PetscScalar *array[]) 173 { 174 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 175 PetscErrorCode ierr; 176 177 PetscFunctionBegin; 178 ierr = MatGetArray(a->A,array);CHKERRQ(ierr); 179 PetscFunctionReturn(0); 180 } 181 182 #undef __FUNCT__ 183 #define __FUNCT__ "MatGetSubMatrix_MPIDense" 184 static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,PetscInt cs,MatReuse scall,Mat *B) 185 { 186 Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd; 187 Mat_SeqDense *lmat = (Mat_SeqDense*)mat->A->data; 188 PetscErrorCode ierr; 189 PetscInt i,j,*irow,*icol,rstart,rend,nrows,ncols,nlrows,nlcols; 190 PetscScalar *av,*bv,*v = lmat->v; 191 Mat newmat; 192 193 PetscFunctionBegin; 194 ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr); 195 ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr); 196 ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr); 197 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 198 199 /* No parallel redistribution currently supported! Should really check each index set 200 to comfirm that it is OK. ... Currently supports only submatrix same partitioning as 201 original matrix! */ 202 203 ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr); 204 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 205 206 /* Check submatrix call */ 207 if (scall == MAT_REUSE_MATRIX) { 208 /* SETERRQ(PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */ 209 /* Really need to test rows and column sizes! */ 210 newmat = *B; 211 } else { 212 /* Create and fill new matrix */ 213 ierr = MatCreate(((PetscObject)A)->comm,&newmat);CHKERRQ(ierr); 214 ierr = MatSetSizes(newmat,nrows,cs,PETSC_DECIDE,ncols);CHKERRQ(ierr); 215 ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr); 216 ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr); 217 } 218 219 /* Now extract the data pointers and do the copy, column at a time */ 220 newmatd = (Mat_MPIDense*)newmat->data; 221 bv = ((Mat_SeqDense *)newmatd->A->data)->v; 222 223 for (i=0; i<ncols; i++) { 224 av = v + nlrows*icol[i]; 225 for (j=0; j<nrows; j++) { 226 *bv++ = av[irow[j] - rstart]; 227 } 228 } 229 230 /* Assemble the matrices so that the correct flags are set */ 231 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 232 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 233 234 /* Free work space */ 235 ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr); 236 ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr); 237 *B = newmat; 238 PetscFunctionReturn(0); 239 } 240 241 #undef __FUNCT__ 242 #define __FUNCT__ "MatRestoreArray_MPIDense" 243 PetscErrorCode MatRestoreArray_MPIDense(Mat A,PetscScalar *array[]) 244 { 245 PetscFunctionBegin; 246 PetscFunctionReturn(0); 247 } 248 249 #undef __FUNCT__ 250 #define __FUNCT__ "MatAssemblyBegin_MPIDense" 251 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode) 252 { 253 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 254 MPI_Comm comm = ((PetscObject)mat)->comm; 255 PetscErrorCode ierr; 256 PetscInt nstash,reallocs; 257 InsertMode addv; 258 259 PetscFunctionBegin; 260 /* make sure all processors are either in INSERTMODE or ADDMODE */ 261 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr); 262 if (addv == (ADD_VALUES|INSERT_VALUES)) { 263 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs"); 264 } 265 mat->insertmode = addv; /* in case this processor had no cache */ 266 267 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap.range);CHKERRQ(ierr); 268 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 269 ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 270 PetscFunctionReturn(0); 271 } 272 273 #undef __FUNCT__ 274 #define __FUNCT__ "MatAssemblyEnd_MPIDense" 275 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode) 276 { 277 Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data; 278 PetscErrorCode ierr; 279 PetscInt i,*row,*col,flg,j,rstart,ncols; 280 PetscMPIInt n; 281 PetscScalar *val; 282 InsertMode addv=mat->insertmode; 283 284 PetscFunctionBegin; 285 /* wait on receives */ 286 while (1) { 287 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 288 if (!flg) break; 289 290 for (i=0; i<n;) { 291 /* Now identify the consecutive vals belonging to the same row */ 292 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 293 if (j < n) ncols = j-i; 294 else ncols = n-i; 295 /* Now assemble all these values with a single function call */ 296 ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 297 i = j; 298 } 299 } 300 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 301 302 ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr); 303 ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr); 304 305 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 306 ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr); 307 } 308 PetscFunctionReturn(0); 309 } 310 311 #undef __FUNCT__ 312 #define __FUNCT__ "MatZeroEntries_MPIDense" 313 PetscErrorCode MatZeroEntries_MPIDense(Mat A) 314 { 315 PetscErrorCode ierr; 316 Mat_MPIDense *l = (Mat_MPIDense*)A->data; 317 318 PetscFunctionBegin; 319 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 320 PetscFunctionReturn(0); 321 } 322 323 /* the code does not do the diagonal entries correctly unless the 324 matrix is square and the column and row owerships are identical. 325 This is a BUG. The only way to fix it seems to be to access 326 mdn->A and mdn->B directly and not through the MatZeroRows() 327 routine. 328 */ 329 #undef __FUNCT__ 330 #define __FUNCT__ "MatZeroRows_MPIDense" 331 PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag) 332 { 333 Mat_MPIDense *l = (Mat_MPIDense*)A->data; 334 PetscErrorCode ierr; 335 PetscInt i,*owners = A->rmap.range; 336 PetscInt *nprocs,j,idx,nsends; 337 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 338 PetscInt *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source; 339 PetscInt *lens,*lrows,*values; 340 PetscMPIInt n,imdex,rank = l->rank,size = l->size; 341 MPI_Comm comm = ((PetscObject)A)->comm; 342 MPI_Request *send_waits,*recv_waits; 343 MPI_Status recv_status,*send_status; 344 PetscTruth found; 345 346 PetscFunctionBegin; 347 /* first count number of contributors to each processor */ 348 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 349 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 350 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 351 for (i=0; i<N; i++) { 352 idx = rows[i]; 353 found = PETSC_FALSE; 354 for (j=0; j<size; j++) { 355 if (idx >= owners[j] && idx < owners[j+1]) { 356 nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break; 357 } 358 } 359 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 360 } 361 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 362 363 /* inform other processors of number of messages and max length*/ 364 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 365 366 /* post receives: */ 367 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 368 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 369 for (i=0; i<nrecvs; i++) { 370 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 371 } 372 373 /* do sends: 374 1) starts[i] gives the starting index in svalues for stuff going to 375 the ith processor 376 */ 377 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 378 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 379 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 380 starts[0] = 0; 381 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 382 for (i=0; i<N; i++) { 383 svalues[starts[owner[i]]++] = rows[i]; 384 } 385 386 starts[0] = 0; 387 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 388 count = 0; 389 for (i=0; i<size; i++) { 390 if (nprocs[2*i+1]) { 391 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 392 } 393 } 394 ierr = PetscFree(starts);CHKERRQ(ierr); 395 396 base = owners[rank]; 397 398 /* wait on receives */ 399 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 400 source = lens + nrecvs; 401 count = nrecvs; slen = 0; 402 while (count) { 403 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 404 /* unpack receives into our local space */ 405 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 406 source[imdex] = recv_status.MPI_SOURCE; 407 lens[imdex] = n; 408 slen += n; 409 count--; 410 } 411 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 412 413 /* move the data into the send scatter */ 414 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 415 count = 0; 416 for (i=0; i<nrecvs; i++) { 417 values = rvalues + i*nmax; 418 for (j=0; j<lens[i]; j++) { 419 lrows[count++] = values[j] - base; 420 } 421 } 422 ierr = PetscFree(rvalues);CHKERRQ(ierr); 423 ierr = PetscFree(lens);CHKERRQ(ierr); 424 ierr = PetscFree(owner);CHKERRQ(ierr); 425 ierr = PetscFree(nprocs);CHKERRQ(ierr); 426 427 /* actually zap the local rows */ 428 ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr); 429 ierr = PetscFree(lrows);CHKERRQ(ierr); 430 431 /* wait on sends */ 432 if (nsends) { 433 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 434 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 435 ierr = PetscFree(send_status);CHKERRQ(ierr); 436 } 437 ierr = PetscFree(send_waits);CHKERRQ(ierr); 438 ierr = PetscFree(svalues);CHKERRQ(ierr); 439 440 PetscFunctionReturn(0); 441 } 442 443 #undef __FUNCT__ 444 #define __FUNCT__ "MatMult_MPIDense" 445 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy) 446 { 447 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 448 PetscErrorCode ierr; 449 450 PetscFunctionBegin; 451 ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 452 ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 453 ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr); 454 PetscFunctionReturn(0); 455 } 456 457 #undef __FUNCT__ 458 #define __FUNCT__ "MatMultAdd_MPIDense" 459 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz) 460 { 461 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 462 PetscErrorCode ierr; 463 464 PetscFunctionBegin; 465 ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 466 ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 467 ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr); 468 PetscFunctionReturn(0); 469 } 470 471 #undef __FUNCT__ 472 #define __FUNCT__ "MatMultTranspose_MPIDense" 473 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy) 474 { 475 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 476 PetscErrorCode ierr; 477 PetscScalar zero = 0.0; 478 479 PetscFunctionBegin; 480 ierr = VecSet(yy,zero);CHKERRQ(ierr); 481 ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr); 482 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 483 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 484 PetscFunctionReturn(0); 485 } 486 487 #undef __FUNCT__ 488 #define __FUNCT__ "MatMultTransposeAdd_MPIDense" 489 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz) 490 { 491 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 492 PetscErrorCode ierr; 493 494 PetscFunctionBegin; 495 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 496 ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr); 497 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 498 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 499 PetscFunctionReturn(0); 500 } 501 502 #undef __FUNCT__ 503 #define __FUNCT__ "MatGetDiagonal_MPIDense" 504 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v) 505 { 506 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 507 Mat_SeqDense *aloc = (Mat_SeqDense*)a->A->data; 508 PetscErrorCode ierr; 509 PetscInt len,i,n,m = A->rmap.n,radd; 510 PetscScalar *x,zero = 0.0; 511 512 PetscFunctionBegin; 513 ierr = VecSet(v,zero);CHKERRQ(ierr); 514 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 515 ierr = VecGetSize(v,&n);CHKERRQ(ierr); 516 if (n != A->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec"); 517 len = PetscMin(a->A->rmap.n,a->A->cmap.n); 518 radd = A->rmap.rstart*m; 519 for (i=0; i<len; i++) { 520 x[i] = aloc->v[radd + i*m + i]; 521 } 522 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 523 PetscFunctionReturn(0); 524 } 525 526 #undef __FUNCT__ 527 #define __FUNCT__ "MatDestroy_MPIDense" 528 PetscErrorCode MatDestroy_MPIDense(Mat mat) 529 { 530 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 531 PetscErrorCode ierr; 532 #if defined(PETSC_HAVE_PLAPACK) 533 Mat_Plapack *lu=(Mat_Plapack*)(mat->spptr); 534 #endif 535 536 PetscFunctionBegin; 537 538 #if defined(PETSC_USE_LOG) 539 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap.N,mat->cmap.N); 540 #endif 541 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 542 ierr = MatDestroy(mdn->A);CHKERRQ(ierr); 543 if (mdn->lvec) {ierr = VecDestroy(mdn->lvec);CHKERRQ(ierr);} 544 if (mdn->Mvctx) {ierr = VecScatterDestroy(mdn->Mvctx);CHKERRQ(ierr);} 545 #if defined(PETSC_HAVE_PLAPACK) 546 if (lu) { 547 ierr = PLA_Obj_free(&lu->A);CHKERRQ(ierr); 548 ierr = PLA_Obj_free (&lu->pivots);CHKERRQ(ierr); 549 ierr = PLA_Temp_free(&lu->templ);CHKERRQ(ierr); 550 551 if (lu->is_pla) { 552 ierr = ISDestroy(lu->is_pla);CHKERRQ(ierr); 553 ierr = ISDestroy(lu->is_petsc);CHKERRQ(ierr); 554 ierr = VecScatterDestroy(lu->ctx);CHKERRQ(ierr); 555 } 556 } 557 #endif 558 559 ierr = PetscFree(mdn);CHKERRQ(ierr); 560 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 561 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 562 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 563 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr); 564 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr); 565 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr); 566 PetscFunctionReturn(0); 567 } 568 569 #undef __FUNCT__ 570 #define __FUNCT__ "MatView_MPIDense_Binary" 571 static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer) 572 { 573 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 574 PetscErrorCode ierr; 575 PetscViewerFormat format; 576 int fd; 577 PetscInt header[4],mmax,N = mat->cmap.N,i,j,m,k; 578 PetscMPIInt rank,tag = ((PetscObject)viewer)->tag,size; 579 PetscScalar *work,*v,*vv; 580 Mat_SeqDense *a = (Mat_SeqDense*)mdn->A->data; 581 MPI_Status status; 582 583 PetscFunctionBegin; 584 if (mdn->size == 1) { 585 ierr = MatView(mdn->A,viewer);CHKERRQ(ierr); 586 } else { 587 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 588 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 589 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 590 591 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 592 if (format == PETSC_VIEWER_BINARY_NATIVE) { 593 594 if (!rank) { 595 /* store the matrix as a dense matrix */ 596 header[0] = MAT_FILE_COOKIE; 597 header[1] = mat->rmap.N; 598 header[2] = N; 599 header[3] = MATRIX_BINARY_FORMAT_DENSE; 600 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 601 602 /* get largest work array needed for transposing array */ 603 mmax = mat->rmap.n; 604 for (i=1; i<size; i++) { 605 mmax = PetscMax(mmax,mat->rmap.range[i+1] - mat->rmap.range[i]); 606 } 607 ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&work);CHKERRQ(ierr); 608 609 /* write out local array, by rows */ 610 m = mat->rmap.n; 611 v = a->v; 612 for (j=0; j<N; j++) { 613 for (i=0; i<m; i++) { 614 work[j + i*N] = *v++; 615 } 616 } 617 ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 618 /* get largest work array to receive messages from other processes, excludes process zero */ 619 mmax = 0; 620 for (i=1; i<size; i++) { 621 mmax = PetscMax(mmax,mat->rmap.range[i+1] - mat->rmap.range[i]); 622 } 623 ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&vv);CHKERRQ(ierr); 624 v = vv; 625 for (k=1; k<size; k++) { 626 m = mat->rmap.range[k+1] - mat->rmap.range[k]; 627 ierr = MPI_Recv(v,m*N,MPIU_SCALAR,k,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 628 629 for (j=0; j<N; j++) { 630 for (i=0; i<m; i++) { 631 work[j + i*N] = *v++; 632 } 633 } 634 ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 635 } 636 ierr = PetscFree(work);CHKERRQ(ierr); 637 ierr = PetscFree(vv);CHKERRQ(ierr); 638 } else { 639 ierr = MPI_Send(a->v,mat->rmap.n*mat->cmap.N,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 640 } 641 } 642 } 643 PetscFunctionReturn(0); 644 } 645 646 #undef __FUNCT__ 647 #define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket" 648 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 649 { 650 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 651 PetscErrorCode ierr; 652 PetscMPIInt size = mdn->size,rank = mdn->rank; 653 PetscViewerType vtype; 654 PetscTruth iascii,isdraw; 655 PetscViewer sviewer; 656 PetscViewerFormat format; 657 #if defined(PETSC_HAVE_PLAPACK) 658 Mat_Plapack *lu=(Mat_Plapack*)(mat->spptr); 659 #endif 660 661 PetscFunctionBegin; 662 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 663 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 664 if (iascii) { 665 ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr); 666 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 667 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 668 MatInfo info; 669 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 670 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap.n, 671 (PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 672 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 673 ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr); 674 #if defined(PETSC_HAVE_PLAPACK) 675 ierr = PetscViewerASCIIPrintf(viewer,"PLAPACK run parameters:\n");CHKERRQ(ierr); 676 ierr = PetscViewerASCIIPrintf(viewer," Processor mesh: nprows %d, npcols %d\n",Plapack_nprows, Plapack_npcols);CHKERRQ(ierr); 677 ierr = PetscViewerASCIIPrintf(viewer," Distr. block size nb: %d \n",lu->nb);CHKERRQ(ierr); 678 ierr = PetscViewerASCIIPrintf(viewer," Error checking: %d\n",Plapack_ierror);CHKERRQ(ierr); 679 ierr = PetscViewerASCIIPrintf(viewer," Algorithmic block size: %d\n",Plapack_nb_alg);CHKERRQ(ierr); 680 #endif 681 PetscFunctionReturn(0); 682 } else if (format == PETSC_VIEWER_ASCII_INFO) { 683 PetscFunctionReturn(0); 684 } 685 } else if (isdraw) { 686 PetscDraw draw; 687 PetscTruth isnull; 688 689 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 690 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 691 if (isnull) PetscFunctionReturn(0); 692 } 693 694 if (size == 1) { 695 ierr = MatView(mdn->A,viewer);CHKERRQ(ierr); 696 } else { 697 /* assemble the entire matrix onto first processor. */ 698 Mat A; 699 PetscInt M = mat->rmap.N,N = mat->cmap.N,m,row,i,nz; 700 PetscInt *cols; 701 PetscScalar *vals; 702 703 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 704 if (!rank) { 705 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 706 } else { 707 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 708 } 709 /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */ 710 ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr); 711 ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL); 712 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 713 714 /* Copy the matrix ... This isn't the most efficient means, 715 but it's quick for now */ 716 A->insertmode = INSERT_VALUES; 717 row = mat->rmap.rstart; m = mdn->A->rmap.n; 718 for (i=0; i<m; i++) { 719 ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 720 ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 721 ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 722 row++; 723 } 724 725 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 726 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 727 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 728 if (!rank) { 729 ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr); 730 } 731 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 732 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 733 ierr = MatDestroy(A);CHKERRQ(ierr); 734 } 735 PetscFunctionReturn(0); 736 } 737 738 #undef __FUNCT__ 739 #define __FUNCT__ "MatView_MPIDense" 740 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer) 741 { 742 PetscErrorCode ierr; 743 PetscTruth iascii,isbinary,isdraw,issocket; 744 745 PetscFunctionBegin; 746 747 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 748 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 749 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 750 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 751 752 if (iascii || issocket || isdraw) { 753 ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 754 } else if (isbinary) { 755 ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr); 756 } else { 757 SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name); 758 } 759 PetscFunctionReturn(0); 760 } 761 762 #undef __FUNCT__ 763 #define __FUNCT__ "MatGetInfo_MPIDense" 764 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info) 765 { 766 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 767 Mat mdn = mat->A; 768 PetscErrorCode ierr; 769 PetscReal isend[5],irecv[5]; 770 771 PetscFunctionBegin; 772 info->rows_global = (double)A->rmap.N; 773 info->columns_global = (double)A->cmap.N; 774 info->rows_local = (double)A->rmap.n; 775 info->columns_local = (double)A->cmap.N; 776 info->block_size = 1.0; 777 ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr); 778 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 779 isend[3] = info->memory; isend[4] = info->mallocs; 780 if (flag == MAT_LOCAL) { 781 info->nz_used = isend[0]; 782 info->nz_allocated = isend[1]; 783 info->nz_unneeded = isend[2]; 784 info->memory = isend[3]; 785 info->mallocs = isend[4]; 786 } else if (flag == MAT_GLOBAL_MAX) { 787 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)A)->comm);CHKERRQ(ierr); 788 info->nz_used = irecv[0]; 789 info->nz_allocated = irecv[1]; 790 info->nz_unneeded = irecv[2]; 791 info->memory = irecv[3]; 792 info->mallocs = irecv[4]; 793 } else if (flag == MAT_GLOBAL_SUM) { 794 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr); 795 info->nz_used = irecv[0]; 796 info->nz_allocated = irecv[1]; 797 info->nz_unneeded = irecv[2]; 798 info->memory = irecv[3]; 799 info->mallocs = irecv[4]; 800 } 801 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 802 info->fill_ratio_needed = 0; 803 info->factor_mallocs = 0; 804 PetscFunctionReturn(0); 805 } 806 807 #undef __FUNCT__ 808 #define __FUNCT__ "MatSetOption_MPIDense" 809 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscTruth flg) 810 { 811 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 812 PetscErrorCode ierr; 813 814 PetscFunctionBegin; 815 switch (op) { 816 case MAT_NEW_NONZERO_LOCATIONS: 817 case MAT_NEW_NONZERO_LOCATION_ERR: 818 case MAT_NEW_NONZERO_ALLOCATION_ERR: 819 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 820 break; 821 case MAT_ROW_ORIENTED: 822 a->roworiented = flg; 823 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 824 break; 825 case MAT_NEW_DIAGONALS: 826 case MAT_USE_HASH_TABLE: 827 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 828 break; 829 case MAT_IGNORE_OFF_PROC_ENTRIES: 830 a->donotstash = flg; 831 break; 832 case MAT_SYMMETRIC: 833 case MAT_STRUCTURALLY_SYMMETRIC: 834 case MAT_HERMITIAN: 835 case MAT_SYMMETRY_ETERNAL: 836 case MAT_IGNORE_LOWER_TRIANGULAR: 837 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 838 break; 839 default: 840 SETERRQ1(PETSC_ERR_SUP,"unknown option %s",MatOptions[op]); 841 } 842 PetscFunctionReturn(0); 843 } 844 845 846 #undef __FUNCT__ 847 #define __FUNCT__ "MatDiagonalScale_MPIDense" 848 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr) 849 { 850 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 851 Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data; 852 PetscScalar *l,*r,x,*v; 853 PetscErrorCode ierr; 854 PetscInt i,j,s2a,s3a,s2,s3,m=mdn->A->rmap.n,n=mdn->A->cmap.n; 855 856 PetscFunctionBegin; 857 ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr); 858 if (ll) { 859 ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr); 860 if (s2a != s2) SETERRQ2(PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2); 861 ierr = VecGetArray(ll,&l);CHKERRQ(ierr); 862 for (i=0; i<m; i++) { 863 x = l[i]; 864 v = mat->v + i; 865 for (j=0; j<n; j++) { (*v) *= x; v+= m;} 866 } 867 ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr); 868 ierr = PetscLogFlops(n*m);CHKERRQ(ierr); 869 } 870 if (rr) { 871 ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr); 872 if (s3a != s3) SETERRQ2(PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3); 873 ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 874 ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 875 ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr); 876 for (i=0; i<n; i++) { 877 x = r[i]; 878 v = mat->v + i*m; 879 for (j=0; j<m; j++) { (*v++) *= x;} 880 } 881 ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr); 882 ierr = PetscLogFlops(n*m);CHKERRQ(ierr); 883 } 884 PetscFunctionReturn(0); 885 } 886 887 #undef __FUNCT__ 888 #define __FUNCT__ "MatNorm_MPIDense" 889 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm) 890 { 891 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 892 Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data; 893 PetscErrorCode ierr; 894 PetscInt i,j; 895 PetscReal sum = 0.0; 896 PetscScalar *v = mat->v; 897 898 PetscFunctionBegin; 899 if (mdn->size == 1) { 900 ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr); 901 } else { 902 if (type == NORM_FROBENIUS) { 903 for (i=0; i<mdn->A->cmap.n*mdn->A->rmap.n; i++) { 904 #if defined(PETSC_USE_COMPLEX) 905 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 906 #else 907 sum += (*v)*(*v); v++; 908 #endif 909 } 910 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr); 911 *nrm = sqrt(*nrm); 912 ierr = PetscLogFlops(2*mdn->A->cmap.n*mdn->A->rmap.n);CHKERRQ(ierr); 913 } else if (type == NORM_1) { 914 PetscReal *tmp,*tmp2; 915 ierr = PetscMalloc(2*A->cmap.N*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 916 tmp2 = tmp + A->cmap.N; 917 ierr = PetscMemzero(tmp,2*A->cmap.N*sizeof(PetscReal));CHKERRQ(ierr); 918 *nrm = 0.0; 919 v = mat->v; 920 for (j=0; j<mdn->A->cmap.n; j++) { 921 for (i=0; i<mdn->A->rmap.n; i++) { 922 tmp[j] += PetscAbsScalar(*v); v++; 923 } 924 } 925 ierr = MPI_Allreduce(tmp,tmp2,A->cmap.N,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr); 926 for (j=0; j<A->cmap.N; j++) { 927 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 928 } 929 ierr = PetscFree(tmp);CHKERRQ(ierr); 930 ierr = PetscLogFlops(A->cmap.n*A->rmap.n);CHKERRQ(ierr); 931 } else if (type == NORM_INFINITY) { /* max row norm */ 932 PetscReal ntemp; 933 ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr); 934 ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPI_MAX,((PetscObject)A)->comm);CHKERRQ(ierr); 935 } else { 936 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 937 } 938 } 939 PetscFunctionReturn(0); 940 } 941 942 #undef __FUNCT__ 943 #define __FUNCT__ "MatTranspose_MPIDense" 944 PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout) 945 { 946 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 947 Mat_SeqDense *Aloc = (Mat_SeqDense*)a->A->data; 948 Mat B; 949 PetscInt M = A->rmap.N,N = A->cmap.N,m,n,*rwork,rstart = A->rmap.rstart; 950 PetscErrorCode ierr; 951 PetscInt j,i; 952 PetscScalar *v; 953 954 PetscFunctionBegin; 955 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PETSC_ERR_SUP,"Supports square matrix only in-place"); 956 if (reuse == MAT_INITIAL_MATRIX || A == *matout) { 957 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 958 ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,N,M);CHKERRQ(ierr); 959 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 960 ierr = MatMPIDenseSetPreallocation(B,PETSC_NULL);CHKERRQ(ierr); 961 } else { 962 B = *matout; 963 } 964 965 m = a->A->rmap.n; n = a->A->cmap.n; v = Aloc->v; 966 ierr = PetscMalloc(m*sizeof(PetscInt),&rwork);CHKERRQ(ierr); 967 for (i=0; i<m; i++) rwork[i] = rstart + i; 968 for (j=0; j<n; j++) { 969 ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr); 970 v += m; 971 } 972 ierr = PetscFree(rwork);CHKERRQ(ierr); 973 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 974 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 975 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 976 *matout = B; 977 } else { 978 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 979 } 980 PetscFunctionReturn(0); 981 } 982 983 #include "petscblaslapack.h" 984 #undef __FUNCT__ 985 #define __FUNCT__ "MatScale_MPIDense" 986 PetscErrorCode MatScale_MPIDense(Mat inA,PetscScalar alpha) 987 { 988 Mat_MPIDense *A = (Mat_MPIDense*)inA->data; 989 Mat_SeqDense *a = (Mat_SeqDense*)A->A->data; 990 PetscScalar oalpha = alpha; 991 PetscErrorCode ierr; 992 PetscBLASInt one = 1,nz = PetscBLASIntCast(inA->rmap.n*inA->cmap.N); 993 994 PetscFunctionBegin; 995 BLASscal_(&nz,&oalpha,a->v,&one); 996 ierr = PetscLogFlops(nz);CHKERRQ(ierr); 997 PetscFunctionReturn(0); 998 } 999 1000 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *); 1001 1002 #undef __FUNCT__ 1003 #define __FUNCT__ "MatSetUpPreallocation_MPIDense" 1004 PetscErrorCode MatSetUpPreallocation_MPIDense(Mat A) 1005 { 1006 PetscErrorCode ierr; 1007 1008 PetscFunctionBegin; 1009 ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr); 1010 PetscFunctionReturn(0); 1011 } 1012 1013 #if defined(PETSC_HAVE_PLAPACK) 1014 1015 #undef __FUNCT__ 1016 #define __FUNCT__ "MatMPIDenseCreatePlapack" 1017 PetscErrorCode MatMPIDenseCreatePlapack(Mat A) 1018 { 1019 Mat_Plapack *lu; 1020 PetscErrorCode ierr; 1021 PetscInt M=A->rmap.N; 1022 MPI_Comm comm=((PetscObject)A)->comm; 1023 PetscMPIInt size; 1024 1025 PetscFunctionBegin; 1026 if (A->spptr) PetscFunctionReturn(0); 1027 ierr = PetscNewLog(A,Mat_Plapack,&lu);CHKERRQ(ierr); 1028 A->spptr = (void*)lu; 1029 1030 lu = (Mat_Plapack*)(A->spptr); 1031 1032 /* Set default Plapack parameters */ 1033 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1034 lu->nb = M/size; 1035 if (M - lu->nb*size) lu->nb++; /* without cyclic distribution */ 1036 1037 /* Set runtime options */ 1038 ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"PLAPACK Options","Mat");CHKERRQ(ierr); 1039 ierr = PetscOptionsInt("-mat_plapack_nb","block size of template vector","None",lu->nb,&lu->nb,PETSC_NULL);CHKERRQ(ierr); 1040 PetscOptionsEnd(); 1041 1042 /* Create object distribution template */ 1043 lu->templ = NULL; 1044 ierr = PLA_Temp_create(lu->nb, 0, &lu->templ);CHKERRQ(ierr); 1045 1046 /* Set the datatype */ 1047 #if defined(PETSC_USE_COMPLEX) 1048 lu->datatype = MPI_DOUBLE_COMPLEX; 1049 #else 1050 lu->datatype = MPI_DOUBLE; 1051 #endif 1052 1053 ierr = PLA_Matrix_create(lu->datatype,M,A->cmap.N,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A);CHKERRQ(ierr); 1054 1055 1056 lu->pla_solved = PETSC_FALSE; /* MatSolve_Plapack() is called yet */ 1057 PetscFunctionReturn(0); 1058 } 1059 1060 #undef __FUNCT__ 1061 #define __FUNCT__ "MatMPIDenseCopyToPlapack" 1062 PetscErrorCode MatMPIDenseCopyToPlapack(Mat A,Mat F) 1063 { 1064 Mat_Plapack *lu = (Mat_Plapack*)(F)->spptr; 1065 PetscErrorCode ierr; 1066 PetscInt M=A->cmap.N,m=A->rmap.n,rstart; 1067 PetscScalar *array; 1068 PetscReal one = 1.0; 1069 1070 PetscFunctionBegin; 1071 /* Copy A into F->lu->A */ 1072 ierr = PLA_Obj_set_to_zero(lu->A);CHKERRQ(ierr); 1073 ierr = PLA_API_begin();CHKERRQ(ierr); 1074 ierr = PLA_Obj_API_open(lu->A);CHKERRQ(ierr); 1075 ierr = MatGetOwnershipRange(A,&rstart,PETSC_NULL);CHKERRQ(ierr); 1076 ierr = MatGetArray(A,&array);CHKERRQ(ierr); 1077 ierr = PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0);CHKERRQ(ierr); 1078 ierr = MatRestoreArray(A,&array);CHKERRQ(ierr); 1079 ierr = PLA_Obj_API_close(lu->A);CHKERRQ(ierr); 1080 ierr = PLA_API_end();CHKERRQ(ierr); 1081 lu->rstart = rstart; 1082 PetscFunctionReturn(0); 1083 } 1084 1085 #undef __FUNCT__ 1086 #define __FUNCT__ "MatMPIDenseCopyFromPlapack" 1087 PetscErrorCode MatMPIDenseCopyFromPlapack(Mat F,Mat A) 1088 { 1089 Mat_Plapack *lu = (Mat_Plapack*)(F)->spptr; 1090 PetscErrorCode ierr; 1091 PetscInt M=A->cmap.N,m=A->rmap.n,rstart; 1092 PetscScalar *array; 1093 PetscReal one = 1.0; 1094 1095 PetscFunctionBegin; 1096 /* Copy F into A->lu->A */ 1097 ierr = MatZeroEntries(A);CHKERRQ(ierr); 1098 ierr = PLA_API_begin();CHKERRQ(ierr); 1099 ierr = PLA_Obj_API_open(lu->A);CHKERRQ(ierr); 1100 ierr = MatGetOwnershipRange(A,&rstart,PETSC_NULL);CHKERRQ(ierr); 1101 ierr = MatGetArray(A,&array);CHKERRQ(ierr); 1102 ierr = PLA_API_axpy_global_to_matrix(m,M, &one,lu->A,rstart,0,(void *)array,m);CHKERRQ(ierr); 1103 ierr = MatRestoreArray(A,&array);CHKERRQ(ierr); 1104 ierr = PLA_Obj_API_close(lu->A);CHKERRQ(ierr); 1105 ierr = PLA_API_end();CHKERRQ(ierr); 1106 lu->rstart = rstart; 1107 PetscFunctionReturn(0); 1108 } 1109 1110 #undef __FUNCT__ 1111 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIDense" 1112 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C) 1113 { 1114 PetscErrorCode ierr; 1115 Mat_Plapack *luA = (Mat_Plapack*)A->spptr; 1116 Mat_Plapack *luB = (Mat_Plapack*)B->spptr; 1117 Mat_Plapack *luC = (Mat_Plapack*)C->spptr; 1118 PLA_Obj alpha = NULL,beta = NULL; 1119 1120 PetscFunctionBegin; 1121 ierr = MatMPIDenseCopyToPlapack(A,A);CHKERRQ(ierr); 1122 ierr = MatMPIDenseCopyToPlapack(B,B);CHKERRQ(ierr); 1123 1124 /* do the multiply in PLA */ 1125 ierr = PLA_Create_constants_conf_to(luA->A,NULL,NULL,&alpha);CHKERRQ(ierr); 1126 ierr = PLA_Create_constants_conf_to(luC->A,NULL,&beta,NULL);CHKERRQ(ierr); 1127 CHKMEMQ; 1128 1129 ierr = PLA_Gemm(PLA_NO_TRANSPOSE,PLA_NO_TRANSPOSE,alpha,luA->A,luB->A,beta,luC->A);CHKERRQ(ierr); 1130 CHKMEMQ; 1131 ierr = PLA_Obj_free(&alpha);CHKERRQ(ierr); 1132 ierr = PLA_Obj_free(&beta);CHKERRQ(ierr); 1133 1134 ierr = MatMPIDenseCopyFromPlapack(C,C);CHKERRQ(ierr); 1135 PetscFunctionReturn(0); 1136 } 1137 1138 #undef __FUNCT__ 1139 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIDense" 1140 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C) 1141 { 1142 PetscErrorCode ierr; 1143 PetscInt m=A->rmap.n,n=B->cmap.n; 1144 Mat Cmat; 1145 1146 PetscFunctionBegin; 1147 if (A->cmap.n != B->rmap.n) SETERRQ2(PETSC_ERR_ARG_SIZ,"A->cmap.n %d != B->rmap.n %d\n",A->cmap.n,B->rmap.n); 1148 ierr = MatCreate(((PetscObject)B)->comm,&Cmat);CHKERRQ(ierr); 1149 ierr = MatSetSizes(Cmat,m,n,A->rmap.N,B->cmap.N);CHKERRQ(ierr); 1150 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 1151 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1152 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1153 ierr = MatMPIDenseCreatePlapack(A);CHKERRQ(ierr); 1154 ierr = MatMPIDenseCreatePlapack(B);CHKERRQ(ierr); 1155 ierr = MatMPIDenseCreatePlapack(Cmat);CHKERRQ(ierr); 1156 *C = Cmat; 1157 PetscFunctionReturn(0); 1158 } 1159 1160 #undef __FUNCT__ 1161 #define __FUNCT__ "MatMatMult_MPIDense_MPIDense" 1162 PetscErrorCode MatMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 1163 { 1164 PetscErrorCode ierr; 1165 1166 PetscFunctionBegin; 1167 if (scall == MAT_INITIAL_MATRIX){ 1168 ierr = MatMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr); 1169 } 1170 ierr = MatMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr); 1171 PetscFunctionReturn(0); 1172 } 1173 1174 1175 #undef __FUNCT__ 1176 #define __FUNCT__ "MatSolve_MPIDense" 1177 PetscErrorCode MatSolve_MPIDense(Mat A,Vec b,Vec x) 1178 { 1179 MPI_Comm comm = ((PetscObject)A)->comm,dummy_comm; 1180 Mat_Plapack *lu = (Mat_Plapack*)A->spptr; 1181 PetscErrorCode ierr; 1182 PetscInt M=A->rmap.N,m=A->rmap.n,rstart,i,j,*idx_pla,*idx_petsc,loc_m,loc_stride; 1183 PetscScalar *array; 1184 PetscReal one = 1.0; 1185 PetscMPIInt size,r_rank,r_nproc,c_rank,c_nproc;; 1186 PLA_Obj v_pla = NULL; 1187 PetscScalar *loc_buf; 1188 Vec loc_x; 1189 1190 PetscFunctionBegin; 1191 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1192 1193 /* Create PLAPACK vector objects, then copy b into PLAPACK b */ 1194 ierr = PLA_Mvector_create(lu->datatype,M,1,lu->templ,PLA_ALIGN_FIRST,&v_pla);CHKERRQ(ierr); 1195 ierr = PLA_Obj_set_to_zero(v_pla);CHKERRQ(ierr); 1196 1197 /* Copy b into rhs_pla */ 1198 ierr = PLA_API_begin();CHKERRQ(ierr); 1199 ierr = PLA_Obj_API_open(v_pla);CHKERRQ(ierr); 1200 ierr = VecGetArray(b,&array);CHKERRQ(ierr); 1201 ierr = PLA_API_axpy_vector_to_global(m,&one,(void *)array,1,v_pla,lu->rstart);CHKERRQ(ierr); 1202 ierr = VecRestoreArray(b,&array);CHKERRQ(ierr); 1203 ierr = PLA_Obj_API_close(v_pla);CHKERRQ(ierr); 1204 ierr = PLA_API_end();CHKERRQ(ierr); 1205 1206 if (A->factor == FACTOR_LU){ 1207 /* Apply the permutations to the right hand sides */ 1208 ierr = PLA_Apply_pivots_to_rows (v_pla,lu->pivots);CHKERRQ(ierr); 1209 1210 /* Solve L y = b, overwriting b with y */ 1211 ierr = PLA_Trsv( PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_UNIT_DIAG,lu->A,v_pla );CHKERRQ(ierr); 1212 1213 /* Solve U x = y (=b), overwriting b with x */ 1214 ierr = PLA_Trsv( PLA_UPPER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla );CHKERRQ(ierr); 1215 } else { /* FACTOR_CHOLESKY */ 1216 ierr = PLA_Trsv( PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla);CHKERRQ(ierr); 1217 ierr = PLA_Trsv( PLA_LOWER_TRIANGULAR,(lu->datatype == MPI_DOUBLE ? PLA_TRANSPOSE : PLA_CONJUGATE_TRANSPOSE),PLA_NONUNIT_DIAG,lu->A,v_pla);CHKERRQ(ierr); 1218 } 1219 1220 /* Copy PLAPACK x into Petsc vector x */ 1221 ierr = PLA_Obj_local_length(v_pla, &loc_m);CHKERRQ(ierr); 1222 ierr = PLA_Obj_local_buffer(v_pla, (void**)&loc_buf);CHKERRQ(ierr); 1223 ierr = PLA_Obj_local_stride(v_pla, &loc_stride);CHKERRQ(ierr); 1224 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,loc_m*loc_stride,loc_buf,&loc_x);CHKERRQ(ierr); 1225 if (!lu->pla_solved){ 1226 1227 ierr = PLA_Temp_comm_row_info(lu->templ,&dummy_comm,&r_rank,&r_nproc);CHKERRQ(ierr); 1228 ierr = PLA_Temp_comm_col_info(lu->templ,&dummy_comm,&c_rank,&c_nproc);CHKERRQ(ierr); 1229 /* printf(" [%d] rank: %d %d, nproc: %d %d\n",rank,r_rank,c_rank,r_nproc,c_nproc); */ 1230 1231 /* Create IS and cts for VecScatterring */ 1232 ierr = PLA_Obj_local_length(v_pla, &loc_m);CHKERRQ(ierr); 1233 ierr = PLA_Obj_local_stride(v_pla, &loc_stride);CHKERRQ(ierr); 1234 ierr = PetscMalloc((2*loc_m+1)*sizeof(PetscInt),&idx_pla);CHKERRQ(ierr); 1235 idx_petsc = idx_pla + loc_m; 1236 1237 rstart = (r_rank*c_nproc+c_rank)*lu->nb; 1238 for (i=0; i<loc_m; i+=lu->nb){ 1239 j = 0; 1240 while (j < lu->nb && i+j < loc_m){ 1241 idx_petsc[i+j] = rstart + j; j++; 1242 } 1243 rstart += size*lu->nb; 1244 } 1245 1246 for (i=0; i<loc_m; i++) idx_pla[i] = i*loc_stride; 1247 1248 ierr = ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_pla,&lu->is_pla);CHKERRQ(ierr); 1249 ierr = ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_petsc,&lu->is_petsc);CHKERRQ(ierr); 1250 ierr = PetscFree(idx_pla);CHKERRQ(ierr); 1251 ierr = VecScatterCreate(loc_x,lu->is_pla,x,lu->is_petsc,&lu->ctx);CHKERRQ(ierr); 1252 } 1253 ierr = VecScatterBegin(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1254 ierr = VecScatterEnd(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1255 1256 /* Free data */ 1257 ierr = VecDestroy(loc_x);CHKERRQ(ierr); 1258 ierr = PLA_Obj_free(&v_pla);CHKERRQ(ierr); 1259 1260 lu->pla_solved = PETSC_TRUE; 1261 PetscFunctionReturn(0); 1262 } 1263 1264 #undef __FUNCT__ 1265 #define __FUNCT__ "MatLUFactorNumeric_MPIDense" 1266 PetscErrorCode MatLUFactorNumeric_MPIDense(Mat A,MatFactorInfo *info,Mat *F) 1267 { 1268 Mat_Plapack *lu = (Mat_Plapack*)(*F)->spptr; 1269 PetscErrorCode ierr; 1270 PetscInt info_pla; 1271 1272 PetscFunctionBegin; 1273 ierr = PLA_Obj_set_to_zero(lu->A);CHKERRQ(ierr); 1274 1275 /* Copy A into F->lu->A */ 1276 ierr = MatMPIDenseCopyToPlapack(A,*F);CHKERRQ(ierr); 1277 1278 /* Factor P A -> L U overwriting lower triangular portion of A with L, upper, U */ 1279 info_pla = PLA_LU(lu->A,lu->pivots); 1280 if (info_pla) SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot encountered at row %d from PLA_LU()",info_pla); 1281 1282 (*F)->assembled = PETSC_TRUE; 1283 PetscFunctionReturn(0); 1284 } 1285 1286 #undef __FUNCT__ 1287 #define __FUNCT__ "MatCholeskyFactorNumeric_MPIDense" 1288 PetscErrorCode MatCholeskyFactorNumeric_MPIDense(Mat A,MatFactorInfo *info,Mat *F) 1289 { 1290 Mat_Plapack *lu = (Mat_Plapack*)(*F)->spptr; 1291 PetscErrorCode ierr; 1292 PetscInt info_pla; 1293 1294 PetscFunctionBegin; 1295 1296 /* Copy A into F->lu->A */ 1297 ierr = MatMPIDenseCopyToPlapack(A,*F);CHKERRQ(ierr); 1298 1299 /* Factor P A -> Chol */ 1300 info_pla = PLA_Chol(PLA_LOWER_TRIANGULAR,lu->A); 1301 if (info_pla) SETERRQ1( PETSC_ERR_MAT_CH_ZRPVT,"Nonpositive definite matrix detected at row %d from PLA_Chol()",info_pla); 1302 1303 (*F)->assembled = PETSC_TRUE; 1304 PetscFunctionReturn(0); 1305 } 1306 1307 1308 #undef __FUNCT__ 1309 #define __FUNCT__ "MatFactorSymbolic_MPIDense_Private" 1310 PetscErrorCode MatFactorSymbolic_MPIDense_Private(Mat A,MatFactorInfo *info,Mat *F) 1311 { 1312 PetscErrorCode ierr; 1313 1314 PetscFunctionBegin; 1315 /* Create the factorization matrix */ 1316 ierr = MatCreate(((PetscObject)A)->comm,F);CHKERRQ(ierr); 1317 ierr = MatSetSizes(*F,A->rmap.n,A->cmap.n,A->rmap.N,A->cmap.N);CHKERRQ(ierr); 1318 ierr = MatSetType(*F,((PetscObject)A)->type_name);CHKERRQ(ierr); 1319 ierr = MatMPIDenseCreatePlapack(*F);CHKERRQ(ierr); 1320 PetscFunctionReturn(0); 1321 } 1322 1323 /* Note the Petsc r and c permutations are ignored */ 1324 #undef __FUNCT__ 1325 #define __FUNCT__ "MatLUFactorSymbolic_MPIDense" 1326 PetscErrorCode MatLUFactorSymbolic_MPIDense(Mat A,IS r,IS c,MatFactorInfo *info,Mat *F) 1327 { 1328 PetscErrorCode ierr; 1329 PetscInt M = A->rmap.N; 1330 Mat_Plapack *lu; 1331 1332 PetscFunctionBegin; 1333 ierr = MatFactorSymbolic_MPIDense_Private(A,info,F);CHKERRQ(ierr); 1334 lu = (Mat_Plapack*)(*F)->spptr; 1335 ierr = PLA_Mvector_create(MPI_INT,M,1,lu->templ,PLA_ALIGN_FIRST,&lu->pivots);CHKERRQ(ierr); 1336 (*F)->factor = FACTOR_LU; 1337 PetscFunctionReturn(0); 1338 } 1339 1340 /* Note the Petsc perm permutation is ignored */ 1341 #undef __FUNCT__ 1342 #define __FUNCT__ "MatCholeskyFactorSymbolic_MPIDense" 1343 PetscErrorCode MatCholeskyFactorSymbolic_MPIDense(Mat A,IS perm,MatFactorInfo *info,Mat *F) 1344 { 1345 PetscErrorCode ierr; 1346 PetscTruth issymmetric,set; 1347 1348 PetscFunctionBegin; 1349 ierr = MatIsSymmetricKnown(A,&set,&issymmetric); CHKERRQ(ierr); 1350 if (!set || !issymmetric) SETERRQ(PETSC_ERR_USER,"Matrix must be set as MAT_SYMMETRIC for CholeskyFactor()"); 1351 ierr = MatFactorSymbolic_MPIDense_Private(A,info,F);CHKERRQ(ierr); 1352 (*F)->factor = FACTOR_CHOLESKY; 1353 PetscFunctionReturn(0); 1354 } 1355 #endif 1356 1357 /* -------------------------------------------------------------------*/ 1358 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense, 1359 MatGetRow_MPIDense, 1360 MatRestoreRow_MPIDense, 1361 MatMult_MPIDense, 1362 /* 4*/ MatMultAdd_MPIDense, 1363 MatMultTranspose_MPIDense, 1364 MatMultTransposeAdd_MPIDense, 1365 #if defined(PETSC_HAVE_PLAPACK) 1366 MatSolve_MPIDense, 1367 #else 1368 0, 1369 #endif 1370 0, 1371 0, 1372 /*10*/ 0, 1373 0, 1374 0, 1375 0, 1376 MatTranspose_MPIDense, 1377 /*15*/ MatGetInfo_MPIDense, 1378 MatEqual_MPIDense, 1379 MatGetDiagonal_MPIDense, 1380 MatDiagonalScale_MPIDense, 1381 MatNorm_MPIDense, 1382 /*20*/ MatAssemblyBegin_MPIDense, 1383 MatAssemblyEnd_MPIDense, 1384 0, 1385 MatSetOption_MPIDense, 1386 MatZeroEntries_MPIDense, 1387 /*25*/ MatZeroRows_MPIDense, 1388 #if defined(PETSC_HAVE_PLAPACK) 1389 MatLUFactorSymbolic_MPIDense, 1390 MatLUFactorNumeric_MPIDense, 1391 MatCholeskyFactorSymbolic_MPIDense, 1392 MatCholeskyFactorNumeric_MPIDense, 1393 #else 1394 0, 1395 0, 1396 0, 1397 0, 1398 #endif 1399 /*30*/ MatSetUpPreallocation_MPIDense, 1400 0, 1401 0, 1402 MatGetArray_MPIDense, 1403 MatRestoreArray_MPIDense, 1404 /*35*/ MatDuplicate_MPIDense, 1405 0, 1406 0, 1407 0, 1408 0, 1409 /*40*/ 0, 1410 MatGetSubMatrices_MPIDense, 1411 0, 1412 MatGetValues_MPIDense, 1413 0, 1414 /*45*/ 0, 1415 MatScale_MPIDense, 1416 0, 1417 0, 1418 0, 1419 /*50*/ 0, 1420 0, 1421 0, 1422 0, 1423 0, 1424 /*55*/ 0, 1425 0, 1426 0, 1427 0, 1428 0, 1429 /*60*/ MatGetSubMatrix_MPIDense, 1430 MatDestroy_MPIDense, 1431 MatView_MPIDense, 1432 0, 1433 0, 1434 /*65*/ 0, 1435 0, 1436 0, 1437 0, 1438 0, 1439 /*70*/ 0, 1440 0, 1441 0, 1442 0, 1443 0, 1444 /*75*/ 0, 1445 0, 1446 0, 1447 0, 1448 0, 1449 /*80*/ 0, 1450 0, 1451 0, 1452 0, 1453 /*84*/ MatLoad_MPIDense, 1454 0, 1455 0, 1456 0, 1457 0, 1458 0, 1459 /*90*/ 1460 #if defined(PETSC_HAVE_PLAPACK) 1461 MatMatMult_MPIDense_MPIDense, 1462 MatMatMultSymbolic_MPIDense_MPIDense, 1463 MatMatMultNumeric_MPIDense_MPIDense, 1464 #else 1465 0, 1466 0, 1467 0, 1468 #endif 1469 0, 1470 /*95*/ 0, 1471 0, 1472 0, 1473 0}; 1474 1475 EXTERN_C_BEGIN 1476 #undef __FUNCT__ 1477 #define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense" 1478 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data) 1479 { 1480 Mat_MPIDense *a; 1481 PetscErrorCode ierr; 1482 1483 PetscFunctionBegin; 1484 mat->preallocated = PETSC_TRUE; 1485 /* Note: For now, when data is specified above, this assumes the user correctly 1486 allocates the local dense storage space. We should add error checking. */ 1487 1488 a = (Mat_MPIDense*)mat->data; 1489 ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr); 1490 ierr = MatSetSizes(a->A,mat->rmap.n,mat->cmap.N,mat->rmap.n,mat->cmap.N);CHKERRQ(ierr); 1491 ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr); 1492 ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr); 1493 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 1494 PetscFunctionReturn(0); 1495 } 1496 EXTERN_C_END 1497 1498 EXTERN_C_BEGIN 1499 #if defined(PETSC_HAVE_PLAPACK) 1500 #undef __FUNCT__ 1501 #define __FUNCT__ "MatSetSolverType_MPIDense_PLAPACK" 1502 PetscErrorCode PETSCMAT_DLLEXPORT MatSetSolverType_MPIDense_PLAPACK(Mat mat,const char *type) 1503 { 1504 PetscFunctionBegin; 1505 /* dummy function so that -mat_solver_type plapack or MatSetSolverType(mat,"plapack"); silently work (since PLAPACK is on by default) */ 1506 PetscFunctionReturn(0); 1507 } 1508 #endif 1509 EXTERN_C_END 1510 1511 /*MC 1512 MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices. 1513 1514 Options Database Keys: 1515 . -mat_type mpidense - sets the matrix type to "mpidense" during a call to MatSetFromOptions() 1516 1517 Level: beginner 1518 1519 MATMPIDENSE matrices may use direct solvers (LU, Cholesky, and QR) 1520 for parallel dense matrices via the external package PLAPACK, if PLAPACK is installed 1521 (run config/configure.py with the option --download-plapack) 1522 1523 1524 Options Database Keys: 1525 . -mat_plapack_nprows <n> - number of rows in processor partition 1526 . -mat_plapack_npcols <n> - number of columns in processor partition 1527 . -mat_plapack_nb <n> - block size of template vector 1528 . -mat_plapack_nb_alg <n> - algorithmic block size 1529 - -mat_plapack_ckerror <n> - error checking flag 1530 1531 .seealso: MatCreateMPIDense(), MATDENSE, MATSEQDENSE 1532 M*/ 1533 1534 EXTERN_C_BEGIN 1535 #undef __FUNCT__ 1536 #define __FUNCT__ "MatCreate_MPIDense" 1537 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIDense(Mat mat) 1538 { 1539 Mat_MPIDense *a; 1540 PetscErrorCode ierr; 1541 1542 PetscFunctionBegin; 1543 ierr = PetscNewLog(mat,Mat_MPIDense,&a);CHKERRQ(ierr); 1544 mat->data = (void*)a; 1545 ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1546 mat->factor = 0; 1547 mat->mapping = 0; 1548 1549 mat->insertmode = NOT_SET_VALUES; 1550 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&a->rank);CHKERRQ(ierr); 1551 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&a->size);CHKERRQ(ierr); 1552 1553 mat->rmap.bs = mat->cmap.bs = 1; 1554 ierr = PetscMapSetUp(&mat->rmap);CHKERRQ(ierr); 1555 ierr = PetscMapSetUp(&mat->cmap);CHKERRQ(ierr); 1556 a->nvec = mat->cmap.n; 1557 1558 /* build cache for off array entries formed */ 1559 a->donotstash = PETSC_FALSE; 1560 ierr = MatStashCreate_Private(((PetscObject)mat)->comm,1,&mat->stash);CHKERRQ(ierr); 1561 1562 /* stuff used for matrix vector multiply */ 1563 a->lvec = 0; 1564 a->Mvctx = 0; 1565 a->roworiented = PETSC_TRUE; 1566 1567 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C", 1568 "MatGetDiagonalBlock_MPIDense", 1569 MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr); 1570 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C", 1571 "MatMPIDenseSetPreallocation_MPIDense", 1572 MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr); 1573 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C", 1574 "MatMatMult_MPIAIJ_MPIDense", 1575 MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr); 1576 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C", 1577 "MatMatMultSymbolic_MPIAIJ_MPIDense", 1578 MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr); 1579 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C", 1580 "MatMatMultNumeric_MPIAIJ_MPIDense", 1581 MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr); 1582 #if defined(PETSC_HAVE_PLAPACK) 1583 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatSetSolverType_mpidense_plapack_C", 1584 "MatSetSolverType_MPIDense_PLAPACK", 1585 MatSetSolverType_MPIDense_PLAPACK);CHKERRQ(ierr); 1586 #endif 1587 ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr); 1588 1589 PetscFunctionReturn(0); 1590 } 1591 EXTERN_C_END 1592 1593 /*MC 1594 MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices. 1595 1596 This matrix type is identical to MATSEQDENSE when constructed with a single process communicator, 1597 and MATMPIDENSE otherwise. 1598 1599 Options Database Keys: 1600 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions() 1601 1602 Level: beginner 1603 1604 1605 .seealso: MatCreateMPIDense,MATSEQDENSE,MATMPIDENSE 1606 M*/ 1607 1608 EXTERN_C_BEGIN 1609 #undef __FUNCT__ 1610 #define __FUNCT__ "MatCreate_Dense" 1611 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_Dense(Mat A) 1612 { 1613 PetscErrorCode ierr; 1614 PetscMPIInt size; 1615 1616 PetscFunctionBegin; 1617 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 1618 if (size == 1) { 1619 ierr = MatSetType(A,MATSEQDENSE);CHKERRQ(ierr); 1620 } else { 1621 ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr); 1622 } 1623 PetscFunctionReturn(0); 1624 } 1625 EXTERN_C_END 1626 1627 #undef __FUNCT__ 1628 #define __FUNCT__ "MatMPIDenseSetPreallocation" 1629 /*@C 1630 MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries 1631 1632 Not collective 1633 1634 Input Parameters: 1635 . A - the matrix 1636 - data - optional location of matrix data. Set data=PETSC_NULL for PETSc 1637 to control all matrix memory allocation. 1638 1639 Notes: 1640 The dense format is fully compatible with standard Fortran 77 1641 storage by columns. 1642 1643 The data input variable is intended primarily for Fortran programmers 1644 who wish to allocate their own matrix memory space. Most users should 1645 set data=PETSC_NULL. 1646 1647 Level: intermediate 1648 1649 .keywords: matrix,dense, parallel 1650 1651 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues() 1652 @*/ 1653 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data) 1654 { 1655 PetscErrorCode ierr,(*f)(Mat,PetscScalar *); 1656 1657 PetscFunctionBegin; 1658 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 1659 if (f) { 1660 ierr = (*f)(mat,data);CHKERRQ(ierr); 1661 } 1662 PetscFunctionReturn(0); 1663 } 1664 1665 #undef __FUNCT__ 1666 #define __FUNCT__ "MatCreateMPIDense" 1667 /*@C 1668 MatCreateMPIDense - Creates a sparse parallel matrix in dense format. 1669 1670 Collective on MPI_Comm 1671 1672 Input Parameters: 1673 + comm - MPI communicator 1674 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1675 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 1676 . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 1677 . N - number of global columns (or PETSC_DECIDE to have calculated if n is given) 1678 - data - optional location of matrix data. Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc 1679 to control all matrix memory allocation. 1680 1681 Output Parameter: 1682 . A - the matrix 1683 1684 Notes: 1685 The dense format is fully compatible with standard Fortran 77 1686 storage by columns. 1687 1688 The data input variable is intended primarily for Fortran programmers 1689 who wish to allocate their own matrix memory space. Most users should 1690 set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users). 1691 1692 The user MUST specify either the local or global matrix dimensions 1693 (possibly both). 1694 1695 Level: intermediate 1696 1697 .keywords: matrix,dense, parallel 1698 1699 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues() 1700 @*/ 1701 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A) 1702 { 1703 PetscErrorCode ierr; 1704 PetscMPIInt size; 1705 1706 PetscFunctionBegin; 1707 ierr = MatCreate(comm,A);CHKERRQ(ierr); 1708 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 1709 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1710 if (size > 1) { 1711 ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr); 1712 ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr); 1713 } else { 1714 ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr); 1715 ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr); 1716 } 1717 PetscFunctionReturn(0); 1718 } 1719 1720 #undef __FUNCT__ 1721 #define __FUNCT__ "MatDuplicate_MPIDense" 1722 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat) 1723 { 1724 Mat mat; 1725 Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data; 1726 PetscErrorCode ierr; 1727 1728 PetscFunctionBegin; 1729 *newmat = 0; 1730 ierr = MatCreate(((PetscObject)A)->comm,&mat);CHKERRQ(ierr); 1731 ierr = MatSetSizes(mat,A->rmap.n,A->cmap.n,A->rmap.N,A->cmap.N);CHKERRQ(ierr); 1732 ierr = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr); 1733 a = (Mat_MPIDense*)mat->data; 1734 ierr = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1735 mat->factor = A->factor; 1736 mat->assembled = PETSC_TRUE; 1737 mat->preallocated = PETSC_TRUE; 1738 1739 mat->rmap.rstart = A->rmap.rstart; 1740 mat->rmap.rend = A->rmap.rend; 1741 a->size = oldmat->size; 1742 a->rank = oldmat->rank; 1743 mat->insertmode = NOT_SET_VALUES; 1744 a->nvec = oldmat->nvec; 1745 a->donotstash = oldmat->donotstash; 1746 1747 ierr = PetscMemcpy(mat->rmap.range,A->rmap.range,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr); 1748 ierr = PetscMemcpy(mat->cmap.range,A->cmap.range,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr); 1749 ierr = MatStashCreate_Private(((PetscObject)A)->comm,1,&mat->stash);CHKERRQ(ierr); 1750 1751 ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr); 1752 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1753 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 1754 1755 #if defined(PETSC_HAVE_PLAPACK) 1756 ierr = PetscMemcpy(mat->spptr,A->spptr,sizeof(Mat_Plapack));CHKERRQ(ierr); 1757 #endif 1758 *newmat = mat; 1759 PetscFunctionReturn(0); 1760 } 1761 1762 #include "petscsys.h" 1763 1764 #undef __FUNCT__ 1765 #define __FUNCT__ "MatLoad_MPIDense_DenseInFile" 1766 PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N, MatType type,Mat *newmat) 1767 { 1768 PetscErrorCode ierr; 1769 PetscMPIInt rank,size; 1770 PetscInt *rowners,i,m,nz,j; 1771 PetscScalar *array,*vals,*vals_ptr; 1772 MPI_Status status; 1773 1774 PetscFunctionBegin; 1775 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1776 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1777 1778 /* determine ownership of all rows */ 1779 m = M/size + ((M % size) > rank); 1780 ierr = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 1781 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 1782 rowners[0] = 0; 1783 for (i=2; i<=size; i++) { 1784 rowners[i] += rowners[i-1]; 1785 } 1786 1787 ierr = MatCreate(comm,newmat);CHKERRQ(ierr); 1788 ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr); 1789 ierr = MatSetType(*newmat,type);CHKERRQ(ierr); 1790 ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr); 1791 ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr); 1792 1793 if (!rank) { 1794 ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 1795 1796 /* read in my part of the matrix numerical values */ 1797 ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr); 1798 1799 /* insert into matrix-by row (this is why cannot directly read into array */ 1800 vals_ptr = vals; 1801 for (i=0; i<m; i++) { 1802 for (j=0; j<N; j++) { 1803 array[i + j*m] = *vals_ptr++; 1804 } 1805 } 1806 1807 /* read in other processors and ship out */ 1808 for (i=1; i<size; i++) { 1809 nz = (rowners[i+1] - rowners[i])*N; 1810 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1811 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(*newmat))->tag,comm);CHKERRQ(ierr); 1812 } 1813 } else { 1814 /* receive numeric values */ 1815 ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 1816 1817 /* receive message of values*/ 1818 ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(*newmat))->tag,comm,&status);CHKERRQ(ierr); 1819 1820 /* insert into matrix-by row (this is why cannot directly read into array */ 1821 vals_ptr = vals; 1822 for (i=0; i<m; i++) { 1823 for (j=0; j<N; j++) { 1824 array[i + j*m] = *vals_ptr++; 1825 } 1826 } 1827 } 1828 ierr = PetscFree(rowners);CHKERRQ(ierr); 1829 ierr = PetscFree(vals);CHKERRQ(ierr); 1830 ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1831 ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1832 PetscFunctionReturn(0); 1833 } 1834 1835 #undef __FUNCT__ 1836 #define __FUNCT__ "MatLoad_MPIDense" 1837 PetscErrorCode MatLoad_MPIDense(PetscViewer viewer, MatType type,Mat *newmat) 1838 { 1839 Mat A; 1840 PetscScalar *vals,*svals; 1841 MPI_Comm comm = ((PetscObject)viewer)->comm; 1842 MPI_Status status; 1843 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,maxnz; 1844 PetscInt header[4],*rowlengths = 0,M,N,*cols; 1845 PetscInt *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols; 1846 PetscInt i,nz,j,rstart,rend; 1847 int fd; 1848 PetscErrorCode ierr; 1849 1850 PetscFunctionBegin; 1851 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1852 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1853 if (!rank) { 1854 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1855 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 1856 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 1857 } 1858 1859 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 1860 M = header[1]; N = header[2]; nz = header[3]; 1861 1862 /* 1863 Handle case where matrix is stored on disk as a dense matrix 1864 */ 1865 if (nz == MATRIX_BINARY_FORMAT_DENSE) { 1866 ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,type,newmat);CHKERRQ(ierr); 1867 PetscFunctionReturn(0); 1868 } 1869 1870 /* determine ownership of all rows */ 1871 m = PetscMPIIntCast(M/size + ((M % size) > rank)); 1872 ierr = PetscMalloc((size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr); 1873 ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1874 rowners[0] = 0; 1875 for (i=2; i<=size; i++) { 1876 rowners[i] += rowners[i-1]; 1877 } 1878 rstart = rowners[rank]; 1879 rend = rowners[rank+1]; 1880 1881 /* distribute row lengths to all processors */ 1882 ierr = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&ourlens);CHKERRQ(ierr); 1883 offlens = ourlens + (rend-rstart); 1884 if (!rank) { 1885 ierr = PetscMalloc(M*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 1886 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1887 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr); 1888 for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i]; 1889 ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr); 1890 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 1891 } else { 1892 ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr); 1893 } 1894 1895 if (!rank) { 1896 /* calculate the number of nonzeros on each processor */ 1897 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 1898 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 1899 for (i=0; i<size; i++) { 1900 for (j=rowners[i]; j< rowners[i+1]; j++) { 1901 procsnz[i] += rowlengths[j]; 1902 } 1903 } 1904 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1905 1906 /* determine max buffer needed and allocate it */ 1907 maxnz = 0; 1908 for (i=0; i<size; i++) { 1909 maxnz = PetscMax(maxnz,procsnz[i]); 1910 } 1911 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1912 1913 /* read in my part of the matrix column indices */ 1914 nz = procsnz[0]; 1915 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 1916 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 1917 1918 /* read in every one elses and ship off */ 1919 for (i=1; i<size; i++) { 1920 nz = procsnz[i]; 1921 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 1922 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 1923 } 1924 ierr = PetscFree(cols);CHKERRQ(ierr); 1925 } else { 1926 /* determine buffer space needed for message */ 1927 nz = 0; 1928 for (i=0; i<m; i++) { 1929 nz += ourlens[i]; 1930 } 1931 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 1932 1933 /* receive message of column indices*/ 1934 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 1935 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 1936 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 1937 } 1938 1939 /* loop over local rows, determining number of off diagonal entries */ 1940 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 1941 jj = 0; 1942 for (i=0; i<m; i++) { 1943 for (j=0; j<ourlens[i]; j++) { 1944 if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++; 1945 jj++; 1946 } 1947 } 1948 1949 /* create our matrix */ 1950 for (i=0; i<m; i++) { 1951 ourlens[i] -= offlens[i]; 1952 } 1953 ierr = MatCreate(comm,newmat);CHKERRQ(ierr); 1954 ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr); 1955 ierr = MatSetType(*newmat,type);CHKERRQ(ierr); 1956 ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr); 1957 A = *newmat; 1958 for (i=0; i<m; i++) { 1959 ourlens[i] += offlens[i]; 1960 } 1961 1962 if (!rank) { 1963 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 1964 1965 /* read in my part of the matrix numerical values */ 1966 nz = procsnz[0]; 1967 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1968 1969 /* insert into matrix */ 1970 jj = rstart; 1971 smycols = mycols; 1972 svals = vals; 1973 for (i=0; i<m; i++) { 1974 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1975 smycols += ourlens[i]; 1976 svals += ourlens[i]; 1977 jj++; 1978 } 1979 1980 /* read in other processors and ship out */ 1981 for (i=1; i<size; i++) { 1982 nz = procsnz[i]; 1983 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1984 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr); 1985 } 1986 ierr = PetscFree(procsnz);CHKERRQ(ierr); 1987 } else { 1988 /* receive numeric values */ 1989 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 1990 1991 /* receive message of values*/ 1992 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr); 1993 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 1994 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 1995 1996 /* insert into matrix */ 1997 jj = rstart; 1998 smycols = mycols; 1999 svals = vals; 2000 for (i=0; i<m; i++) { 2001 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2002 smycols += ourlens[i]; 2003 svals += ourlens[i]; 2004 jj++; 2005 } 2006 } 2007 ierr = PetscFree(ourlens);CHKERRQ(ierr); 2008 ierr = PetscFree(vals);CHKERRQ(ierr); 2009 ierr = PetscFree(mycols);CHKERRQ(ierr); 2010 ierr = PetscFree(rowners);CHKERRQ(ierr); 2011 2012 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2013 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2014 PetscFunctionReturn(0); 2015 } 2016 2017 #undef __FUNCT__ 2018 #define __FUNCT__ "MatEqual_MPIDense" 2019 PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscTruth *flag) 2020 { 2021 Mat_MPIDense *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data; 2022 Mat a,b; 2023 PetscTruth flg; 2024 PetscErrorCode ierr; 2025 2026 PetscFunctionBegin; 2027 a = matA->A; 2028 b = matB->A; 2029 ierr = MatEqual(a,b,&flg);CHKERRQ(ierr); 2030 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 2031 PetscFunctionReturn(0); 2032 } 2033 2034 #if defined(PETSC_HAVE_PLAPACK) 2035 2036 #undef __FUNCT__ 2037 #define __FUNCT__ "PetscPLAPACKFinalizePackage" 2038 /*@C 2039 PetscPLAPACKFinalizePackage - This function destroys everything in the Petsc interface to PLAPACK. 2040 Level: developer 2041 2042 .keywords: Petsc, destroy, package, PLAPACK 2043 .seealso: PetscFinalize() 2044 @*/ 2045 PetscErrorCode PETSC_DLLEXPORT PetscPLAPACKFinalizePackage(void) 2046 { 2047 PetscErrorCode ierr; 2048 2049 PetscFunctionBegin; 2050 ierr = PLA_Finalize();CHKERRQ(ierr); 2051 PetscFunctionReturn(0); 2052 } 2053 2054 #undef __FUNCT__ 2055 #define __FUNCT__ "PetscPLAPACKInitializePackage" 2056 /*@C 2057 PetscPLAPACKInitializePackage - This function initializes everything in the Petsc interface to PLAPACK. It is 2058 called from PetscDLLibraryRegister() when using dynamic libraries, and on the call to PetscInitialize() 2059 when using static libraries. 2060 2061 Input Parameter: 2062 path - The dynamic library path, or PETSC_NULL 2063 2064 Level: developer 2065 2066 .keywords: Petsc, initialize, package, PLAPACK 2067 .seealso: PetscInitializePackage(), PetscInitialize() 2068 @*/ 2069 PetscErrorCode PETSC_DLLEXPORT PetscPLAPACKInitializePackage(const char path[]) 2070 { 2071 MPI_Comm comm = PETSC_COMM_WORLD; 2072 PetscMPIInt size; 2073 PetscErrorCode ierr; 2074 2075 PetscFunctionBegin; 2076 if (!PLA_Initialized(PETSC_NULL)) { 2077 2078 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2079 Plapack_nprows = 1; 2080 Plapack_npcols = size; 2081 2082 ierr = PetscOptionsBegin(comm,PETSC_NULL,"PLAPACK Options","Mat");CHKERRQ(ierr); 2083 ierr = PetscOptionsInt("-plapack_nprows","row dimension of 2D processor mesh","None",Plapack_nprows,&Plapack_nprows,PETSC_NULL);CHKERRQ(ierr); 2084 ierr = PetscOptionsInt("-plapack_npcols","column dimension of 2D processor mesh","None",Plapack_npcols,&Plapack_npcols,PETSC_NULL);CHKERRQ(ierr); 2085 #if defined(PETSC_USE_DEBUG) 2086 Plapack_ierror = 3; 2087 #else 2088 Plapack_ierror = 0; 2089 #endif 2090 ierr = PetscOptionsInt("-plapack_ckerror","error checking flag","None",Plapack_ierror,&Plapack_ierror,PETSC_NULL);CHKERRQ(ierr); 2091 if (Plapack_ierror){ 2092 ierr = PLA_Set_error_checking(Plapack_ierror,PETSC_TRUE,PETSC_TRUE,PETSC_FALSE );CHKERRQ(ierr); 2093 } else { 2094 ierr = PLA_Set_error_checking(Plapack_ierror,PETSC_FALSE,PETSC_FALSE,PETSC_FALSE );CHKERRQ(ierr); 2095 } 2096 2097 Plapack_nb_alg = 0; 2098 ierr = PetscOptionsInt("-plapack_nb_alg","algorithmic block size","None",Plapack_nb_alg,&Plapack_nb_alg,PETSC_NULL);CHKERRQ(ierr); 2099 if (Plapack_nb_alg) { 2100 ierr = pla_Environ_set_nb_alg (PLA_OP_ALL_ALG,Plapack_nb_alg);CHKERRQ(ierr); 2101 } 2102 PetscOptionsEnd(); 2103 2104 ierr = PLA_Comm_1D_to_2D(comm,Plapack_nprows,Plapack_npcols,&Plapack_comm_2d);CHKERRQ(ierr); 2105 ierr = PLA_Init(Plapack_comm_2d);CHKERRQ(ierr); 2106 ierr = PetscRegisterFinalize(PetscPLAPACKFinalizePackage);CHKERRQ(ierr); 2107 } 2108 PetscFunctionReturn(0); 2109 } 2110 2111 2112 #endif 2113