#define PETSCMAT_DLL

/*
   Basic functions for parallel dense matrices.
*/

#include "src/mat/impls/dense/mpi/mpidense.h"   /*I "petscmat.h" I*/
#if defined(PETSC_HAVE_PLAPACK)
static PetscMPIInt Plapack_nprows,Plapack_npcols,Plapack_ierror,Plapack_nb_alg;
static MPI_Comm    Plapack_comm_2d;
#endif

#undef __FUNCT__
#define __FUNCT__ "MatDenseGetLocalMatrix"
/*@
   MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix, returns the sequential
   matrix that represents the local portion of the operator. For a sequential matrix it
   returns the matrix itself.

   Input Parameter:
.  A - the Seq or MPI dense matrix

   Output Parameter:
.  B - the inner matrix

   Level: intermediate

@*/
PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscTruth     flg;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
  if (flg) {
    *B = mat->A;
  } else {
    *B = A;
  }
  PetscFunctionReturn(0);
}
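/*
   A minimal usage sketch (illustrative only; A is assumed to be an already
   assembled dense matrix): the inner matrix can be handed to sequential-only
   routines, for example to view just this process's local block.

     Mat Alocal;
     ierr = MatDenseGetLocalMatrix(A,&Alocal);CHKERRQ(ierr);
     ierr = MatView(Alocal,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr);
*/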
#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIDense"
PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lrow,rstart = A->rmap.rstart,rend = A->rmap.rend;

  PetscFunctionBegin;
  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,"only local rows");
  lrow = row - rstart;
  ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt **)idx,(const PetscScalar **)v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIDense"
PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
  if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonalBlock_MPIDense"
PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       m = A->rmap.n,rstart = A->rmap.rstart;
  PetscScalar    *array;
  MPI_Comm       comm;

  PetscFunctionBegin;
  if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Only square matrices supported.");

  /* The reuse aspect is not implemented efficiently */
  if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);}

  ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
  ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr);
  ierr = MatCreate(comm,B);CHKERRQ(ierr);
  ierr = MatSetSizes(*B,m,m,m,m);CHKERRQ(ierr);
  ierr = MatSetType(*B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
  ierr = MatSeqDenseSetPreallocation(*B,array+m*rstart);CHKERRQ(ierr);
  ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  *iscopy = PETSC_TRUE;
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIDense"
PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend,row;
  PetscTruth     roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue;
    if (idxm[i] >= mat->rmap.N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue;
          if (idxn[j] >= mat->cmap.N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (!A->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIDense"
PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend,row;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
    if (idxm[i] >= mat->rmap.N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
        if (idxn[j] >= mat->cmap.N) {
          SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        }
        ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}
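/*
   Usage sketch for the row access routines above (illustrative; A is an
   assumed assembled MPI dense matrix): since MatGetRow_MPIDense() and
   MatGetValues_MPIDense() support only locally owned rows, callers should
   restrict queries to the ownership range.

     PetscInt          rstart,rend,row,ncols;
     const PetscInt    *cols;
     const PetscScalar *vals;
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     for (row=rstart; row<rend; row++) {
       ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       ... use cols[] and vals[] ...
       ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     }
*/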
#undef __FUNCT__
#define __FUNCT__ "MatGetArray_MPIDense"
PetscErrorCode MatGetArray_MPIDense(Mat A,PetscScalar *array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatGetArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIDense"
static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,PetscInt cs,MatReuse scall,Mat *B)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data,*newmatd;
  Mat_SeqDense   *lmat = (Mat_SeqDense*)mat->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,*irow,*icol,rstart,rend,nrows,ncols,nlrows,nlcols;
  PetscScalar    *av,*bv,*v = lmat->v;
  Mat            newmat;

  PetscFunctionBegin;
  ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr);
  ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK. ... Currently supports only a submatrix with the same
     partitioning as the original matrix! */

  ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    ierr = MatCreate(((PetscObject)A)->comm,&newmat);CHKERRQ(ierr);
    ierr = MatSetSizes(newmat,nrows,cs,PETSC_DECIDE,ncols);CHKERRQ(ierr);
    ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr);
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense*)newmat->data;
  bv      = ((Mat_SeqDense*)newmatd->A->data)->v;

  for (i=0; i<ncols; i++) {
    av = v + nlrows*icol[i];
    for (j=0; j<nrows; j++) {
      *bv++ = av[irow[j] - rstart];
    }
  }

  /* Assemble the matrices so that the correct flags are set */
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* Free work space */
  ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
  *B = newmat;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreArray_MPIDense"
PetscErrorCode MatRestoreArray_MPIDense(Mat A,PetscScalar *array[])
{
  PetscFunctionBegin;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIDense"
PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  MPI_Comm       comm = ((PetscObject)mat)->comm;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap.range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
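/*
   The stash used by MatAssemblyBegin_MPIDense() lets a process set entries
   it does not own; a typical calling sequence (an illustrative sketch, with
   i,j possibly off-process) is

     ierr = MatSetValues(mat,1,&i,1,&j,&v,INSERT_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

   The stashed off-process values are received and inserted by
   MatAssemblyEnd_MPIDense() below.
*/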
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIDense"
PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,*row,*col,flg,j,rstart,ncols;
  PetscMPIInt    n;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  PetscFunctionBegin;
  /* wait on receives */
  while (1) {
    ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
    if (!flg) break;

    for (i=0; i<n;) {
      /* Now identify the consecutive vals belonging to the same row */
      for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
      if (j < n) ncols = j-i;
      else       ncols = n-i;
      /* Now assemble all these values with a single function call */
      ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
      i = j;
    }
  }
  ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIDense"
PetscErrorCode MatZeroEntries_MPIDense(Mat A)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *l = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* The code does not handle the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   mdn->A and mdn->B directly and not through the MatZeroRows()
   routine.
*/
#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIDense"
PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
{
  Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*owners = A->rmap.range;
  PetscInt       *nprocs,j,idx,nsends;
  PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt       *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source;
  PetscInt       *lens,*lrows,*values;
  PetscMPIInt    n,imdex,rank = l->rank,size = l->size;
  MPI_Comm       comm = ((PetscObject)A)->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  PetscTruth     found;

  PetscFunctionBegin;
  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  for (i=0; i<N; i++) {
    idx   = rows[i];
    found = PETSC_FALSE;
    for (j=0; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
        the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
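/*
   Usage sketch (illustrative): each process may pass any global row indices;
   the rendezvous above routes each index to its owning process, so the rows
   being zeroed need not be local.

     PetscInt rows[] = {0,5,7};                        global indices
     ierr = MatZeroRows(A,3,rows,1.0);CHKERRQ(ierr);   keep a unit diagonal
*/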
#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIDense"
PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIDense"
PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIDense"
PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscScalar    zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(yy,zero);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIDense"
PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecCopy(yy,zz);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIDense"
PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *aloc = (Mat_SeqDense*)a->A->data;
  PetscErrorCode ierr;
  PetscInt       len,i,n,m = A->rmap.n,radd;
  PetscScalar    *x,zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(v,zero);CHKERRQ(ierr);
  ierr = VecGetArray(v,&x);CHKERRQ(ierr);
  ierr = VecGetSize(v,&n);CHKERRQ(ierr);
  if (n != A->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
  len  = PetscMin(a->A->rmap.n,a->A->cmap.n);
  radd = A->rmap.rstart*m;
  for (i=0; i<len; i++) {
    x[i] = aloc->v[radd + i*m + i];
  }
  ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIDense"
PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
#if defined(PETSC_HAVE_PLAPACK)
  Mat_Plapack    *lu = (Mat_Plapack*)(mat->spptr);
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap.N,mat->cmap.N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatDestroy(mdn->A);CHKERRQ(ierr);
  if (mdn->lvec)  {ierr = VecDestroy(mdn->lvec);CHKERRQ(ierr);}
  if (mdn->Mvctx) {ierr = VecScatterDestroy(mdn->Mvctx);CHKERRQ(ierr);}
#if defined(PETSC_HAVE_PLAPACK)
  if (lu) {
    ierr = PLA_Obj_free(&lu->A);CHKERRQ(ierr);
    ierr = PLA_Obj_free(&lu->pivots);CHKERRQ(ierr);
    ierr = PLA_Temp_free(&lu->templ);CHKERRQ(ierr);

    if (lu->is_pla) {
      ierr = ISDestroy(lu->is_pla);CHKERRQ(ierr);
      ierr = ISDestroy(lu->is_petsc);CHKERRQ(ierr);
      ierr = VecScatterDestroy(lu->ctx);CHKERRQ(ierr);
    }
  }
#endif

  ierr = PetscFree(mdn);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense_Binary"
static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  PetscViewerFormat format;
  int               fd;
  PetscInt          header[4],mmax,N = mat->cmap.N,i,j,m,k;
  PetscMPIInt       rank,tag = ((PetscObject)viewer)->tag,size;
  PetscScalar       *work,*v,*vv;
  Mat_SeqDense      *a = (Mat_SeqDense*)mdn->A->data;
  MPI_Status        status;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
    ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);

    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_BINARY_NATIVE) {

      if (!rank) {
        /* store the matrix as a dense matrix */
        header[0] = MAT_FILE_COOKIE;
        header[1] = mat->rmap.N;
        header[2] = N;
        header[3] = MATRIX_BINARY_FORMAT_DENSE;
        ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);

        /* get largest work array needed for transposing array */
        mmax = mat->rmap.n;
        for (i=1; i<size; i++) {
          mmax = PetscMax(mmax,mat->rmap.range[i+1] - mat->rmap.range[i]);
        }
        ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&work);CHKERRQ(ierr);

        /* write out local array, by rows */
        m = mat->rmap.n;
        v = a->v;
        for (j=0; j<N; j++) {
          for (i=0; i<m; i++) {
            work[j + i*N] = *v++;
          }
        }
        ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
        /* get largest work array to receive messages from other processes, excludes process zero */
        mmax = 0;
        for (i=1; i<size; i++) {
          mmax = PetscMax(mmax,mat->rmap.range[i+1] - mat->rmap.range[i]);
        }
        ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&vv);CHKERRQ(ierr);
        for (k=1; k<size; k++) {
          v    = vv;    /* reset for each incoming message */
          m    = mat->rmap.range[k+1] - mat->rmap.range[k];
          ierr = MPI_Recv(v,m*N,MPIU_SCALAR,k,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);

          for (j=0; j<N; j++) {
            for (i=0; i<m; i++) {
              work[j + i*N] = *v++;
            }
          }
          ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
        }
        ierr = PetscFree(work);CHKERRQ(ierr);
        ierr = PetscFree(vv);CHKERRQ(ierr);
      } else {
        ierr = MPI_Send(a->v,mat->rmap.n*mat->cmap.N,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = mdn->size,rank = mdn->rank;
  PetscViewerType   vtype;
  PetscTruth        iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;
#if defined(PETSC_HAVE_PLAPACK)
  Mat_Plapack       *lu = (Mat_Plapack*)(mat->spptr);
#endif

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap.n,
                                                (PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
#if defined(PETSC_HAVE_PLAPACK)
      ierr = PetscViewerASCIIPrintf(viewer,"PLAPACK run parameters:\n");CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  Processor mesh: nprows %d, npcols %d\n",Plapack_nprows,Plapack_npcols);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  Distr. block size nb: %d \n",lu->nb);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  Error checking: %d\n",Plapack_ierror);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  Algorithmic block size: %d\n",Plapack_nb_alg);CHKERRQ(ierr);
#endif
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;

    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    PetscInt    M = mat->rmap.N,N = mat->cmap.N,m,row,i,nz;
    PetscInt    *cols;
    PetscScalar *vals;

    ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
    ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    A->insertmode = INSERT_VALUES;
    row = mat->rmap.rstart; m = mdn->A->rmap.n;
    for (i=0; i<m; i++) {
      ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      row++;
    }

    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense"
PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscTruth     iascii,isbinary,isdraw,issocket;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);

  if (iascii || issocket || isdraw) {
    ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else if (isbinary) {
    ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIDense"
PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  Mat            mdn = mat->A;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->rows_global    = (double)A->rmap.N;
  info->columns_global = (double)A->cmap.N;
  info->rows_local     = (double)A->rmap.n;
  info->columns_local  = (double)A->cmap.N;
  info->block_size     = 1.0;
  ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)A)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIDense"
PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscTruth flg)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = flg;
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
  case MAT_USE_HASH_TABLE:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_IGNORE_LOWER_TRIANGULAR:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPIDense"
PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
  PetscScalar    *l,*r,x,*v;
  PetscErrorCode ierr;
  PetscInt       i,j,s2a,s3a,s2,s3,m = mdn->A->rmap.n,n = mdn->A->cmap.n;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
  if (ll) {
    ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
    if (s2a != s2) SETERRQ2(PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.",s2a,s2);
    ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      x = l[i];
      v = mat->v + i;
      for (j=0; j<n; j++) { (*v) *= x; v += m;}
    }
    ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
    ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
  }
  if (rr) {
    ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
    if (s3a != s3) SETERRQ2(PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.",s3a,s3);
    ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
    for (i=0; i<n; i++) {
      x = r[i];
      v = mat->v + i*m;
      for (j=0; j<m; j++) { (*v++) *= x;}
    }
    ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
    ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIDense"
PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j;
  PetscReal      sum = 0.0;
  PetscScalar    *v = mat->v;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      for (i=0; i<mdn->A->cmap.n*mdn->A->rmap.n; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
      *nrm = sqrt(*nrm);
      ierr = PetscLogFlops(2*mdn->A->cmap.n*mdn->A->rmap.n);CHKERRQ(ierr);
    } else if (type == NORM_1) {
      PetscReal *tmp,*tmp2;
      ierr = PetscMalloc(2*A->cmap.N*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
      tmp2 = tmp + A->cmap.N;
      ierr = PetscMemzero(tmp,2*A->cmap.N*sizeof(PetscReal));CHKERRQ(ierr);
      *nrm = 0.0;
      v    = mat->v;
      for (j=0; j<mdn->A->cmap.n; j++) {
        for (i=0; i<mdn->A->rmap.n; i++) {
          tmp[j] += PetscAbsScalar(*v); v++;
        }
      }
      ierr = MPI_Allreduce(tmp,tmp2,A->cmap.N,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
      for (j=0; j<A->cmap.N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscLogFlops(A->cmap.n*A->rmap.n);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
      ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPI_MAX,((PetscObject)A)->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIDense"
PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
  Mat            B;
  PetscInt       M = A->rmap.N,N = A->cmap.N,m,n,*rwork,rstart = A->rmap.rstart;
  PetscErrorCode ierr;
  PetscInt       j,i;
  PetscScalar    *v;

  PetscFunctionBegin;
  if (A == *matout && M != N) {
    SETERRQ(PETSC_ERR_SUP,"Supports square matrix only in-place");
  }
  if (reuse == MAT_INITIAL_MATRIX || A == *matout) {
    ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,N,M);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(B,PETSC_NULL);CHKERRQ(ierr);
  } else {
    B = *matout;
  }

  m = a->A->rmap.n; n = a->A->cmap.n; v = Aloc->v;
  ierr = PetscMalloc(m*sizeof(PetscInt),&rwork);CHKERRQ(ierr);
  for (i=0; i<m; i++) rwork[i] = rstart + i;
  for (j=0; j<n; j++) {
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
    v += m;
  }
  ierr = PetscFree(rwork);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (*matout != A) {
    *matout = B;
  } else {
    ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#include "petscblaslapack.h"
#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIDense"
PetscErrorCode MatScale_MPIDense(Mat inA,PetscScalar alpha)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)inA->data;
  Mat_SeqDense   *a = (Mat_SeqDense*)A->A->data;
  PetscScalar    oalpha = alpha;
  PetscErrorCode ierr;
  PetscBLASInt   one = 1,nz = PetscBLASIntCast(inA->rmap.n*inA->cmap.N);

  PetscFunctionBegin;
  BLASscal_(&nz,&oalpha,a->v,&one);
  ierr = PetscLogFlops(nz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);

#undef __FUNCT__
#define __FUNCT__ "MatSetUpPreallocation_MPIDense"
PetscErrorCode MatSetUpPreallocation_MPIDense(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_PLAPACK)

#undef __FUNCT__
#define __FUNCT__ "MatMPIDenseCreatePlapack"
PetscErrorCode MatMPIDenseCreatePlapack(Mat A)
{
  Mat_Plapack    *lu;
  PetscErrorCode ierr;
  PetscInt       M = A->rmap.N;
  MPI_Comm       comm = ((PetscObject)A)->comm;
  PetscMPIInt    size;

  PetscFunctionBegin;
  if (A->spptr) PetscFunctionReturn(0);
  ierr = PetscNewLog(A,Mat_Plapack,&lu);CHKERRQ(ierr);
  A->spptr = (void*)lu;

  lu = (Mat_Plapack*)(A->spptr);

  /* Set default Plapack parameters */
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  lu->nb = M/size;
  if (M - lu->nb*size) lu->nb++; /* without cyclic distribution */

  /* Set runtime options */
  ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"PLAPACK Options","Mat");CHKERRQ(ierr);
  ierr = PetscOptionsInt("-mat_plapack_nb","block size of template vector","None",lu->nb,&lu->nb,PETSC_NULL);CHKERRQ(ierr);
  PetscOptionsEnd();

  /* Create object distribution template */
  lu->templ = NULL;
  ierr = PLA_Temp_create(lu->nb,0,&lu->templ);CHKERRQ(ierr);

  /* Set the datatype */
#if defined(PETSC_USE_COMPLEX)
  lu->datatype = MPI_DOUBLE_COMPLEX;
#else
  lu->datatype = MPI_DOUBLE;
#endif

  ierr = PLA_Matrix_create(lu->datatype,M,A->cmap.N,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A);CHKERRQ(ierr);

  lu->pla_solved = PETSC_FALSE; /* MatSolve_Plapack() has not been called yet */
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIDenseCopyToPlapack"
PetscErrorCode MatMPIDenseCopyToPlapack(Mat A,Mat F)
{
  Mat_Plapack    *lu = (Mat_Plapack*)(F)->spptr;
  PetscErrorCode ierr;
  PetscInt       M = A->cmap.N,m = A->rmap.n,rstart;
  PetscScalar    *array;
  PetscReal      one = 1.0;

  PetscFunctionBegin;
  /* Copy A into F's PLAPACK matrix lu->A */
  ierr = PLA_Obj_set_to_zero(lu->A);CHKERRQ(ierr);
  ierr = PLA_API_begin();CHKERRQ(ierr);
  ierr = PLA_Obj_API_open(lu->A);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatGetArray(A,&array);CHKERRQ(ierr);
  ierr = PLA_API_axpy_matrix_to_global(m,M,&one,(void *)array,m,lu->A,rstart,0);CHKERRQ(ierr);
  ierr = MatRestoreArray(A,&array);CHKERRQ(ierr);
  ierr = PLA_Obj_API_close(lu->A);CHKERRQ(ierr);
  ierr = PLA_API_end();CHKERRQ(ierr);
  lu->rstart = rstart;
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatMPIDenseCopyFromPlapack"
PetscErrorCode MatMPIDenseCopyFromPlapack(Mat F,Mat A)
{
  Mat_Plapack    *lu = (Mat_Plapack*)(F)->spptr;
  PetscErrorCode ierr;
  PetscInt       M = A->cmap.N,m = A->rmap.n,rstart;
  PetscScalar    *array;
  PetscReal      one = 1.0;

  PetscFunctionBegin;
  /* Copy F's PLAPACK matrix lu->A into A */
  ierr = MatZeroEntries(A);CHKERRQ(ierr);
  ierr = PLA_API_begin();CHKERRQ(ierr);
  ierr = PLA_Obj_API_open(lu->A);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatGetArray(A,&array);CHKERRQ(ierr);
  ierr = PLA_API_axpy_global_to_matrix(m,M,&one,lu->A,rstart,0,(void *)array,m);CHKERRQ(ierr);
  ierr = MatRestoreArray(A,&array);CHKERRQ(ierr);
  ierr = PLA_Obj_API_close(lu->A);CHKERRQ(ierr);
  ierr = PLA_API_end();CHKERRQ(ierr);
  lu->rstart = rstart;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIDense"
PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
{
  PetscErrorCode ierr;
  Mat_Plapack    *luA = (Mat_Plapack*)A->spptr;
  Mat_Plapack    *luB = (Mat_Plapack*)B->spptr;
  Mat_Plapack    *luC = (Mat_Plapack*)C->spptr;
  PLA_Obj        alpha = NULL,beta = NULL;

  PetscFunctionBegin;
  ierr = MatMPIDenseCopyToPlapack(A,A);CHKERRQ(ierr);
  ierr = MatMPIDenseCopyToPlapack(B,B);CHKERRQ(ierr);

  /* do the multiply in PLA */
  ierr = PLA_Create_constants_conf_to(luA->A,NULL,NULL,&alpha);CHKERRQ(ierr);
  ierr = PLA_Create_constants_conf_to(luC->A,NULL,&beta,NULL);CHKERRQ(ierr);
  CHKMEMQ;

  ierr = PLA_Gemm(PLA_NO_TRANSPOSE,PLA_NO_TRANSPOSE,alpha,luA->A,luB->A,beta,luC->A);CHKERRQ(ierr);
  CHKMEMQ;
  ierr = PLA_Obj_free(&alpha);CHKERRQ(ierr);
  ierr = PLA_Obj_free(&beta);CHKERRQ(ierr);

  ierr = MatMPIDenseCopyFromPlapack(C,C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIDense"
PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;
  PetscInt       m = A->rmap.n,n = B->cmap.n;
  Mat            Cmat;

  PetscFunctionBegin;
  if (A->cmap.n != B->rmap.n) SETERRQ2(PETSC_ERR_ARG_SIZ,"A->cmap.n %d != B->rmap.n %d\n",A->cmap.n,B->rmap.n);
  ierr = MatCreate(((PetscObject)B)->comm,&Cmat);CHKERRQ(ierr);
  ierr = MatSetSizes(Cmat,m,n,A->rmap.N,B->cmap.N);CHKERRQ(ierr);
  ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatMPIDenseCreatePlapack(A);CHKERRQ(ierr);
  ierr = MatMPIDenseCreatePlapack(B);CHKERRQ(ierr);
  ierr = MatMPIDenseCreatePlapack(Cmat);CHKERRQ(ierr);
  *C = Cmat;
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatMatMult_MPIDense_MPIDense"
PetscErrorCode MatMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX) {
    ierr = MatMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr);
  }
  ierr = MatMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSolve_MPIDense"
PetscErrorCode MatSolve_MPIDense(Mat A,Vec b,Vec x)
{
  MPI_Comm       comm = ((PetscObject)A)->comm,dummy_comm;
  Mat_Plapack    *lu = (Mat_Plapack*)A->spptr;
  PetscErrorCode ierr;
  PetscInt       M = A->rmap.N,m = A->rmap.n,rstart,i,j,*idx_pla,*idx_petsc,loc_m,loc_stride;
  PetscScalar    *array;
  PetscReal      one = 1.0;
  PetscMPIInt    size,r_rank,r_nproc,c_rank,c_nproc;
  PLA_Obj        v_pla = NULL;
  PetscScalar    *loc_buf;
  Vec            loc_x;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* Create PLAPACK vector objects, then copy b into PLAPACK b */
  ierr = PLA_Mvector_create(lu->datatype,M,1,lu->templ,PLA_ALIGN_FIRST,&v_pla);CHKERRQ(ierr);
  ierr = PLA_Obj_set_to_zero(v_pla);CHKERRQ(ierr);

  /* Copy b into rhs_pla */
  ierr = PLA_API_begin();CHKERRQ(ierr);
  ierr = PLA_Obj_API_open(v_pla);CHKERRQ(ierr);
  ierr = VecGetArray(b,&array);CHKERRQ(ierr);
  ierr = PLA_API_axpy_vector_to_global(m,&one,(void *)array,1,v_pla,lu->rstart);CHKERRQ(ierr);
  ierr = VecRestoreArray(b,&array);CHKERRQ(ierr);
  ierr = PLA_Obj_API_close(v_pla);CHKERRQ(ierr);
  ierr = PLA_API_end();CHKERRQ(ierr);

  if (A->factor == FACTOR_LU) {
    /* Apply the permutations to the right hand sides */
    ierr = PLA_Apply_pivots_to_rows(v_pla,lu->pivots);CHKERRQ(ierr);

    /* Solve L y = b, overwriting b with y */
    ierr = PLA_Trsv(PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_UNIT_DIAG,lu->A,v_pla);CHKERRQ(ierr);

    /* Solve U x = y (=b), overwriting b with x */
    ierr = PLA_Trsv(PLA_UPPER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla);CHKERRQ(ierr);
  } else { /* FACTOR_CHOLESKY */
    ierr = PLA_Trsv(PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla);CHKERRQ(ierr);
    ierr = PLA_Trsv(PLA_LOWER_TRIANGULAR,(lu->datatype == MPI_DOUBLE ? PLA_TRANSPOSE : PLA_CONJUGATE_TRANSPOSE),PLA_NONUNIT_DIAG,lu->A,v_pla);CHKERRQ(ierr);
  }

  /* Copy the PLAPACK solution into the PETSc vector x */
  ierr = PLA_Obj_local_length(v_pla,&loc_m);CHKERRQ(ierr);
  ierr = PLA_Obj_local_buffer(v_pla,(void**)&loc_buf);CHKERRQ(ierr);
  ierr = PLA_Obj_local_stride(v_pla,&loc_stride);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,loc_m*loc_stride,loc_buf,&loc_x);CHKERRQ(ierr);
  if (!lu->pla_solved) {

    ierr = PLA_Temp_comm_row_info(lu->templ,&dummy_comm,&r_rank,&r_nproc);CHKERRQ(ierr);
    ierr = PLA_Temp_comm_col_info(lu->templ,&dummy_comm,&c_rank,&c_nproc);CHKERRQ(ierr);
    /* printf(" [%d] rank: %d %d, nproc: %d %d\n",rank,r_rank,c_rank,r_nproc,c_nproc); */

    /* Create the index sets and ctx for the VecScatter */
    ierr = PLA_Obj_local_length(v_pla,&loc_m);CHKERRQ(ierr);
    ierr = PLA_Obj_local_stride(v_pla,&loc_stride);CHKERRQ(ierr);
    ierr = PetscMalloc((2*loc_m+1)*sizeof(PetscInt),&idx_pla);CHKERRQ(ierr);
    idx_petsc = idx_pla + loc_m;

    rstart = (r_rank*c_nproc+c_rank)*lu->nb;
    for (i=0; i<loc_m; i+=lu->nb) {
      j = 0;
      while (j < lu->nb && i+j < loc_m) {
        idx_petsc[i+j] = rstart + j; j++;
      }
      rstart += size*lu->nb;
    }

    for (i=0; i<loc_m; i++) idx_pla[i] = i*loc_stride;

    ierr = ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_pla,&lu->is_pla);CHKERRQ(ierr);
    ierr = ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_petsc,&lu->is_petsc);CHKERRQ(ierr);
    ierr = PetscFree(idx_pla);CHKERRQ(ierr);
    ierr = VecScatterCreate(loc_x,lu->is_pla,x,lu->is_petsc,&lu->ctx);CHKERRQ(ierr);
  }
  ierr = VecScatterBegin(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  /* Free data */
  ierr = VecDestroy(loc_x);CHKERRQ(ierr);
  ierr = PLA_Obj_free(&v_pla);CHKERRQ(ierr);

  lu->pla_solved = PETSC_TRUE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatLUFactorNumeric_MPIDense"
PetscErrorCode MatLUFactorNumeric_MPIDense(Mat A,MatFactorInfo *info,Mat *F)
{
  Mat_Plapack    *lu = (Mat_Plapack*)(*F)->spptr;
  PetscErrorCode ierr;
  PetscInt       info_pla;

  PetscFunctionBegin;
  ierr = PLA_Obj_set_to_zero(lu->A);CHKERRQ(ierr);

  /* Copy A into F's PLAPACK matrix lu->A */
  ierr = MatMPIDenseCopyToPlapack(A,*F);CHKERRQ(ierr);

  /* Factor P A -> L U, overwriting the lower triangular portion of A with L and the upper with U */
  info_pla = PLA_LU(lu->A,lu->pivots);
  if (info_pla) SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot encountered at row %d from PLA_LU()",info_pla);

  (*F)->assembled = PETSC_TRUE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCholeskyFactorNumeric_MPIDense"
PetscErrorCode MatCholeskyFactorNumeric_MPIDense(Mat A,MatFactorInfo *info,Mat *F)
{
  Mat_Plapack    *lu = (Mat_Plapack*)(*F)->spptr;
  PetscErrorCode ierr;
  PetscInt       info_pla;

  PetscFunctionBegin;
  /* Copy A into F's PLAPACK matrix lu->A */
  ierr = MatMPIDenseCopyToPlapack(A,*F);CHKERRQ(ierr);

  /* Factor A -> Chol */
  info_pla = PLA_Chol(PLA_LOWER_TRIANGULAR,lu->A);
  if (info_pla) SETERRQ1(PETSC_ERR_MAT_CH_ZRPVT,"Nonpositive definite matrix detected at row %d from PLA_Chol()",info_pla);

  (*F)->assembled = PETSC_TRUE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatFactorSymbolic_MPIDense_Private"
PetscErrorCode MatFactorSymbolic_MPIDense_Private(Mat A,MatFactorInfo *info,Mat *F)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Create the factorization matrix */
  ierr = MatCreate(((PetscObject)A)->comm,F);CHKERRQ(ierr);
  ierr = MatSetSizes(*F,A->rmap.n,A->cmap.n,A->rmap.N,A->cmap.N);CHKERRQ(ierr);
  ierr = MatSetType(*F,((PetscObject)A)->type_name);CHKERRQ(ierr);
  ierr = MatMPIDenseCreatePlapack(*F);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Note the PETSc r and c permutations are ignored */
#undef __FUNCT__
#define __FUNCT__ "MatLUFactorSymbolic_MPIDense"
PetscErrorCode MatLUFactorSymbolic_MPIDense(Mat A,IS r,IS c,MatFactorInfo *info,Mat *F)
{
  PetscErrorCode ierr;
  PetscInt       M = A->rmap.N;
  Mat_Plapack    *lu;

  PetscFunctionBegin;
  ierr = MatFactorSymbolic_MPIDense_Private(A,info,F);CHKERRQ(ierr);
  lu   = (Mat_Plapack*)(*F)->spptr;
  ierr = PLA_Mvector_create(MPI_INT,M,1,lu->templ,PLA_ALIGN_FIRST,&lu->pivots);CHKERRQ(ierr);
  (*F)->factor = FACTOR_LU;
  PetscFunctionReturn(0);
}

/* Note the PETSc perm permutation is ignored */
#undef __FUNCT__
#define __FUNCT__ "MatCholeskyFactorSymbolic_MPIDense"
PetscErrorCode MatCholeskyFactorSymbolic_MPIDense(Mat A,IS perm,MatFactorInfo *info,Mat *F)
{
  PetscErrorCode ierr;
  PetscTruth     issymmetric,set;

  PetscFunctionBegin;
  ierr = MatIsSymmetricKnown(A,&set,&issymmetric);CHKERRQ(ierr);
  if (!set || !issymmetric) SETERRQ(PETSC_ERR_USER,"Matrix must be set as MAT_SYMMETRIC for CholeskyFactor()");
  ierr = MatFactorSymbolic_MPIDense_Private(A,info,F);CHKERRQ(ierr);
  (*F)->factor = FACTOR_CHOLESKY;
  PetscFunctionReturn(0);
}
#endif

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
       MatGetRow_MPIDense,
       MatRestoreRow_MPIDense,
       MatMult_MPIDense,
/* 4*/ MatMultAdd_MPIDense,
       MatMultTranspose_MPIDense,
       MatMultTransposeAdd_MPIDense,
#if defined(PETSC_HAVE_PLAPACK)
       MatSolve_MPIDense,
#else
       0,
#endif
       0,
       0,
/*10*/ 0,
       0,
       0,
       0,
       MatTranspose_MPIDense,
/*15*/ MatGetInfo_MPIDense,
       MatEqual_MPIDense,
       MatGetDiagonal_MPIDense,
       MatDiagonalScale_MPIDense,
       MatNorm_MPIDense,
/*20*/ MatAssemblyBegin_MPIDense,
       MatAssemblyEnd_MPIDense,
       0,
       MatSetOption_MPIDense,
       MatZeroEntries_MPIDense,
/*25*/ MatZeroRows_MPIDense,
#if defined(PETSC_HAVE_PLAPACK)
       MatLUFactorSymbolic_MPIDense,
       MatLUFactorNumeric_MPIDense,
       MatCholeskyFactorSymbolic_MPIDense,
       MatCholeskyFactorNumeric_MPIDense,
#else
       0,
       0,
       0,
       0,
#endif
/*30*/ MatSetUpPreallocation_MPIDense,
       0,
       0,
       MatGetArray_MPIDense,
       MatRestoreArray_MPIDense,
/*35*/ MatDuplicate_MPIDense,
       0,
       0,
       0,
       0,
/*40*/ 0,
       MatGetSubMatrices_MPIDense,
       0,
       MatGetValues_MPIDense,
       0,
/*45*/ 0,
       MatScale_MPIDense,
       0,
       0,
       0,
/*50*/ 0,
       0,
       0,
       0,
       0,
/*55*/ 0,
       0,
       0,
       0,
       0,
/*60*/ MatGetSubMatrix_MPIDense,
       MatDestroy_MPIDense,
       MatView_MPIDense,
       0,
       0,
/*65*/ 0,
       0,
       0,
       0,
       0,
/*70*/ 0,
       0,
       0,
       0,
       0,
/*75*/ 0,
       0,
       0,
       0,
       0,
/*80*/ 0,
       0,
       0,
       0,
/*84*/ MatLoad_MPIDense,
       0,
       0,
       0,
       0,
       0,
/*90*/
#if defined(PETSC_HAVE_PLAPACK)
       MatMatMult_MPIDense_MPIDense,
       MatMatMultSymbolic_MPIDense_MPIDense,
       MatMatMultNumeric_MPIDense_MPIDense,
#else
       0,
       0,
       0,
#endif
       0,
/*95*/ 0,
       0,
       0,
       0};
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense"
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  mat->preallocated = PETSC_TRUE;
  /* Note: For now, when data is specified, this assumes the user correctly
     allocates the local dense storage space. We should add error checking. */

  a    = (Mat_MPIDense*)mat->data;
  ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
  ierr = MatSetSizes(a->A,mat->rmap.n,mat->cmap.N,mat->rmap.n,mat->cmap.N);CHKERRQ(ierr);
  ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
  ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#if defined(PETSC_HAVE_PLAPACK)
#undef __FUNCT__
#define __FUNCT__ "MatSetSolverType_MPIDense_PLAPACK"
PetscErrorCode PETSCMAT_DLLEXPORT MatSetSolverType_MPIDense_PLAPACK(Mat mat,const char *type)
{
  PetscFunctionBegin;
  /* dummy function so that -mat_solver_type plapack or MatSetSolverType(mat,"plapack")
     silently works (since PLAPACK is on by default) */
  PetscFunctionReturn(0);
}
#endif
EXTERN_C_END

/*MC
   MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.

   Options Database Keys:
. -mat_type mpidense - sets the matrix type to "mpidense" during a call to MatSetFromOptions()

   Level: beginner

   MATMPIDENSE matrices may use direct solvers (LU, Cholesky, and QR) for parallel dense
   matrices via the external package PLAPACK, if PLAPACK is installed (run
   config/configure.py with the option --download-plapack).

   Options Database Keys:
+ -mat_plapack_nprows <n> - number of rows in the processor partition
. -mat_plapack_npcols <n> - number of columns in the processor partition
. -mat_plapack_nb <n> - block size of the template vector
. -mat_plapack_nb_alg <n> - algorithmic block size
- -mat_plapack_ckerror <n> - error checking flag

.seealso: MatCreateMPIDense(), MATDENSE, MATSEQDENSE
M*/
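/*
   Example command line (a hedged sketch; "ex1" is a placeholder executable,
   and the -mat_plapack_* options take effect only when PLAPACK is installed):

     mpirun -np 4 ./ex1 -mat_type mpidense -mat_plapack_nprows 2 -mat_plapack_npcols 2 -mat_plapack_nb 64
*/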
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIDense"
PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr      = PetscNewLog(mat,Mat_MPIDense,&a);CHKERRQ(ierr);
  mat->data = (void*)a;
  ierr      = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->factor  = 0;
  mat->mapping = 0;

  mat->insertmode = NOT_SET_VALUES;
  ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&a->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(((PetscObject)mat)->comm,&a->size);CHKERRQ(ierr);

  mat->rmap.bs = mat->cmap.bs = 1;
  ierr = PetscMapSetUp(&mat->rmap);CHKERRQ(ierr);
  ierr = PetscMapSetUp(&mat->cmap);CHKERRQ(ierr);
  a->nvec = mat->cmap.n;

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;
  ierr = MatStashCreate_Private(((PetscObject)mat)->comm,1,&mat->stash);CHKERRQ(ierr);

  /* stuff used for matrix vector multiply */
  a->lvec        = 0;
  a->Mvctx       = 0;
  a->roworiented = PETSC_TRUE;

  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
                                           "MatGetDiagonalBlock_MPIDense",
                                           MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C",
                                           "MatMPIDenseSetPreallocation_MPIDense",
                                           MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",
                                           "MatMatMult_MPIAIJ_MPIDense",
                                           MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",
                                           "MatMatMultSymbolic_MPIAIJ_MPIDense",
                                           MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",
                                           "MatMatMultNumeric_MPIAIJ_MPIDense",
                                           MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
#if defined(PETSC_HAVE_PLAPACK)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatSetSolverType_mpidense_plapack_C",
                                           "MatSetSolverType_MPIDense_PLAPACK",
                                           MatSetSolverType_MPIDense_PLAPACK);CHKERRQ(ierr);
#endif
  ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
EXTERN_C_END

/*MC
   MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.

   This matrix type is identical to MATSEQDENSE when constructed with a single process
   communicator, and MATMPIDENSE otherwise.

   Options Database Keys:
. -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPIDense(), MATSEQDENSE, MATMPIDENSE
M*/
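/*
   Selecting MATDENSE from code (an illustrative sketch; comm, M, and N are
   assumed to be defined by the caller). MatCreate_Dense() below then
   dispatches to MATSEQDENSE or MATMPIDENSE based on the communicator size.

     Mat A;
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATDENSE);CHKERRQ(ierr);
*/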
#undef __FUNCT__
#define __FUNCT__ "MatMPIDenseSetPreallocation"
/*@C
   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

   Not collective

   Input Parameters:
+  mat - the matrix
-  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
          to control all matrix memory allocation.

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL.

   Level: intermediate

.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data)
{
  PetscErrorCode ierr,(*f)(Mat,PetscScalar *);

  PetscFunctionBegin;
  ierr = PetscObjectQueryFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(mat,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
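/*
   A minimal sketch of the common case, letting PETSc allocate the storage
   (illustrative only; the global dimensions are made-up example values):

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,50);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
     ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr);
*/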
#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIDense"
/*@C
   MatCreateMPIDense - Creates a parallel matrix in dense format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
-  data - optional location of matrix data.  Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users)
          for PETSc to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users).

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Level: intermediate

.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
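/*
   A minimal usage sketch (illustrative only; the dimensions are made-up
   example values): a 100 x 50 dense matrix, letting PETSc choose the row
   distribution and allocate the storage.

     Mat A;
     ierr = MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,100,50,PETSC_NULL,&A);CHKERRQ(ierr);
*/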
#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIDense"
static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat = 0;
  ierr = MatCreate(((PetscObject)A)->comm,&mat);CHKERRQ(ierr);
  ierr = MatSetSizes(mat,A->rmap.n,A->cmap.n,A->rmap.N,A->cmap.N);CHKERRQ(ierr);
  ierr = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
  a    = (Mat_MPIDense*)mat->data;
  ierr = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->factor       = A->factor;
  mat->assembled    = PETSC_TRUE;
  mat->preallocated = PETSC_TRUE;

  mat->rmap.rstart = A->rmap.rstart;
  mat->rmap.rend   = A->rmap.rend;
  a->size          = oldmat->size;
  a->rank          = oldmat->rank;
  mat->insertmode  = NOT_SET_VALUES;
  a->nvec          = oldmat->nvec;
  a->donotstash    = oldmat->donotstash;

  ierr = PetscMemcpy(mat->rmap.range,A->rmap.range,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemcpy(mat->cmap.range,A->cmap.range,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = MatStashCreate_Private(((PetscObject)A)->comm,1,&mat->stash);CHKERRQ(ierr);

  ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);

#if defined(PETSC_HAVE_PLAPACK)
  ierr = PetscMemcpy(mat->spptr,A->spptr,sizeof(Mat_Plapack));CHKERRQ(ierr);
#endif
  *newmat = mat;
  PetscFunctionReturn(0);
}

#include "petscsys.h"

#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIDense_DenseInFile"
PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,MatType type,Mat *newmat)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  PetscInt       *rowners,i,m,nz,j;
  PetscScalar    *array,*vals,*vals_ptr;
  MPI_Status     status;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* determine ownership of all rows */
  m = M/size + ((M % size) > rank);
  ierr = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }

  ierr = MatCreate(comm,newmat);CHKERRQ(ierr);
  ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr);

  if (!rank) {
    /* process 0 owns the most rows, so vals is large enough to stage every message below */
    ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into the matrix by row: the file is row-major but the array is
       column-major, which is why we cannot read directly into the array */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }

    /* read the other processors' parts and ship them out */
    for (i=1; i<size; i++) {
      nz   = (rowners[i+1] - rowners[i])*N;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(*newmat))->tag,comm);CHKERRQ(ierr);
    }
  } else {
    /* receive my numeric values from process 0 */
    ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
    ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(*newmat))->tag,comm,&status);CHKERRQ(ierr);

    /* insert into the matrix by row (see the layout note above) */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }
  }
  /* return the array before assembling */
  ierr = MatRestoreArray(*newmat,&array);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
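/*
   Layout sketch for the mapping used above (not an additional API): the
   binary file stores the dense matrix by rows, while the local block is
   stored by columns, so for a local block of m rows and N columns the file
   entry (i,j) lands at array[i + j*m].  With m = 2, N = 3:

     file  (row-major):     a00 a01 a02 | a10 a11 a12
     array (column-major):  a00 a10 | a01 a11 | a02 a12
*/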
#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIDense"
PetscErrorCode MatLoad_MPIDense(PetscViewer viewer,MatType type,Mat *newmat)
{
  Mat            A;
  PetscScalar    *vals,*svals;
  MPI_Comm       comm = ((PetscObject)viewer)->comm;
  MPI_Status     status;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,maxnz;
  PetscInt       header[4],*rowlengths = 0,M,N,*cols;
  PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
  PetscInt       i,nz,j,rstart,rend;
  int            fd;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
  }

  ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2]; nz = header[3];

  /* handle the case where the matrix is stored on disk as a dense matrix
     (fd is only used on process 0) */
  if (nz == MATRIX_BINARY_FORMAT_DENSE) {
    ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,type,newmat);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* determine ownership of all rows */
  m = PetscMPIIntCast(M/size + ((M % size) > rank));
  ierr = PetscMalloc((size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&ourlens);CHKERRQ(ierr);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    ierr = PetscMalloc(M*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
    for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]; j<rowners[i+1]; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
    nz   = procsnz[0];
    ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in everyone else's part and ship it off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&mycols);CHKERRQ(ierr);

    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  /* loop over local rows, determining number of off-diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
  jj   = 0;
  for (i=0; i<m; i++) {
    for (j=0; j<ourlens[i]; j++) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for (i=0; i<m; i++) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreate(comm,newmat);CHKERRQ(ierr);
  ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
  A = *newmat;
  for (i=0; i<m; i++) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read the other processors' parts and ship them out */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree(ourlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
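/*
   Ownership sketch for the formula m = M/size + ((M % size) > rank) used
   above (the numbers are purely illustrative): with M = 10 rows on
   size = 3 processes, ranks 0,1,2 get m = 4,3,3, and the prefix sums give
   rowners = {0,4,7,10}, i.e. rank r owns rows rowners[r]..rowners[r+1]-1.
*/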
#undef __FUNCT__
#define __FUNCT__ "MatEqual_MPIDense"
PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscTruth *flag)
{
  Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
  Mat            a,b;
  PetscTruth     flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a    = matA->A;
  b    = matB->A;
  ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
  /* the matrices are equal only if every process finds its local blocks equal */
  ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
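/*
   A minimal usage sketch (illustrative only; A is assumed to be an assembled
   MPIDense matrix).  MatEqual() must be called by all processes in the
   matrices' communicator, and the MPI_Allreduce() above guarantees every
   process receives the same global answer:

     Mat        B;
     PetscTruth equal;
     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
     ierr = MatEqual(A,B,&equal);CHKERRQ(ierr);
*/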
#if defined(PETSC_HAVE_PLAPACK)

#undef __FUNCT__
#define __FUNCT__ "PetscPLAPACKFinalizePackage"
/*@C
  PetscPLAPACKFinalizePackage - This function destroys everything in the PETSc interface to PLAPACK.

  Level: developer

.keywords: Petsc, destroy, package, PLAPACK
.seealso: PetscFinalize()
@*/
PetscErrorCode PETSC_DLLEXPORT PetscPLAPACKFinalizePackage(void)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PLA_Finalize();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PetscPLAPACKInitializePackage"
/*@C
  PetscPLAPACKInitializePackage - This function initializes everything in the PETSc interface to PLAPACK. It is
  called from PetscDLLibraryRegister() when using dynamic libraries, and on the call to PetscInitialize()
  when using static libraries.

  Input Parameter:
. path - The dynamic library path, or PETSC_NULL

  Level: developer

.keywords: Petsc, initialize, package, PLAPACK
.seealso: PetscInitializePackage(), PetscInitialize()
@*/
PetscErrorCode PETSC_DLLEXPORT PetscPLAPACKInitializePackage(const char path[])
{
  MPI_Comm       comm = PETSC_COMM_WORLD;
  PetscMPIInt    size;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!PLA_Initialized(PETSC_NULL)) {
    ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
    Plapack_nprows = 1;
    Plapack_npcols = size;

    ierr = PetscOptionsBegin(comm,PETSC_NULL,"PLAPACK Options","Mat");CHKERRQ(ierr);
    ierr = PetscOptionsInt("-plapack_nprows","row dimension of 2D processor mesh","None",Plapack_nprows,&Plapack_nprows,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscOptionsInt("-plapack_npcols","column dimension of 2D processor mesh","None",Plapack_npcols,&Plapack_npcols,PETSC_NULL);CHKERRQ(ierr);
#if defined(PETSC_USE_DEBUG)
    Plapack_ierror = 3;
#else
    Plapack_ierror = 0;
#endif
    ierr = PetscOptionsInt("-plapack_ckerror","error checking flag","None",Plapack_ierror,&Plapack_ierror,PETSC_NULL);CHKERRQ(ierr);
    if (Plapack_ierror) {
      ierr = PLA_Set_error_checking(Plapack_ierror,PETSC_TRUE,PETSC_TRUE,PETSC_FALSE);CHKERRQ(ierr);
    } else {
      ierr = PLA_Set_error_checking(Plapack_ierror,PETSC_FALSE,PETSC_FALSE,PETSC_FALSE);CHKERRQ(ierr);
    }

    Plapack_nb_alg = 0;
    ierr = PetscOptionsInt("-plapack_nb_alg","algorithmic block size","None",Plapack_nb_alg,&Plapack_nb_alg,PETSC_NULL);CHKERRQ(ierr);
    if (Plapack_nb_alg) {
      ierr = pla_Environ_set_nb_alg(PLA_OP_ALL_ALG,Plapack_nb_alg);CHKERRQ(ierr);
    }
    PetscOptionsEnd();

    /* map the PETSc communicator onto a Plapack_nprows x Plapack_npcols 2D mesh */
    ierr = PLA_Comm_1D_to_2D(comm,Plapack_nprows,Plapack_npcols,&Plapack_comm_2d);CHKERRQ(ierr);
    ierr = PLA_Init(Plapack_comm_2d);CHKERRQ(ierr);
    ierr = PetscRegisterFinalize(PetscPLAPACKFinalizePackage);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#endif
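/*
   A usage sketch (illustrative only; the executable name and process counts
   are made up).  By default the code above builds a 1 x size process mesh;
   the options registered above can select a different mesh shape and enable
   PLAPACK error checking, e.g. a 2 x 4 mesh on 8 processes:

     mpiexec -n 8 ./myapp -plapack_nprows 2 -plapack_npcols 4 -plapack_ckerror 1
*/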